1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/ 2 #include "objpool.hpp" 3 4 const char *const PetscStreamTypes[] = { 5 "global_blocking", 6 "default_blocking", 7 "global_nonblocking", 8 "max", 9 "PetscStreamType", 10 "PETSC_STREAM_", 11 nullptr 12 }; 13 14 const char *const PetscDeviceContextJoinModes[] = { 15 "destroy", 16 "sync", 17 "no_sync", 18 "PetscDeviceContextJoinMode", 19 "PETSC_DEVICE_CONTEXT_JOIN_", 20 nullptr 21 }; 22 23 /* Define the allocator */ 24 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext> 25 { 26 static PetscInt PetscDeviceContextID; 27 28 PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept 29 { 30 PetscDeviceContext dc; 31 32 PetscFunctionBegin; 33 PetscCall(PetscNew(&dc)); 34 dc->id = PetscDeviceContextID++; 35 dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 36 *dctx = dc; 37 PetscFunctionReturn(0); 38 } 39 40 PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept 41 { 42 PetscFunctionBegin; 43 PetscAssert(!dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren); 44 if (dctx->ops->destroy) PetscCall((*dctx->ops->destroy)(dctx)); 45 PetscCall(PetscDeviceDestroy(&dctx->device)); 46 PetscCall(PetscFree(dctx->childIDs)); 47 PetscCall(PetscFree(dctx)); 48 PetscFunctionReturn(0); 49 } 50 51 PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept 52 { 53 PetscFunctionBegin; 54 /* don't deallocate the child array, rather just zero it out */ 55 PetscCall(PetscArrayzero(dctx->childIDs,dctx->maxNumChildren)); 56 dctx->setup = PETSC_FALSE; 57 dctx->numChildren = 0; 58 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 59 PetscFunctionReturn(0); 60 } 61 62 PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; } 63 }; 64 /* an ID = 0 is invalid */ 65 
PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1; 66 67 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool; 68 69 /*@C 70 PetscDeviceContextCreate - Creates a PetscDeviceContext 71 72 Not Collective, Asynchronous 73 74 Output Paramemter: 75 . dctx - The PetscDeviceContext 76 77 Notes: 78 Unlike almost every other PETSc class it is advised that most users use 79 PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts 80 of different types are incompatible with one another; using 81 PetscDeviceContextDuplicate() ensures compatible types. 82 83 Level: beginner 84 85 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(), 86 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(), 87 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy() 88 @*/ 89 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 90 { 91 PetscFunctionBegin; 92 PetscValidPointer(dctx,1); 93 PetscCall(PetscDeviceInitializePackage()); 94 PetscCall(contextPool.get(*dctx)); 95 PetscFunctionReturn(0); 96 } 97 98 /*@C 99 PetscDeviceContextDestroy - Frees a PetscDeviceContext 100 101 Not Collective, Asynchronous 102 103 Input Parameters: 104 . dctx - The PetscDeviceContext 105 106 Notes: 107 No implicit synchronization occurs due to this routine, all resources are released completely asynchronously 108 w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the 109 appropriate synchronization before calling this routine. 110 111 Developer Notes: 112 The context is never actually "destroyed", only returned to an ever growing pool of 113 contexts. There are currently no safeguards on the size of the pool, this should perhaps 114 be implemented. 
115 116 Level: beginner 117 118 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize() 119 @*/ 120 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 121 { 122 PetscFunctionBegin; 123 if (!*dctx) PetscFunctionReturn(0); 124 PetscCall(contextPool.reclaim(std::move(*dctx))); 125 *dctx = nullptr; 126 PetscFunctionReturn(0); 127 } 128 129 /*@C 130 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext 131 132 Not Collective, Asynchronous 133 134 Input Parameters: 135 + dctx - The PetscDeviceContext 136 - type - The PetscStreamType 137 138 Notes: 139 See PetscStreamType in include/petscdevicetypes.h for more information on the available 140 types and their interactions. If the PetscDeviceContext was previously set up and stream 141 type was changed, you must call PetscDeviceContextSetUp() again after this routine. 142 143 Level: intermediate 144 145 .seealso: PetscStreamType, PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions() 146 @*/ 147 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 148 { 149 PetscFunctionBegin; 150 PetscValidDeviceContext(dctx,1); 151 PetscValidStreamType(type,2); 152 /* only need to do complex swapping if the object has already been setup */ 153 if (dctx->setup && (dctx->streamType != type)) { 154 PetscCall((*dctx->ops->changestreamtype)(dctx,type)); 155 dctx->setup = PETSC_FALSE; 156 } 157 dctx->streamType = type; 158 PetscFunctionReturn(0); 159 } 160 161 /*@C 162 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext 163 164 Not Collective, Asynchronous 165 166 Input Parameter: 167 . dctx - The PetscDeviceContext 168 169 Output Parameter: 170 . 
type - The PetscStreamType

  Notes:
  See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions

  Level: intermediate

.seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions()
@*/
PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type)
{
  PetscFunctionBegin;
  PetscValidDeviceContext(dctx,1);
  /* type points to a PetscStreamType (an enum), not a PetscInt, so it is checked with the
   * generic pointer validator rather than the PetscInt-specific one */
  PetscValidPointer(type,2);
  *type = dctx->streamType;
  PetscFunctionReturn(0);
}

/*@C
  PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext

  Not Collective, Possibly Synchronous

  Input Parameters:
+ dctx   - The PetscDeviceContext
- device - The PetscDevice

  Notes:
  This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext
  must also have an attached PetscDevice). Unlike the usual set-type semantics, it is
  not strictly necessary to set a context's device to enable usage, any created device
  contexts will always come equipped with the "default" device.

  This routine is a no-op if dctx is already attached to device.

  This routine may initialize the backend device and incur synchronization.
206 207 Level: intermediate 208 209 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice() 210 @*/ 211 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 212 { 213 PetscFunctionBegin; 214 PetscValidDeviceContext(dctx,1); 215 PetscValidDevice(device,2); 216 if (dctx->device) { 217 /* can't do a strict pointer equality check since PetscDevice's are reused */ 218 if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0); 219 } 220 PetscCall(PetscDeviceDestroy(&dctx->device)); 221 if (dctx->ops->destroy) PetscCall((*dctx->ops->destroy)(dctx)); 222 PetscCall(PetscMemzero(dctx->ops,sizeof(*dctx->ops))); 223 PetscCall((*device->ops->createcontext)(dctx)); 224 PetscCall(PetscDeviceReference_Internal(device)); 225 dctx->device = device; 226 dctx->setup = PETSC_FALSE; 227 PetscFunctionReturn(0); 228 } 229 230 /*@C 231 PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext 232 233 Not Collective, Asynchronous 234 235 Input Parameter: 236 . dctx - the PetscDeviceContext 237 238 Output Parameter: 239 . device - The PetscDevice 240 241 Notes: 242 This is a borrowed reference, the user should not destroy the device. 243 244 Level: intermediate 245 246 .seealso: PetscDeviceContextSetDevice(), PetscDevice 247 @*/ 248 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 249 { 250 PetscFunctionBegin; 251 PetscValidDeviceContext(dctx,1); 252 PetscValidPointer(device,2); 253 PetscAssert(dctx->device,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id); 254 *device = dctx->device; 255 PetscFunctionReturn(0); 256 } 257 258 /*@C 259 PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use 260 261 Not Collective, Asynchronous 262 263 Input Parameter: 264 . 
dctx - The PetscDeviceContext

  Developer Notes:
  This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams,
  events, and (possibly) handles.

  Level: beginner

.seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions()
@*/
PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx)
{
  PetscFunctionBegin;
  PetscValidDeviceContext(dctx,1);
  /* a context without a device gets the default one before anything else happens */
  if (!dctx->device) {
    PetscCall(PetscInfo(nullptr,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]));
    PetscCall(PetscDeviceContextSetDefaultDevice_Internal(dctx));
  }
  /* the backend set-up runs only if the context is not already marked as set up */
  if (!dctx->setup) {
    PetscCall((*dctx->ops->setup)(dctx));
    dctx->setup = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

/*@C
  PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object

  Not Collective, Asynchronous

  Input Parameter:
. dctx - The PetscDeviceContext to duplicate

  Output Parameter:
. dctxdup - The duplicated PetscDeviceContext

  Notes:
  This is a shorthand method for creating a PetscDeviceContext with the exact same
  settings as another. Note however that the duplicated PetscDeviceContext does not "share"
  any of the underlying data with the original, (including its current stream-state) they
  are completely separate objects.
304 305 Level: beginner 306 307 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType() 308 @*/ 309 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) 310 { 311 PetscDeviceContext dup; 312 313 PetscFunctionBegin; 314 PetscValidDeviceContext(dctx,1); 315 PetscValidPointer(dctxdup,2); 316 PetscCall(PetscDeviceContextCreate(&dup)); 317 PetscCall(PetscDeviceContextSetStreamType(dup,dctx->streamType)); 318 if (dctx->device) PetscCall(PetscDeviceContextSetDevice(dup,dctx->device)); 319 PetscCall(PetscDeviceContextSetUp(dup)); 320 *dctxdup = dup; 321 PetscFunctionReturn(0); 322 } 323 324 /*@C 325 PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle 326 327 Not Collective, Asynchronous 328 329 Input Parameter: 330 . dctx - The PetscDeviceContext object 331 332 Output Parameter: 333 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work 334 335 Notes: 336 This routine only refers a singular context and does NOT take any of its children into 337 account. That is, if dctx is idle but has dependents who do have work, this routine still 338 returns PETSC_TRUE. 339 340 Level: intermediate 341 342 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork() 343 @*/ 344 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 345 { 346 PetscFunctionBegin; 347 PetscValidDeviceContext(dctx,1); 348 PetscValidBoolPointer(idle,2); 349 PetscCall((*dctx->ops->query)(dctx,idle)); 350 PetscCall(PetscInfo(nullptr,"PetscDeviceContext id %" PetscInt_FMT " %s idle\n",dctx->id,*idle ? 
"was" : "was not")); 351 PetscFunctionReturn(0); 352 } 353 354 /*@C 355 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 356 357 Not Collective, Asynchronous 358 359 Input Parameters: 360 + dctxa - The PetscDeviceContext object that is waiting 361 - dctxb - The PetscDeviceContext object that is being waited on 362 363 Notes: 364 Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was 365 called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments. 366 367 Level: beginner 368 369 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin() 370 @*/ 371 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 372 { 373 PetscFunctionBegin; 374 PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2); 375 if (dctxa == dctxb) PetscFunctionReturn(0); 376 PetscCall((*dctxa->ops->waitforcontext)(dctxa,dctxb)); 377 PetscFunctionReturn(0); 378 } 379 380 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO)) 381 #if PETSC_USE_DEBUG_AND_INFO 382 #include <string> 383 #endif 384 /*@C 385 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 386 387 Not Collective, Asynchronous 388 389 Input Parameters: 390 + dctx - The parent PetscDeviceContext 391 - n - The number of children to create 392 393 Output Parameter: 394 . dsub - The created child context(s) 395 396 Notes: 397 This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning 398 that work queued on child contexts will not start until the parent context finishes its work. This accounts for work 399 queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children. 
400 401 Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects 402 to free all of it's children (and ONLY its children) before itself is freed. 403 404 DAG representation: 405 .vb 406 time -> 407 408 -> dctx \----> dctx ------> 409 \---> dsub[0] ---> 410 \--> ... -------> 411 \-> dsub[n-1] -> 412 .ve 413 414 Level: intermediate 415 416 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle() 417 @*/ 418 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 419 { 420 #if PETSC_USE_DEBUG_AND_INFO 421 const PetscInt nBefore = n; 422 static std::string idList; 423 #endif 424 PetscDeviceContext *dsubTmp = nullptr; 425 PetscInt i = 0; 426 427 PetscFunctionBegin; 428 PetscValidDeviceContext(dctx,1); 429 PetscValidPointer(dsub,3); 430 PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n); 431 #if PETSC_USE_DEBUG_AND_INFO 432 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 433 idList.reserve(4*n); 434 #endif 435 /* update child totals */ 436 dctx->numChildren += n; 437 /* now to find out if we have room */ 438 if (dctx->numChildren > dctx->maxNumChildren) { 439 /* no room, either from having too many kids or not having any */ 440 if (dctx->childIDs) { 441 /* have existing children, must reallocate them */ 442 PetscCall(PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs)); 443 /* clear the extra memory since realloc doesn't do it for us */ 444 PetscCall(PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren))); 445 } else { 446 /* have no children */ 447 PetscCall(PetscCalloc1(dctx->numChildren,&dctx->childIDs)); 448 } 449 /* update total number of children */ 450 dctx->maxNumChildren = dctx->numChildren; 451 } 452 PetscCall(PetscMalloc1(n,&dsubTmp)); 453 while (n) { 454 /* empty 
child slot */ 455 if (!(dctx->childIDs[i])) { 456 /* create the child context in the image of its parent */ 457 PetscCall(PetscDeviceContextDuplicate(dctx,dsubTmp+i)); 458 PetscCall(PetscDeviceContextWaitForContext(dsubTmp[i],dctx)); 459 /* register the child with its parent */ 460 dctx->childIDs[i] = dsubTmp[i]->id; 461 #if PETSC_USE_DEBUG_AND_INFO 462 idList += std::to_string(dsubTmp[i]->id); 463 if (n != 1) idList += ", "; 464 #endif 465 --n; 466 } 467 ++i; 468 } 469 #if PETSC_USE_DEBUG_AND_INFO 470 PetscCall(PetscInfo(nullptr,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str())); 471 /* resets the size but doesn't deallocate the memory */ 472 idList.clear(); 473 #endif 474 /* pass the children back to caller */ 475 *dsub = dsubTmp; 476 PetscFunctionReturn(0); 477 } 478 479 /*@C 480 PetscDeviceContextJoin - Converge a set of child contexts 481 482 Not Collective, Asynchronous 483 484 Input Parameters: 485 + dctx - A PetscDeviceContext to converge on 486 . n - The number of sub contexts to converge 487 . joinMode - The type of join to perform 488 - dsub - The sub contexts to converge 489 490 Notes: 491 If PetscDeviceContextFork() creates n edges from a source node which all depend on the 492 source node, then this routine is the exact mirror. That is, it creates a node 493 (represented in dctx) which recieves n edges (and optionally destroys them) which is 494 dependent on the completion of all incoming edges. 495 496 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed 497 by this routine. Thus all sub contexts must have been created with the dctx passed to 498 this routine. 499 500 if joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the 501 sub contexts do not wait for one another afterwards. 502 503 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally 504 wait on dctx after converging. 
This has the effect of "synchronizing" the outgoing 505 edges. 506 507 DAG representations: 508 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY 509 .vb 510 time -> 511 512 -> dctx ---------/- dctx -> 513 -> dsub[0] -----/ 514 -> ... -------/ 515 -> dsub[n-1] -/ 516 .ve 517 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC 518 .vb 519 time -> 520 521 -> dctx ---------/- dctx -> 522 -> dsub[0] -----/---------> 523 -> ... -------/----------> 524 -> dsub[n-1] -/-----------> 525 .ve 526 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC 527 .vb 528 time -> 529 530 -> dctx ---------/- dctx -\----> dctx ------> 531 -> dsub[0] -----/ \---> dsub[0] ---> 532 -> ... -------/ \--> ... -------> 533 -> dsub[n-1] -/ \-> dsub[n-1] -> 534 .ve 535 536 Level: intermediate 537 538 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode 539 @*/ 540 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 541 { 542 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 543 static std::string idList; 544 #endif 545 546 PetscFunctionBegin; 547 /* validity of dctx is checked in the wait-for loop */ 548 PetscValidPointer(dsub,4); 549 PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n); 550 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 551 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 552 idList.reserve(4*n); 553 #endif 554 /* first dctx waits on all the incoming edges */ 555 for (PetscInt i = 0; i < n; ++i) { 556 PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4); 557 PetscCall(PetscDeviceContextWaitForContext(dctx,(*dsub)[i])); 558 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 559 idList += std::to_string((*dsub)[i]->id); 560 if (i+1 < n) idList += ", "; 561 #endif 562 } 563 564 /* now we handle the aftermath */ 565 switch (joinMode) { 566 case 
PETSC_DEVICE_CONTEXT_JOIN_DESTROY: 567 { 568 PetscInt j = 0; 569 570 PetscAssert(n <= dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren); 571 /* update child count while it's still fresh in memory */ 572 dctx->numChildren -= n; 573 for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) { 574 if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) { 575 /* child is one of ours, can destroy it */ 576 PetscCall(PetscDeviceContextDestroy((*dsub)+j)); 577 /* reset the child slot */ 578 dctx->childIDs[i] = 0; 579 if (++j == n) break; 580 } 581 } 582 /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */ 583 PetscAssert(j == n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j); 584 PetscCall(PetscFree(*dsub)); 585 } 586 break; 587 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 588 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i],dctx)); 589 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 590 break; 591 default: 592 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given"); 593 } 594 595 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 596 PetscCall(PetscInfo(nullptr,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str())); 597 idList.clear(); 598 #endif 599 PetscFunctionReturn(0); 600 } 601 602 /*@C 603 PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished 604 605 Not Collective, 
Synchronous 606 607 Input Parameters: 608 . dctx - The PetscDeviceContext to synchronize 609 610 Level: beginner 611 612 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle() 613 @*/ 614 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 615 { 616 PetscFunctionBegin; 617 PetscValidDeviceContext(dctx,1); 618 /* if it isn't setup there is nothing to sync on */ 619 if (dctx->setup) PetscCall((*dctx->ops->synchronize)(dctx)); 620 PetscFunctionReturn(0); 621 } 622 623 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT 624 // REMOVE ME (change) 625 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING 626 627 static PetscDeviceType rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 628 static PetscStreamType rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 629 static PetscDeviceContext globalContext = nullptr; 630 631 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should 632 * match whatever device is eagerly intialized */ 633 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) 634 { 635 PetscFunctionBegin; 636 PetscValidDeviceType(type,1); 637 rootDeviceType = type; 638 PetscFunctionReturn(0); 639 } 640 641 #if 0 642 /* currently unused */ 643 PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type) 644 { 645 PetscFunctionBegin; 646 PetscValidStreamType(type,1); 647 rootStreamType = type; 648 PetscFunctionReturn(0); 649 } 650 #endif 651 652 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) 653 { 654 static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode { 655 656 PetscFunctionBegin; 657 PetscCall(PetscDeviceContextDestroy(&globalContext)); 658 rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 659 rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 660 PetscFunctionReturn(0); 661 }; 662 663 PetscFunctionBegin; 664 if (globalContext) 
PetscFunctionReturn(0); 665 /* this exists purely as a valid device check. */ 666 PetscCall(PetscDeviceInitializePackage()); 667 PetscCall(PetscRegisterFinalize(PetscDeviceContextFinalizer)); 668 PetscCall(PetscInfo(nullptr,"Initializing global PetscDeviceContext\n")); 669 /* we call the allocator directly here since the ObjectPool creates a PetscContainer which 670 * eventually tries to call logging functions. However, this routine may be purposefully 671 * called __before__ logging is initialized, so the logging function would PETSCABORT */ 672 PetscCall(contextPool.allocator().create(&globalContext)); 673 PetscCall(PetscDeviceContextSetStreamType(globalContext,rootStreamType)); 674 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType)); 675 PetscCall(PetscDeviceContextSetUp(globalContext)); 676 PetscFunctionReturn(0); 677 } 678 679 /*@C 680 PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext 681 682 Not Collective, Asynchronous 683 684 Output Parameter: 685 . dctx - The PetscDeviceContext 686 687 Notes: 688 The user generally should not destroy contexts retrieved with this routine unless they 689 themselves have created them. There exists no protection against destroying the root 690 context. 691 692 Developer Notes: 693 Unless the user has set their own, this routine creates the "root" context the first time it 694 is called, registering its destructor to PetscFinalize(). 
695 696 Level: beginner 697 698 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(), 699 PetscDeviceContextJoin(), PetscDeviceContextCreate() 700 @*/ 701 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) 702 { 703 PetscFunctionBegin; 704 PetscValidPointer(dctx,1); 705 PetscCall(PetscDeviceContextSetupGlobalContext_Private()); 706 /* while the static analyzer can find global variables, it will throw a warning about not 707 * being able to connect this back to the function arguments */ 708 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1)); 709 *dctx = globalContext; 710 PetscFunctionReturn(0); 711 } 712 713 /*@C 714 PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext 715 716 Not Collective, Asynchronous 717 718 Input Parameter: 719 . dctx - The PetscDeviceContext 720 721 Notes: 722 This routine can be used to set the defacto "root" PetscDeviceContext to a user-defined 723 implementation by calling this routine immediately after PetscInitialize() and ensuring that 724 PetscDevice is not greedily intialized. In this case the user is responsible for destroying 725 their PetscDeviceContext before PetscFinalize() returns. 726 727 The old context is not stored in any way by this routine; if one is overriding a context that 728 they themselves do not control, one should take care to temporarily store it by calling 729 PetscDeviceContextGetCurrentContext() before calling this routine. 
730 731 Level: beginner 732 733 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(), 734 PetscDeviceContextJoin(), PetscDeviceContextCreate() 735 @*/ 736 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) 737 { 738 PetscFunctionBegin; 739 PetscValidDeviceContext(dctx,1); 740 PetscAssert(dctx->setup,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id); 741 globalContext = dctx; 742 PetscCall(PetscInfo(nullptr,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id)); 743 PetscFunctionReturn(0); 744 } 745 746 /*@C 747 PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database 748 749 Collective on comm, Asynchronous 750 751 Input Parameters: 752 + comm - MPI communicator on which to query the options database 753 . prefix - prefix to prepend to all options database queries, NULL if not needed 754 - dctx - The PetscDeviceContext to configure 755 756 Output Parameter: 757 . 
dctx - The PetscDeviceContext

  Options Database:
+ -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
  PetscDeviceContextSetStreamType()
- -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType

  Level: beginner

.seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
@*/
PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
{
  PetscErrorCode ierr;
  PetscInt       stype;
  PetscInt       dtype;
  PetscBool      wasSet;

  PetscFunctionBegin;
  if (prefix) PetscValidCharPointer(prefix,2);
  PetscValidDeviceContext(dctx,3);
  ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");PetscCall(ierr);
  /* stream type: the context's current type is offered as the default */
  PetscCall(PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&stype,&wasSet));
  if (wasSet) {
    PetscCall(PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype)));
  }
  /* device type: the first entry of PetscDeviceTypes is left out of the choices, so the
   * selected index is shifted back up by one before being used as a PetscDeviceType */
  PetscCall(PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE],&dtype,&wasSet));
  if (wasSet) {
    PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(dtype+1)));
  }
  ierr = PetscOptionsEnd();PetscCall(ierr);
  PetscFunctionReturn(0);
}