1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/ 2 #include "objpool.hpp" 3 4 const char *const PetscStreamTypes[] = { 5 "global_blocking", 6 "default_blocking", 7 "global_nonblocking", 8 "max", 9 "PetscStreamType", 10 "PETSC_STREAM_", 11 nullptr 12 }; 13 14 const char *const PetscDeviceContextJoinModes[] = { 15 "destroy", 16 "sync", 17 "no_sync", 18 "PetscDeviceContextJoinMode", 19 "PETSC_DEVICE_CONTEXT_JOIN_", 20 nullptr 21 }; 22 23 /* Define the allocator */ 24 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext> 25 { 26 static PetscInt PetscDeviceContextID; 27 28 PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept 29 { 30 PetscDeviceContext dc; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = PetscNew(&dc);CHKERRQ(ierr); 35 dc->id = PetscDeviceContextID++; 36 dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 37 *dctx = dc; 38 PetscFunctionReturn(0); 39 } 40 41 PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept 42 { 43 PetscErrorCode ierr; 44 45 PetscFunctionBegin; 46 PetscAssert(!dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren); 47 if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);} 48 ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr); 49 ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr); 50 ierr = PetscFree(dctx);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept 55 { 56 PetscErrorCode ierr; 57 58 PetscFunctionBegin; 59 /* don't deallocate the child array, rather just zero it out */ 60 ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr); 61 dctx->setup = PETSC_FALSE; 62 dctx->numChildren = 0; 63 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 64 
PetscFunctionReturn(0); 65 } 66 67 PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; } 68 }; 69 /* an ID = 0 is invalid */ 70 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1; 71 72 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool; 73 74 /*@C 75 PetscDeviceContextCreate - Creates a PetscDeviceContext 76 77 Not Collective, Asynchronous 78 79 Output Paramemter: 80 . dctx - The PetscDeviceContext 81 82 Notes: 83 Unlike almost every other PETSc class it is advised that most users use 84 PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts 85 of different types are incompatible with one another; using 86 PetscDeviceContextDuplicate() ensures compatible types. 87 88 Level: beginner 89 90 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(), 91 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(), 92 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy() 93 @*/ 94 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 95 { 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscValidPointer(dctx,1); 100 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 101 ierr = contextPool.get(*dctx);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 /*@C 106 PetscDeviceContextDestroy - Frees a PetscDeviceContext 107 108 Not Collective, Asynchronous 109 110 Input Parameters: 111 . dctx - The PetscDeviceContext 112 113 Notes: 114 No implicit synchronization occurs due to this routine, all resources are released completely asynchronously 115 w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the 116 appropriate synchronization before calling this routine. 117 118 Developer Notes: 119 The context is never actually "destroyed", only returned to an ever growing pool of 120 contexts. 
There are currently no safeguards on the size of the pool, this should perhaps
  be implemented.

  Passing the address of a NULL context is allowed and does nothing.

  Level: beginner

.seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize()
@*/
PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dctx is dereferenced on the next line, so validate the address itself first */
  PetscValidPointer(dctx,1);
  if (!*dctx) PetscFunctionReturn(0);
  /* hand the context back to the pool instead of freeing it */
  ierr = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr);
  /* make sure the caller is not left holding a stale handle */
  *dctx = nullptr;
  PetscFunctionReturn(0);
}

/*@C
  PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext

  Not Collective, Asynchronous

  Input Parameters:
+ dctx - The PetscDeviceContext
- type - The PetscStreamType

  Notes:
  See PetscStreamType in include/petscdevicetypes.h for more information on the available
  types and their interactions. If the PetscDeviceContext was previously set up and stream
  type was changed, you must call PetscDeviceContextSetUp() again after this routine.
151 152 Level: intermediate 153 154 .seealso: PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions() 155 @*/ 156 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 157 { 158 PetscFunctionBegin; 159 PetscValidDeviceContext(dctx,1); 160 PetscValidStreamType(type,2); 161 /* only need to do complex swapping if the object has already been setup */ 162 if (dctx->setup && (dctx->streamType != type)) { 163 PetscErrorCode ierr; 164 165 ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr); 166 dctx->setup = PETSC_FALSE; 167 } 168 dctx->streamType = type; 169 PetscFunctionReturn(0); 170 } 171 172 /*@C 173 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext 174 175 Not Collective, Asynchronous 176 177 Input Parameter: 178 . dctx - The PetscDeviceContext 179 180 Output Parameter: 181 . type - The PetscStreamType 182 183 Notes: 184 See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions 185 186 Level: intermediate 187 188 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions() 189 @*/ 190 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 191 { 192 PetscFunctionBegin; 193 PetscValidDeviceContext(dctx,1); 194 PetscValidIntPointer(type,2); 195 *type = dctx->streamType; 196 PetscFunctionReturn(0); 197 } 198 199 /*@C 200 PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext 201 202 Not Collective, Possibly Synchronous 203 204 Input Parameters: 205 + dctx - The PetscDeviceContext 206 - device - The PetscDevice 207 208 Notes: 209 This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext 210 must also have an attached PetscDevice). 
Unlike the usual set-type semantics, it is 211 not stricly necessary to set a contexts device to enable usage, any created device 212 contexts will always come equipped with the "default" device. 213 214 This routine is a no-op if dctx is already attached to device. 215 216 This routine may initialize the backend device and incur synchronization. 217 218 Level: intermediate 219 220 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice() 221 @*/ 222 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 223 { 224 PetscErrorCode ierr; 225 226 PetscFunctionBegin; 227 PetscValidDeviceContext(dctx,1); 228 PetscValidDevice(device,2); 229 if (dctx->device) { 230 /* can't do a strict pointer equality check since PetscDevice's are reused */ 231 if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0); 232 } 233 ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr); 234 if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);} 235 ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr); 236 ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr); 237 ierr = PetscDeviceReference_Internal(device);CHKERRQ(ierr); 238 dctx->device = device; 239 dctx->setup = PETSC_FALSE; 240 PetscFunctionReturn(0); 241 } 242 243 /*@C 244 PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext 245 246 Not Collective, Asynchronous 247 248 Input Parameter: 249 . dctx - the PetscDeviceContext 250 251 Output Parameter: 252 . device - The PetscDevice 253 254 Notes: 255 This is a borrowed reference, the user should not destroy the device. 
256 257 Level: intermediate 258 259 .seealso: PetscDeviceContextSetDevice(), PetscDevice 260 @*/ 261 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 262 { 263 PetscFunctionBegin; 264 PetscValidDeviceContext(dctx,1); 265 PetscValidPointer(device,2); 266 PetscAssert(dctx->device,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id); 267 *device = dctx->device; 268 PetscFunctionReturn(0); 269 } 270 271 /*@C 272 PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use 273 274 Not Collective, Asynchronous 275 276 Input Parameter: 277 . dctx - The PetscDeviceContext 278 279 Developer Notes: 280 This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams, 281 events, and (possibly) handles. 282 283 Level: beginner 284 285 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions() 286 @*/ 287 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 288 { 289 PetscErrorCode ierr; 290 291 PetscFunctionBegin; 292 PetscValidDeviceContext(dctx,1); 293 if (!dctx->device) { 294 ierr = PetscInfo(nullptr,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr); 295 ierr = PetscDeviceContextSetDefaultDevice_Internal(dctx);CHKERRQ(ierr); 296 } 297 if (dctx->setup) PetscFunctionReturn(0); 298 ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr); 299 dctx->setup = PETSC_TRUE; 300 PetscFunctionReturn(0); 301 } 302 303 /*@C 304 PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object 305 306 Not Collective, Asynchronous 307 308 Input Parameter: 309 . dctx - The PetscDeviceContext to duplicate 310 311 Output Paramter: 312 . 
dctxdup - The duplicated PetscDeviceContext

  Notes:
  This is a shorthand method for creating a PetscDeviceContext with the exact same
  settings as another. Note however that the duplicated PetscDeviceContext does not "share"
  any of the underlying data with the original, (including its current stream-state) they
  are completely separate objects.

  Level: beginner

.seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
@*/
PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
{
  PetscDeviceContext clone;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidDeviceContext(dctx,1);
  PetscValidPointer(dctxdup,2);
  ierr = PetscDeviceContextCreate(&clone);CHKERRQ(ierr);
  /* copy the settings over: stream type first, then the device if the original has one */
  ierr = PetscDeviceContextSetStreamType(clone,dctx->streamType);CHKERRQ(ierr);
  if (dctx->device) {
    ierr = PetscDeviceContextSetDevice(clone,dctx->device);CHKERRQ(ierr);
  }
  /* the duplicate is returned set up */
  ierr = PetscDeviceContextSetUp(clone);CHKERRQ(ierr);
  *dctxdup = clone;
  PetscFunctionReturn(0);
}

/*@C
  PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle

  Not Collective, Asynchronous

  Input Parameter:
. dctx - The PetscDeviceContext object

  Output Parameter:
. idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work

  Notes:
  This routine only refers a singular context and does NOT take any of its children into
  account. That is, if dctx is idle but has dependents who do have work, this routine still
  returns PETSC_TRUE.
355 356 Level: intermediate 357 358 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork() 359 @*/ 360 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 361 { 362 PetscErrorCode ierr; 363 364 PetscFunctionBegin; 365 PetscValidDeviceContext(dctx,1); 366 PetscValidBoolPointer(idle,2); 367 ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr); 368 ierr = PetscInfo(nullptr,"PetscDeviceContext id %" PetscInt_FMT " %s idle\n",dctx->id,*idle ? "was" : "was not");CHKERRQ(ierr); 369 PetscFunctionReturn(0); 370 } 371 372 /*@C 373 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 374 375 Not Collective, Asynchronous 376 377 Input Parameters: 378 + dctxa - The PetscDeviceContext object that is waiting 379 - dctxb - The PetscDeviceContext object that is being waited on 380 381 Notes: 382 Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was 383 called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments. 
384 385 Level: beginner 386 387 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin() 388 @*/ 389 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 390 { 391 PetscErrorCode ierr; 392 393 PetscFunctionBegin; 394 PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2); 395 if (dctxa == dctxb) PetscFunctionReturn(0); 396 ierr = (*dctxa->ops->waitforcontext)(dctxa,dctxb);CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO)) 401 #if PETSC_USE_DEBUG_AND_INFO 402 #include <string> 403 #endif 404 /*@C 405 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 406 407 Not Collective, Asynchronous 408 409 Input Parameters: 410 + dctx - The parent PetscDeviceContext 411 - n - The number of children to create 412 413 Output Parameter: 414 . dsub - The created child context(s) 415 416 Notes: 417 This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning 418 that work queued on child contexts will not start until the parent context finishes its work. This accounts for work 419 queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children. 420 421 Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects 422 to free all of it's children (and ONLY its children) before itself is freed. 423 424 DAG representation: 425 .vb 426 time -> 427 428 -> dctx \----> dctx ------> 429 \---> dsub[0] ---> 430 \--> ... 
-------> 431 \-> dsub[n-1] -> 432 .ve 433 434 Level: intermediate 435 436 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle() 437 @*/ 438 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 439 { 440 #if PETSC_USE_DEBUG_AND_INFO 441 const PetscInt nBefore = n; 442 static std::string idList; 443 #endif 444 PetscDeviceContext *dsubTmp = nullptr; 445 PetscInt i = 0; 446 PetscErrorCode ierr; 447 448 PetscFunctionBegin; 449 PetscValidDeviceContext(dctx,1); 450 PetscValidPointer(dsub,3); 451 PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n); 452 #if PETSC_USE_DEBUG_AND_INFO 453 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 454 idList.reserve(4*n); 455 #endif 456 /* update child totals */ 457 dctx->numChildren += n; 458 /* now to find out if we have room */ 459 if (dctx->numChildren > dctx->maxNumChildren) { 460 /* no room, either from having too many kids or not having any */ 461 if (dctx->childIDs) { 462 /* have existing children, must reallocate them */ 463 ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr); 464 /* clear the extra memory since realloc doesn't do it for us */ 465 ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr); 466 } else { 467 /* have no children */ 468 ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr); 469 } 470 /* update total number of children */ 471 dctx->maxNumChildren = dctx->numChildren; 472 } 473 ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr); 474 while (n) { 475 /* empty child slot */ 476 if (!(dctx->childIDs[i])) { 477 /* create the child context in the image of its parent */ 478 ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr); 479 ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr); 480 /* 
register the child with its parent */ 481 dctx->childIDs[i] = dsubTmp[i]->id; 482 #if PETSC_USE_DEBUG_AND_INFO 483 idList += std::to_string(dsubTmp[i]->id); 484 if (n != 1) idList += ", "; 485 #endif 486 --n; 487 } 488 ++i; 489 } 490 #if PETSC_USE_DEBUG_AND_INFO 491 ierr = PetscInfo(nullptr,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr); 492 /* resets the size but doesn't deallocate the memory */ 493 idList.clear(); 494 #endif 495 /* pass the children back to caller */ 496 *dsub = dsubTmp; 497 PetscFunctionReturn(0); 498 } 499 500 /*@C 501 PetscDeviceContextJoin - Converge a set of child contexts 502 503 Not Collective, Asynchronous 504 505 Input Parameters: 506 + dctx - A PetscDeviceContext to converge on 507 . n - The number of sub contexts to converge 508 . joinMode - The type of join to perform 509 - dsub - The sub contexts to converge 510 511 Notes: 512 If PetscDeviceContextFork() creates n edges from a source node which all depend on the 513 source node, then this routine is the exact mirror. That is, it creates a node 514 (represented in dctx) which recieves n edges (and optionally destroys them) which is 515 dependent on the completion of all incoming edges. 516 517 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed 518 by this routine. Thus all sub contexts must have been created with the dctx passed to 519 this routine. 520 521 if joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the 522 sub contexts do not wait for one another afterwards. 523 524 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally 525 wait on dctx after converging. This has the effect of "synchronizing" the outgoing 526 edges. 527 528 DAG representations: 529 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY 530 .vb 531 time -> 532 533 -> dctx ---------/- dctx -> 534 -> dsub[0] -----/ 535 -> ... 
-------/ 536 -> dsub[n-1] -/ 537 .ve 538 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC 539 .vb 540 time -> 541 542 -> dctx ---------/- dctx -> 543 -> dsub[0] -----/---------> 544 -> ... -------/----------> 545 -> dsub[n-1] -/-----------> 546 .ve 547 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC 548 .vb 549 time -> 550 551 -> dctx ---------/- dctx -\----> dctx ------> 552 -> dsub[0] -----/ \---> dsub[0] ---> 553 -> ... -------/ \--> ... -------> 554 -> dsub[n-1] -/ \-> dsub[n-1] -> 555 .ve 556 557 Level: intermediate 558 559 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode 560 @*/ 561 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 562 { 563 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 564 static std::string idList; 565 #endif 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 /* validity of dctx is checked in the wait-for loop */ 570 PetscValidPointer(dsub,4); 571 PetscAssert(n >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n); 572 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 573 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 574 idList.reserve(4*n); 575 #endif 576 /* first dctx waits on all the incoming edges */ 577 for (PetscInt i = 0; i < n; ++i) { 578 PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4); 579 ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr); 580 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 581 idList += std::to_string((*dsub)[i]->id); 582 if (i+1 < n) idList += ", "; 583 #endif 584 } 585 586 /* now we handle the aftermath */ 587 switch (joinMode) { 588 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: 589 { 590 PetscInt j = 0; 591 592 PetscAssert(n <= dctx->numChildren,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only 
has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren); 593 /* update child count while it's still fresh in memory */ 594 dctx->numChildren -= n; 595 for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) { 596 if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) { 597 /* child is one of ours, can destroy it */ 598 ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr); 599 /* reset the child slot */ 600 dctx->childIDs[i] = 0; 601 if (++j == n) break; 602 } 603 } 604 /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */ 605 PetscAssert(j == n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j); 606 ierr = PetscFree(*dsub);CHKERRQ(ierr); 607 } 608 break; 609 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 610 for (PetscInt i = 0; i < n; ++i) { 611 ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr); 612 } 613 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 614 break; 615 default: 616 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given"); 617 } 618 619 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 620 ierr = PetscInfo(nullptr,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr); 621 idList.clear(); 622 #endif 623 PetscFunctionReturn(0); 624 } 625 626 /*@C 627 PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished 628 629 Not Collective, Synchronous 630 631 Input Parameters: 632 . 
dctx - The PetscDeviceContext to synchronize 633 634 Level: beginner 635 636 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle() 637 @*/ 638 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 639 { 640 PetscFunctionBegin; 641 PetscValidDeviceContext(dctx,1); 642 /* if it isn't setup there is nothing to sync on */ 643 if (dctx->setup) {auto ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);} 644 PetscFunctionReturn(0); 645 } 646 647 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT 648 // REMOVE ME (change) 649 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING 650 651 static PetscDeviceType rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 652 static PetscStreamType rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 653 static PetscDeviceContext globalContext = nullptr; 654 655 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should 656 * match whatever device is eagerly intialized */ 657 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) 658 { 659 PetscFunctionBegin; 660 PetscValidDeviceType(type,1); 661 rootDeviceType = type; 662 PetscFunctionReturn(0); 663 } 664 665 #if 0 666 /* currently unused */ 667 PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type) 668 { 669 PetscFunctionBegin; 670 PetscValidStreamType(type,1); 671 rootStreamType = type; 672 PetscFunctionReturn(0); 673 } 674 #endif 675 676 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) 677 { 678 PetscErrorCode ierr; 679 static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode { 680 PetscErrorCode ierr; 681 682 PetscFunctionBegin; 683 ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr); 684 rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 685 rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 686 PetscFunctionReturn(0); 687 }; 688 689 
PetscFunctionBegin; 690 if (globalContext) PetscFunctionReturn(0); 691 /* this exists purely as a valid device check. */ 692 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 693 ierr = PetscRegisterFinalize(PetscDeviceContextFinalizer);CHKERRQ(ierr); 694 ierr = PetscInfo(nullptr,"Initializing global PetscDeviceContext\n");CHKERRQ(ierr); 695 /* we call the allocator directly here since the ObjectPool creates a PetscContainer which 696 * eventually tries to call logging functions. However, this routine may be purposefully 697 * called __before__ logging is initialized, so the logging function would PETSCABORT */ 698 ierr = contextPool.allocator().create(&globalContext);CHKERRQ(ierr); 699 ierr = PetscDeviceContextSetStreamType(globalContext,rootStreamType);CHKERRQ(ierr); 700 ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType);CHKERRQ(ierr); 701 ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 /*@C 706 PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext 707 708 Not Collective, Asynchronous 709 710 Output Parameter: 711 . dctx - The PetscDeviceContext 712 713 Notes: 714 The user generally should not destroy contexts retrieved with this routine unless they 715 themselves have created them. There exists no protection against destroying the root 716 context. 717 718 Developer Notes: 719 Unless the user has set their own, this routine creates the "root" context the first time it 720 is called, registering its destructor to PetscFinalize(). 
721 722 Level: beginner 723 724 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(), 725 PetscDeviceContextJoin(), PetscDeviceContextCreate() 726 @*/ 727 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) 728 { 729 PetscErrorCode ierr; 730 731 PetscFunctionBegin; 732 PetscValidPointer(dctx,1); 733 ierr = PetscDeviceContextSetupGlobalContext_Private();CHKERRQ(ierr); 734 /* while the static analyzer can find global variables, it will throw a warning about not 735 * being able to connect this back to the function arguments */ 736 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1)); 737 *dctx = globalContext; 738 PetscFunctionReturn(0); 739 } 740 741 /*@C 742 PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext 743 744 Not Collective, Asynchronous 745 746 Input Parameter: 747 . dctx - The PetscDeviceContext 748 749 Notes: 750 This routine can be used to set the defacto "root" PetscDeviceContext to a user-defined 751 implementation by calling this routine immediately after PetscInitialize() and ensuring that 752 PetscDevice is not greedily intialized. In this case the user is responsible for destroying 753 their PetscDeviceContext before PetscFinalize() returns. 754 755 The old context is not stored in any way by this routine; if one is overriding a context that 756 they themselves do not control, one should take care to temporarily store it by calling 757 PetscDeviceContextGetCurrentContext() before calling this routine. 
758 759 Level: beginner 760 761 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(), 762 PetscDeviceContextJoin(), PetscDeviceContextCreate() 763 @*/ 764 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) 765 { 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 PetscValidDeviceContext(dctx,1); 770 PetscAssert(dctx->setup,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id); 771 globalContext = dctx; 772 ierr = PetscInfo(nullptr,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id);CHKERRQ(ierr); 773 PetscFunctionReturn(0); 774 } 775 776 /*@C 777 PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database 778 779 Collective on comm, Asynchronous 780 781 Input Parameters: 782 + comm - MPI communicator on which to query the options database 783 . prefix - prefix to prepend to all options database queries, NULL if not needed 784 - dctx - The PetscDeviceContext to configure 785 786 Output Parameter: 787 . 
dctx - The PetscDeviceContext

  Options Database:
+ -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
   PetscDeviceContextSetStreamType()
- -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType

  Level: beginner

.seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
@*/
PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
{
  PetscBool      wasSet;
  PetscInt       streamChoice,deviceChoice;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (prefix) PetscValidCharPointer(prefix,2);
  PetscValidDeviceContext(dctx,3);
  ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
  /* stream type: the context's current type is offered as the default */
  ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&streamChoice,&wasSet);CHKERRQ(ierr);
  if (wasSet) {
    ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(streamChoice));CHKERRQ(ierr);
  }
  /* device type: the list handed over skips the first PetscDeviceTypes entry, so the chosen
   * index is shifted by one when it is converted back to a PetscDeviceType */
  ierr = PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE],&deviceChoice,&wasSet);CHKERRQ(ierr);
  if (wasSet) {
    ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(deviceChoice+1));CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}