1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/ 2 #include "objpool.hpp" 3 4 const char *const PetscStreamTypes[] = { 5 "global_blocking", 6 "default_blocking", 7 "global_nonblocking", 8 "max", 9 "PetscStreamType", 10 "PETSC_STREAM_", 11 PETSC_NULLPTR 12 }; 13 14 const char *const PetscDeviceContextJoinModes[] = { 15 "destroy", 16 "sync", 17 "no_sync", 18 "PetscDeviceContextJoinMode", 19 "PETSC_DEVICE_CONTEXT_JOIN_", 20 PETSC_NULLPTR 21 }; 22 23 /* Define the allocator */ 24 struct PetscDeviceContextAllocator : Petsc::Allocator<PetscDeviceContext> 25 { 26 static PetscInt PetscDeviceContextID; 27 28 PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) PETSC_NOEXCEPT 29 { 30 PetscDeviceContext dc; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = PetscNew(&dc);CHKERRQ(ierr); 35 dc->id = PetscDeviceContextID++; 36 dc->idle = PETSC_TRUE; 37 dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 38 *dctx = dc; 39 PetscFunctionReturn(0); 40 } 41 42 PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) PETSC_NOEXCEPT 43 { 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 if (PetscUnlikelyDebug(dctx->numChildren)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren); 48 if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);} 49 ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr); 50 ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr); 51 ierr = PetscFree(dctx);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 55 PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) PETSC_NOEXCEPT 56 { 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 /* don't deallocate the child array, rather just zero it out */ 61 ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr); 62 dctx->setup = PETSC_FALSE; 63 dctx->numChildren = 0; 64 dctx->idle = PETSC_TRUE; 65 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 66 PetscFunctionReturn(0); 67 } 68 69 PETSC_NODISCARD static constexpr PetscErrorCode finalize() PETSC_NOEXCEPT { return 0; } 70 }; 71 /* an ID = 0 is invalid */ 72 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1; 73 74 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool; 75 76 /*@C 77 PetscDeviceContextCreate - Creates a PetscDeviceContext 78 79 Not Collective, Asynchronous 80 81 Output Paramemter: 82 . dctx - The PetscDeviceContext 83 84 Notes: 85 Unlike almost every other PETSc class it is advised that most users use 86 PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts 87 of different types are incompatible with one another; using 88 PetscDeviceContextDuplicate() ensures compatible types. 89 90 Level: beginner 91 92 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(), 93 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(), 94 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy() 95 @*/ 96 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 97 { 98 PetscErrorCode ierr; 99 100 PetscFunctionBegin; 101 PetscValidPointer(dctx,1); 102 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 103 ierr = contextPool.get(*dctx);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 /*@C 108 PetscDeviceContextDestroy - Frees a PetscDeviceContext 109 110 Not Collective, Asynchronous 111 112 Input Parameters: 113 . dctx - The PetscDeviceContext 114 115 Notes: 116 No implicit synchronization occurs due to this routine, all resources are released completely asynchronously 117 w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the 118 appropriate synchronization before calling this routine. 119 120 Developer Notes: 121 The context is never actually "destroyed", only returned to an ever growing pool of 122 contexts. There are currently no safeguards on the size of the pool, this should perhaps 123 be implemented. 124 125 Level: beginner 126 127 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize() 128 @*/ 129 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 130 { 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 if (!*dctx) PetscFunctionReturn(0); 135 ierr = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr); 136 *dctx = PETSC_NULLPTR; 137 PetscFunctionReturn(0); 138 } 139 140 /*@C 141 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext 142 143 Not Collective, Asynchronous 144 145 Input Parameters: 146 + dctx - The PetscDeviceContext 147 - type - The PetscStreamType 148 149 Notes: 150 See PetscStreamType in include/petscdevicetypes.h for more information on the available 151 types and their interactions. If the PetscDeviceContext was previously set up and stream 152 type was changed, you must call PetscDeviceContextSetUp() again after this routine. 153 154 Level: intermediate 155 156 .seealso: PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions() 157 @*/ 158 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 159 { 160 PetscFunctionBegin; 161 PetscValidDeviceContext(dctx,1); 162 PetscValidStreamType(type,2); 163 /* only need to do complex swapping if the object has already been setup */ 164 if (dctx->setup && (dctx->streamType != type)) { 165 PetscErrorCode ierr; 166 167 ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr); 168 dctx->setup = PETSC_FALSE; 169 } 170 dctx->streamType = type; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext 176 177 Not Collective, Asynchronous 178 179 Input Parameter: 180 . dctx - The PetscDeviceContext 181 182 Output Parameter: 183 . type - The PetscStreamType 184 185 Notes: 186 See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions 187 188 Level: intermediate 189 190 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions() 191 @*/ 192 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 193 { 194 PetscFunctionBegin; 195 PetscValidDeviceContext(dctx,1); 196 PetscValidIntPointer(type,2); 197 *type = dctx->streamType; 198 PetscFunctionReturn(0); 199 } 200 201 /*@C 202 PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext 203 204 Not Collective, Possibly Synchronous 205 206 Input Parameters: 207 + dctx - The PetscDeviceContext 208 - device - The PetscDevice 209 210 Notes: 211 This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext 212 must also have an attached PetscDevice). Unlike the usual set-type semantics, it is 213 not stricly necessary to set a contexts device to enable usage, any created device 214 contexts will always come equipped with the "default" device. 215 216 This routine is a no-op if dctx is already attached to device. 217 218 This routine may initialize the backend device and incur synchronization. 219 220 Level: intermediate 221 222 .seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice() 223 @*/ 224 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 225 { 226 PetscErrorCode ierr; 227 228 PetscFunctionBegin; 229 PetscValidDeviceContext(dctx,1); 230 PetscValidDevice(device,2); 231 if (dctx->device) { 232 /* can't do a strict pointer equality check since PetscDevice's are reused */ 233 if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0); 234 } 235 ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr); 236 if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);} 237 ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr); 238 ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr); 239 ierr = PetscDeviceReference_Internal(device);CHKERRQ(ierr); 240 dctx->device = device; 241 dctx->setup = PETSC_FALSE; 242 PetscFunctionReturn(0); 243 } 244 245 /*@C 246 PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext 247 248 Not Collective, Asynchronous 249 250 Input Parameter: 251 . dctx - the PetscDeviceContext 252 253 Output Parameter: 254 . device - The PetscDevice 255 256 Notes: 257 This is a borrowed reference, the user should not destroy the device. 258 259 Level: intermediate 260 261 .seealso: PetscDeviceContextSetDevice(), PetscDevice 262 @*/ 263 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 264 { 265 PetscFunctionBegin; 266 PetscValidDeviceContext(dctx,1); 267 PetscValidPointer(device,2); 268 if (PetscUnlikelyDebug(!dctx->device)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id); 269 *device = dctx->device; 270 PetscFunctionReturn(0); 271 } 272 273 /*@C 274 PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use 275 276 Not Collective, Asynchronous 277 278 Input Parameter: 279 . dctx - The PetscDeviceContext 280 281 Developer Notes: 282 This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams, 283 events, and (possibly) handles. 284 285 Level: beginner 286 287 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions() 288 @*/ 289 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 290 { 291 PetscErrorCode ierr; 292 293 PetscFunctionBegin; 294 PetscValidDeviceContext(dctx,1); 295 if (!dctx->device) { 296 ierr = PetscInfo2(PETSC_NULLPTR,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr); 297 ierr = PetscDeviceContextSetDefaultDevice_Internal(dctx);CHKERRQ(ierr); 298 } 299 if (dctx->setup) PetscFunctionReturn(0); 300 ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr); 301 dctx->setup = PETSC_TRUE; 302 PetscFunctionReturn(0); 303 } 304 305 /*@C 306 PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object 307 308 Not Collective, Asynchronous 309 310 Input Parameter: 311 . dctx - The PetscDeviceContext to duplicate 312 313 Output Paramter: 314 . strmdup - The duplicated PetscDeviceContext 315 316 Notes: 317 This is a shorthand method for creating a PetscDeviceContext with the exact same 318 settings as another. Note however that the duplicated PetscDeviceContext does not "share" 319 any of the underlying data with the original, (including its current stream-state) they 320 are completely separate objects. 321 322 Level: beginner 323 324 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType() 325 @*/ 326 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) 327 { 328 PetscDeviceContext dup; 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 PetscValidDeviceContext(dctx,1); 333 PetscValidPointer(dctxdup,2); 334 ierr = PetscDeviceContextCreate(&dup);CHKERRQ(ierr); 335 ierr = PetscDeviceContextSetStreamType(dup,dctx->streamType);CHKERRQ(ierr); 336 if (dctx->device) {ierr = PetscDeviceContextSetDevice(dup,dctx->device);CHKERRQ(ierr);} 337 ierr = PetscDeviceContextSetUp(dup);CHKERRQ(ierr); 338 *dctxdup = dup; 339 PetscFunctionReturn(0); 340 } 341 342 /*@C 343 PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle 344 345 Not Collective, Asynchronous 346 347 Input Parameter: 348 . dctx - The PetscDeviceContext object 349 350 Output Parameter: 351 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work 352 353 Notes: 354 This routine only refers a singular context and does NOT take any of its children into account. That is, if dctx is 355 idle but has dependents who do have work, this routine still returns PETSC_TRUE. 356 357 Results of PetscDeviceContextQueryIdle() are cached on return, allowing this function to be called repeatedly in an 358 efficient manner. When debug mode is enabled this cache is verified on every call to 359 this routine, but is blindly believed when debugging is disabled. 360 361 Level: intermediate 362 363 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork() 364 @*/ 365 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 366 { 367 PetscErrorCode ierr; 368 369 PetscFunctionBegin; 370 PetscValidDeviceContext(dctx,1); 371 PetscValidBoolPointer(idle,2); 372 if (dctx->idle) { 373 *idle = PETSC_TRUE; 374 ierr = PetscDeviceContextValidateIdle_Internal(dctx);CHKERRQ(ierr); 375 } else { 376 ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr); 377 dctx->idle = *idle; 378 } 379 PetscFunctionReturn(0); 380 } 381 382 /*@C 383 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 384 385 Not Collective, Asynchronous 386 387 Input Parameters: 388 + dctxa - The PetscDeviceContext object that is waiting 389 - dctxb - The PetscDeviceContext object that is being waited on 390 391 Notes: 392 Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was 393 called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments. 394 395 Level: beginner 396 397 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin() 398 @*/ 399 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 400 { 401 PetscErrorCode ierr; 402 403 PetscFunctionBegin; 404 PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2); 405 if (dctxa == dctxb) PetscFunctionReturn(0); 406 if (dctxb->idle) { 407 /* No need to do the extra function lookup and event record if the stream were waiting on isn't doing anything */ 408 ierr = PetscDeviceContextValidateIdle_Internal(dctxb);CHKERRQ(ierr); 409 } else { 410 ierr = (*dctxa->ops->waitforctx)(dctxa,dctxb);CHKERRQ(ierr); 411 } 412 PetscFunctionReturn(0); 413 } 414 415 /*@C 416 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 417 418 Not Collective, Asynchronous 419 420 Input Parameters: 421 + dctx - The parent PetscDeviceContext 422 - n - The number of children to create 423 424 Output Parameter: 425 . dsub - The created child context(s) 426 427 Notes: 428 This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning 429 that work queued on child contexts will not start until the parent context finishes its work. This accounts for work 430 queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children. 431 432 Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects 433 to free all of it's children (and ONLY its children) before itself is freed. 434 435 DAG representation: 436 .vb 437 time -> 438 439 -> dctx \----> dctx ------> 440 \---> dsub[0] ---> 441 \--> ... -------> 442 \-> dsub[n-1] -> 443 .ve 444 445 Level: intermediate 446 447 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle() 448 @*/ 449 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 450 { 451 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 452 const PetscInt nBefore = n; 453 static std::string idList; 454 #endif 455 PetscDeviceContext *dsubTmp = PETSC_NULLPTR; 456 PetscInt i = 0; 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 PetscValidDeviceContext(dctx,1); 461 PetscValidPointer(dsub,3); 462 if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n); 463 #if PETSC_USE_DEBUG_AND_INFO 464 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 465 idList.reserve(4*n); 466 #endif 467 /* update child totals */ 468 dctx->numChildren += n; 469 /* now to find out if we have room */ 470 if (dctx->numChildren > dctx->maxNumChildren) { 471 /* no room, either from having too many kids or not having any */ 472 if (dctx->childIDs) { 473 /* have existing children, must reallocate them */ 474 ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr); 475 /* clear the extra memory since realloc doesn't do it for us */ 476 ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr); 477 } else { 478 /* have no children */ 479 ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr); 480 } 481 /* update total number of children */ 482 dctx->maxNumChildren = dctx->numChildren; 483 } 484 ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr); 485 while (n) { 486 /* empty child slot */ 487 if (!(dctx->childIDs[i])) { 488 /* create the child context in the image of its parent */ 489 ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr); 490 ierr = PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr); 491 /* register the child with its parent */ 492 dctx->childIDs[i] = dsubTmp[i]->id; 493 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 494 idList += std::to_string(dsubTmp[i]->id); 495 if (n != 1) idList += ", "; 496 #endif 497 --n; 498 } 499 ++i; 500 } 501 #if PETSC_USE_DEBUG_AND_INFO 502 ierr = PetscInfo3(PETSC_NULLPTR,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr); 503 /* resets the size but doesn't deallocate the memory */ 504 idList.clear(); 505 #endif 506 /* pass the children back to caller */ 507 *dsub = dsubTmp; 508 PetscFunctionReturn(0); 509 } 510 511 /*@C 512 PetscDeviceContextJoin - Converge a set of child contexts 513 514 Not Collective, Asynchronous 515 516 Input Parameters: 517 + dctx - A PetscDeviceContext to converge on 518 . n - The number of sub contexts to converge 519 . joinMode - The type of join to perform 520 - dsub - The sub contexts to converge 521 522 Notes: 523 If PetscDeviceContextFork() creates n edges from a source node which all depend on the 524 source node, then this routine is the exact mirror. That is, it creates a node 525 (represented in dctx) which recieves n edges (and optionally destroys them) which is 526 dependent on the completion of all incoming edges. 527 528 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed 529 by this routine. Thus all sub contexts must have been created with the dctx passed to 530 this routine. 531 532 if joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the 533 sub contexts do not wait for one another afterwards. 534 535 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally 536 wait on dctx after converging. This has the effect of "synchronizing" the outgoing 537 edges. 538 539 DAG representations: 540 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY 541 .vb 542 time -> 543 544 -> dctx ---------/- dctx -> 545 -> dsub[0] -----/ 546 -> ... -------/ 547 -> dsub[n-1] -/ 548 .ve 549 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC 550 .vb 551 time -> 552 553 -> dctx ---------/- dctx -> 554 -> dsub[0] -----/---------> 555 -> ... -------/----------> 556 -> dsub[n-1] -/-----------> 557 .ve 558 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC 559 .vb 560 time -> 561 562 -> dctx ---------/- dctx -\----> dctx ------> 563 -> dsub[0] -----/ \---> dsub[0] ---> 564 -> ... -------/ \--> ... -------> 565 -> dsub[n-1] -/ \-> dsub[n-1] -> 566 .ve 567 568 Level: intermediate 569 570 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode 571 @*/ 572 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 573 { 574 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 575 static std::string idList; 576 #endif 577 PetscErrorCode ierr; 578 579 PetscFunctionBegin; 580 /* validity of dctx is checked in the wait-for loop */ 581 PetscValidPointer(dsub,4); 582 if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n); 583 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 584 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 585 idList.reserve(4*n); 586 #endif 587 /* first dctx waits on all the incoming edges */ 588 for (PetscInt i = 0; i < n; ++i) { 589 PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4); 590 ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr); 591 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 592 idList += std::to_string((*dsub)[i]->id); 593 if (i+1 < n) idList += ", "; 594 #endif 595 } 596 597 /* now we handle the aftermath */ 598 switch (joinMode) { 599 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: 600 { 601 PetscInt j = 0; 602 603 if (PetscUnlikelyDebug(n > dctx->numChildren)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren); 604 /* update child count while it's still fresh in memory */ 605 dctx->numChildren -= n; 606 for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) { 607 if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) { 608 /* child is one of ours, can destroy it */ 609 ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr); 610 /* reset the child slot */ 611 dctx->childIDs[i] = 0; 612 if (++j == n) break; 613 } 614 } 615 /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */ 616 if (PetscUnlikelyDebug(j != n)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j); 617 ierr = PetscFree(*dsub);CHKERRQ(ierr); 618 } 619 break; 620 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 621 for (PetscInt i = 0; i < n; ++i) { 622 ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr); 623 } 624 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 625 break; 626 default: 627 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given"); 628 } 629 630 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 631 ierr = PetscInfo4(PETSC_NULLPTR,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr); 632 idList.clear(); 633 #endif 634 PetscFunctionReturn(0); 635 } 636 637 /*@C 638 PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished 639 640 Not Collective, Synchronous 641 642 Input Parameters: 643 . dctx - The PetscDeviceContext to synchronize 644 645 Level: beginner 646 647 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle() 648 @*/ 649 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 650 { 651 PetscErrorCode ierr; 652 653 PetscFunctionBegin; 654 PetscValidDeviceContext(dctx,1); 655 /* if it isn't setup there is nothing to sync on */ 656 if (dctx->setup) {ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);} 657 dctx->idle = PETSC_TRUE; 658 PetscFunctionReturn(0); 659 } 660 661 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT 662 // REMOVE ME (change) 663 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING 664 665 static PetscDeviceType rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 666 static PetscStreamType rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 667 static PetscDeviceContext globalContext = PETSC_NULLPTR; 668 669 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should 670 * match whatever device is eagerly intialized */ 671 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) 672 { 673 PetscFunctionBegin; 674 PetscValidDeviceType(type,1); 675 rootDeviceType = type; 676 PetscFunctionReturn(0); 677 } 678 679 #if 0 680 /* currently unused */ 681 PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type) 682 { 683 PetscFunctionBegin; 684 PetscValidStreamType(type,1); 685 rootStreamType = type; 686 PetscFunctionReturn(0); 687 } 688 #endif 689 690 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) 691 { 692 PetscErrorCode ierr; 693 static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode { 694 PetscErrorCode ierr; 695 696 PetscFunctionBegin; 697 ierr = PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr); 698 rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 699 rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 700 PetscFunctionReturn(0); 701 }; 702 703 PetscFunctionBegin; 704 if (globalContext) PetscFunctionReturn(0); 705 /* this exists purely as a valid device check. */ 706 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 707 ierr = PetscRegisterFinalize(PetscDeviceContextFinalizer);CHKERRQ(ierr); 708 ierr = PetscInfo(PETSC_NULLPTR,"Initializing global PetscDeviceContext\n");CHKERRQ(ierr); 709 /* we call the allocator directly here since the ObjectPool creates a PetscContainer which 710 * eventually tries to call logging functions. However, this routine may be purposefully 711 * called __before__ logging is initialized, so the logging function would PETSCABORT */ 712 ierr = PetscDeviceContextAllocator::create(&globalContext);CHKERRQ(ierr); 713 ierr = PetscDeviceContextSetStreamType(globalContext,rootStreamType);CHKERRQ(ierr); 714 ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType);CHKERRQ(ierr); 715 ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 /*@C 720 PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext 721 722 Not Collective, Asynchronous 723 724 Output Parameter: 725 . dctx - The PetscDeviceContext 726 727 Notes: 728 The user generally should not destroy contexts retrieved with this routine unless they 729 themselves have created them. There exists no protection against destroying the root 730 context. 731 732 Developer Notes: 733 Unless the user has set their own, this routine creates the "root" context the first time it 734 is called, registering its destructor to PetscFinalize(). 735 736 Level: beginner 737 738 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(), 739 PetscDeviceContextJoin(), PetscDeviceContextCreate() 740 @*/ 741 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) 742 { 743 PetscErrorCode ierr; 744 745 PetscFunctionBegin; 746 PetscValidPointer(dctx,1); 747 ierr = PetscDeviceContextSetupGlobalContext_Private();CHKERRQ(ierr); 748 /* while the static analyzer can find global variables, it will throw a warning about not 749 * being able to connect this back to the function arguments */ 750 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1)); 751 *dctx = globalContext; 752 PetscFunctionReturn(0); 753 } 754 755 /*@C 756 PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext 757 758 Not Collective, Asynchronous 759 760 Input Parameter: 761 . dctx - The PetscDeviceContext 762 763 Notes: 764 This routine can be used to set the defacto "root" PetscDeviceContext to a user-defined 765 implementation by calling this routine immediately after PetscInitialize() and ensuring that 766 PetscDevice is not greedily intialized. In this case the user is responsible for destroying 767 their PetscDeviceContext before PetscFinalize() returns. 768 769 The old context is not stored in any way by this routine; if one is overriding a context that 770 they themselves do not control, one should take care to temporarily store it by calling 771 PetscDeviceContextGetCurrentContext() before calling this routine. 772 773 Level: beginner 774 775 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(), 776 PetscDeviceContextJoin(), PetscDeviceContextCreate() 777 @*/ 778 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) 779 { 780 PetscErrorCode ierr; 781 782 PetscFunctionBegin; 783 PetscValidDeviceContext(dctx,1); 784 if (PetscUnlikelyDebug(!dctx->setup)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id); 785 globalContext = dctx; 786 ierr = PetscInfo1(PETSC_NULLPTR,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id);CHKERRQ(ierr); 787 PetscFunctionReturn(0); 788 } 789 790 /*@C 791 PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database 792 793 Collective on comm, Asynchronous 794 795 Input Parameters: 796 + comm - MPI communicator on which to query the options database 797 . prefix - prefix to prepend to all options database queries, NULL if not needed 798 - dctx - The PetscDeviceContext to configure 799 800 Output Parameter: 801 . dctx - The PetscDeviceContext 802 803 Options Database: 804 + -device_context_stream_type - type of stream to create inside the PetscDeviceContext - 805 PetscDeviceContextSetStreamType() 806 - -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType 807 808 Level: beginner 809 810 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice() 811 @*/ 812 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx) 813 { 814 PetscBool flag; 815 PetscInt stype,dtype; 816 PetscErrorCode ierr; 817 818 PetscFunctionBegin; 819 if (prefix) PetscValidCharPointer(prefix,2); 820 PetscValidDeviceContext(dctx,3); 821 ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr); 822 ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&stype,&flag);CHKERRQ(ierr); 823 if (flag) { 824 ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(stype));CHKERRQ(ierr); 825 } 826 ierr = PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE],&dtype,&flag);CHKERRQ(ierr); 827 if (flag) { 828 ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(dtype+1));CHKERRQ(ierr); 829 } 830 ierr = PetscOptionsEnd();CHKERRQ(ierr); 831 PetscFunctionReturn(0); 832 } 833