1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/ 2 #include "objpool.hpp" 3 4 const char *const PetscStreamTypes[] = { 5 "global_blocking", 6 "default_blocking", 7 "global_nonblocking", 8 "max", 9 "PetscStreamType", 10 "PETSC_STREAM_", 11 PETSC_NULLPTR 12 }; 13 14 const char *const PetscDeviceContextJoinModes[] = { 15 "destroy", 16 "sync", 17 "no_sync", 18 "PetscDeviceContextJoinMode", 19 "PETSC_DEVICE_CONTEXT_JOIN_", 20 PETSC_NULLPTR 21 }; 22 23 /* Define the allocator */ 24 struct PetscDeviceContextAllocator : Petsc::Allocator<PetscDeviceContext> 25 { 26 static PetscInt PetscDeviceContextID; 27 28 PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) PETSC_NOEXCEPT 29 { 30 PetscDeviceContext dc; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = PetscNew(&dc);CHKERRQ(ierr); 35 dc->id = PetscDeviceContextID++; 36 dc->idle = PETSC_TRUE; 37 dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 38 *dctx = dc; 39 PetscFunctionReturn(0); 40 } 41 42 PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) PETSC_NOEXCEPT 43 { 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 if (PetscUnlikelyDebug(dctx->numChildren)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying",dctx->numChildren); 48 if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);} 49 ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr); 50 ierr = PetscFree(dctx->childIDs);CHKERRQ(ierr); 51 ierr = PetscFree(dctx);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 55 PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) PETSC_NOEXCEPT 56 { 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 /* don't deallocate the child array, rather just zero it out */ 61 ierr = PetscArrayzero(dctx->childIDs,dctx->maxNumChildren);CHKERRQ(ierr); 62 dctx->setup = PETSC_FALSE; 63 dctx->numChildren = 0; 64 
dctx->idle = PETSC_TRUE; 65 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 66 PetscFunctionReturn(0); 67 } 68 69 PETSC_NODISCARD static constexpr PetscErrorCode finalize() PETSC_NOEXCEPT { return 0; } 70 }; 71 /* an ID = 0 is invalid */ 72 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1; 73 74 static Petsc::ObjectPool<PetscDeviceContext,PetscDeviceContextAllocator> contextPool; 75 76 /*@C 77 PetscDeviceContextCreate - Creates a PetscDeviceContext 78 79 Not Collective, Asynchronous 80 81 Output Paramemter: 82 . dctx - The PetscDeviceContext 83 84 Notes: 85 Unlike almost every other PETSc class it is advised that most users use 86 PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts 87 of different types are incompatible with one another; using 88 PetscDeviceContextDuplicate() ensures compatible types. 89 90 Level: beginner 91 92 .seealso: PetscDeviceContextDuplicate(), PetscDeviceContextSetDevice(), 93 PetscDeviceContextSetStreamType(), PetscDeviceContextSetUp(), 94 PetscDeviceContextSetFromOptions(), PetscDeviceContextDestroy() 95 @*/ 96 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 97 { 98 PetscErrorCode ierr; 99 100 PetscFunctionBegin; 101 PetscValidPointer(dctx,1); 102 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 103 ierr = contextPool.get(*dctx);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 /*@C 108 PetscDeviceContextDestroy - Frees a PetscDeviceContext 109 110 Not Collective, Asynchronous 111 112 Input Parameters: 113 . dctx - The PetscDeviceContext 114 115 Notes: 116 No implicit synchronization occurs due to this routine, all resources are released completely asynchronously 117 w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the 118 appropriate synchronization before calling this routine. 
119 120 Developer Notes: 121 The context is never actually "destroyed", only returned to an ever growing pool of 122 contexts. There are currently no safeguards on the size of the pool, this should perhaps 123 be implemented. 124 125 Level: beginner 126 127 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetUp(), PetscDeviceContextSynchronize() 128 @*/ 129 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 130 { 131 PetscErrorCode ierr; 132 133 PetscFunctionBegin; 134 if (!*dctx) PetscFunctionReturn(0); 135 ierr = contextPool.reclaim(std::move(*dctx));CHKERRQ(ierr); 136 *dctx = PETSC_NULLPTR; 137 PetscFunctionReturn(0); 138 } 139 140 /*@C 141 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext 142 143 Not Collective, Asynchronous 144 145 Input Parameters: 146 + dctx - The PetscDeviceContext 147 - type - The PetscStreamType 148 149 Notes: 150 See PetscStreamType in include/petscdevicetypes.h for more information on the available 151 types and their interactions. If the PetscDeviceContext was previously set up and stream 152 type was changed, you must call PetscDeviceContextSetUp() again after this routine. 
153 154 Level: intermediate 155 156 .seealso: PetscDeviceContextGetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetUp(), PetscDeviceContextSetFromOptions() 157 @*/ 158 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 159 { 160 PetscFunctionBegin; 161 PetscValidDeviceContext(dctx,1); 162 PetscValidStreamType(type,2); 163 /* only need to do complex swapping if the object has already been setup */ 164 if (dctx->setup && (dctx->streamType != type)) { 165 PetscErrorCode ierr; 166 167 ierr = (*dctx->ops->changestreamtype)(dctx,type);CHKERRQ(ierr); 168 dctx->setup = PETSC_FALSE; 169 } 170 dctx->streamType = type; 171 PetscFunctionReturn(0); 172 } 173 174 /*@C 175 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext 176 177 Not Collective, Asynchronous 178 179 Input Parameter: 180 . dctx - The PetscDeviceContext 181 182 Output Parameter: 183 . type - The PetscStreamType 184 185 Notes: 186 See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions 187 188 Level: intermediate 189 190 .seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextCreate(), PetscDeviceContextSetFromOptions() 191 @*/ 192 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 193 { 194 PetscFunctionBegin; 195 PetscValidDeviceContext(dctx,1); 196 PetscValidIntPointer(type,2); 197 *type = dctx->streamType; 198 PetscFunctionReturn(0); 199 } 200 201 /*@C 202 PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext 203 204 Not Collective, Possibly Synchronous 205 206 Input Parameters: 207 + dctx - The PetscDeviceContext 208 - device - The PetscDevice 209 210 Notes: 211 This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext 212 must also have an attached PetscDevice). 
Unlike the usual set-type semantics, it is
  not strictly necessary to set a contexts device to enable usage, any created device
  contexts will always come equipped with the "default" device.

  This routine is a no-op if dctx is already attached to device.

  This routine may initialize the backend device and incur synchronization.

  Level: intermediate

.seealso: PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceContextGetDevice()
@*/
PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidDeviceContext(dctx,1);
  PetscValidDevice(device,2);
  /* can't do a strict pointer equality check since PetscDevice's are reused, a device that
     builds its contexts with the same routine as the attached one counts as the same device */
  if (dctx->device && (dctx->device->ops->createcontext == device->ops->createcontext)) PetscFunctionReturn(0);
  /* tear down the implementation data of the old device while that device is still attached,
     this is the same order that PetscDeviceContextAllocator::destroy() uses */
  if (dctx->ops->destroy) {ierr = (*dctx->ops->destroy)(dctx);CHKERRQ(ierr);}
  ierr = PetscDeviceDestroy(&dctx->device);CHKERRQ(ierr);
  /* start from an empty function table, the new device fills it in */
  ierr = PetscMemzero(dctx->ops,sizeof(*dctx->ops));CHKERRQ(ierr);
  ierr = (*device->ops->createcontext)(dctx);CHKERRQ(ierr);
  /* the context holds its own reference to the device */
  ierr = PetscDeviceReference_Internal(device);CHKERRQ(ierr);
  dctx->device = device;
  /* whatever was set up belonged to the old device */
  dctx->setup  = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@C
  PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext

  Not Collective, Asynchronous

  Input Parameter:
. dctx - the PetscDeviceContext

  Output Parameter:
. device - The PetscDevice

  Notes:
  This is a borrowed reference, the user should not destroy the device.
258 259 Level: intermediate 260 261 .seealso: PetscDeviceContextSetDevice(), PetscDevice 262 @*/ 263 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 264 { 265 PetscFunctionBegin; 266 PetscValidDeviceContext(dctx,1); 267 PetscValidPointer(device,2); 268 if (PetscUnlikelyDebug(!dctx->device)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get",dctx->id); 269 *device = dctx->device; 270 PetscFunctionReturn(0); 271 } 272 273 /*@C 274 PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use 275 276 Not Collective, Asynchronous 277 278 Input Parameter: 279 . dctx - The PetscDeviceContext 280 281 Developer Notes: 282 This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams, 283 events, and (possibly) handles. 284 285 Level: beginner 286 287 .seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextDestroy(), PetscDeviceContextSetFromOptions() 288 @*/ 289 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 290 { 291 PetscErrorCode ierr; 292 293 PetscFunctionBegin; 294 PetscValidDeviceContext(dctx,1); 295 if (!dctx->device) { 296 ierr = PetscInfo2(PETSC_NULLPTR,"PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n",dctx->id,PetscDeviceTypes[PETSC_DEVICE_DEFAULT]);CHKERRQ(ierr); 297 ierr = PetscDeviceContextSetDefaultDevice_Internal(dctx);CHKERRQ(ierr); 298 } 299 if (dctx->setup) PetscFunctionReturn(0); 300 ierr = (*dctx->ops->setup)(dctx);CHKERRQ(ierr); 301 dctx->setup = PETSC_TRUE; 302 PetscFunctionReturn(0); 303 } 304 305 /*@C 306 PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object 307 308 Not Collective, Asynchronous 309 310 Input Parameter: 311 . dctx - The PetscDeviceContext to duplicate 312 313 Output Paramter: 314 . 
dctxdup - The duplicated PetscDeviceContext

  Notes:
  This is a shorthand method for creating a PetscDeviceContext with the exact same
  settings as another. Note however that the duplicated PetscDeviceContext does not "share"
  any of the underlying data with the original, (including its current stream-state) they
  are completely separate objects.

  Level: beginner

.seealso: PetscDeviceContextCreate(), PetscDeviceContextSetDevice(), PetscDeviceContextSetStreamType()
@*/
PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup)
{
  PetscDeviceContext copy;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidDeviceContext(dctx,1);
  PetscValidPointer(dctxdup,2);
  ierr = PetscDeviceContextCreate(&copy);CHKERRQ(ierr);
  /* carry over the settings of the original: the stream type always, the device if it has one */
  ierr = PetscDeviceContextSetStreamType(copy,dctx->streamType);CHKERRQ(ierr);
  if (dctx->device) {
    ierr = PetscDeviceContextSetDevice(copy,dctx->device);CHKERRQ(ierr);
  }
  /* the duplicate is handed back ready to use */
  ierr = PetscDeviceContextSetUp(copy);CHKERRQ(ierr);
  *dctxdup = copy;
  PetscFunctionReturn(0);
}

/*@C
  PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle

  Not Collective, Asynchronous

  Input Parameter:
. dctx - The PetscDeviceContext object

  Output Parameter:
. idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work

  Notes:
  This routine only refers a singular context and does NOT take any of its children into account. That is, if dctx is
  idle but has dependents who do have work, this routine still returns PETSC_TRUE.

  Results of PetscDeviceContextQueryIdle() are cached on return, allowing this function to be called repeatedly in an
  efficient manner. When debug mode is enabled this cache is verified on every call to
  this routine, but is blindly believed when debugging is disabled.
360 361 Level: intermediate 362 363 .seealso: PetscDeviceContextCreate(), PetscDeviceContextWaitForContext(), PetscDeviceContextFork() 364 @*/ 365 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 366 { 367 PetscErrorCode ierr; 368 369 PetscFunctionBegin; 370 PetscValidDeviceContext(dctx,1); 371 PetscValidBoolPointer(idle,2); 372 if (dctx->idle) { 373 *idle = PETSC_TRUE; 374 ierr = PetscDeviceContextValidateIdle_Internal(dctx);CHKERRQ(ierr); 375 } else { 376 ierr = (*dctx->ops->query)(dctx,idle);CHKERRQ(ierr); 377 dctx->idle = *idle; 378 } 379 PetscFunctionReturn(0); 380 } 381 382 /*@C 383 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 384 385 Not Collective, Asynchronous 386 387 Input Parameters: 388 + dctxa - The PetscDeviceContext object that is waiting 389 - dctxb - The PetscDeviceContext object that is being waited on 390 391 Notes: 392 Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was 393 called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments. 
394 395 Level: beginner 396 397 .seealso: PetscDeviceContextCreate(), PetscDeviceContextQueryIdle(), PetscDeviceContextJoin() 398 @*/ 399 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 400 { 401 PetscErrorCode ierr; 402 403 PetscFunctionBegin; 404 PetscCheckCompatibleDeviceContexts(dctxa,1,dctxb,2); 405 if (dctxa == dctxb) PetscFunctionReturn(0); 406 if (dctxb->idle) { 407 /* No need to do the extra function lookup and event record if the stream were waiting on isn't doing anything */ 408 ierr = PetscDeviceContextValidateIdle_Internal(dctxb);CHKERRQ(ierr); 409 } else { 410 ierr = (*dctxa->ops->waitforctx)(dctxa,dctxb);CHKERRQ(ierr); 411 } 412 PetscFunctionReturn(0); 413 } 414 415 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO)) 416 #if PETSC_USE_DEBUG_AND_INFO 417 #include <string> 418 #endif 419 /*@C 420 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 421 422 Not Collective, Asynchronous 423 424 Input Parameters: 425 + dctx - The parent PetscDeviceContext 426 - n - The number of children to create 427 428 Output Parameter: 429 . dsub - The created child context(s) 430 431 Notes: 432 This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning 433 that work queued on child contexts will not start until the parent context finishes its work. This accounts for work 434 queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children. 435 436 Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects 437 to free all of it's children (and ONLY its children) before itself is freed. 438 439 DAG representation: 440 .vb 441 time -> 442 443 -> dctx \----> dctx ------> 444 \---> dsub[0] ---> 445 \--> ... 
-------> 446 \-> dsub[n-1] -> 447 .ve 448 449 Level: intermediate 450 451 .seealso: PetscDeviceContextJoin(), PetscDeviceContextSynchronize(), PetscDeviceContextQueryIdle() 452 @*/ 453 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 454 { 455 #if PETSC_USE_DEBUG_AND_INFO 456 const PetscInt nBefore = n; 457 static std::string idList; 458 #endif 459 PetscDeviceContext *dsubTmp = PETSC_NULLPTR; 460 PetscInt i = 0; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 PetscValidDeviceContext(dctx,1); 465 PetscValidPointer(dsub,3); 466 if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts requested %" PetscInt_FMT " < 0",n); 467 #if PETSC_USE_DEBUG_AND_INFO 468 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 469 idList.reserve(4*n); 470 #endif 471 /* update child totals */ 472 dctx->numChildren += n; 473 /* now to find out if we have room */ 474 if (dctx->numChildren > dctx->maxNumChildren) { 475 /* no room, either from having too many kids or not having any */ 476 if (dctx->childIDs) { 477 /* have existing children, must reallocate them */ 478 ierr = PetscRealloc(dctx->numChildren*sizeof(*dctx->childIDs),&dctx->childIDs);CHKERRQ(ierr); 479 /* clear the extra memory since realloc doesn't do it for us */ 480 ierr = PetscArrayzero((dctx->childIDs)+(dctx->maxNumChildren),(dctx->numChildren)-(dctx->maxNumChildren));CHKERRQ(ierr); 481 } else { 482 /* have no children */ 483 ierr = PetscCalloc1(dctx->numChildren,&dctx->childIDs);CHKERRQ(ierr); 484 } 485 /* update total number of children */ 486 dctx->maxNumChildren = dctx->numChildren; 487 } 488 ierr = PetscMalloc1(n,&dsubTmp);CHKERRQ(ierr); 489 while (n) { 490 /* empty child slot */ 491 if (!(dctx->childIDs[i])) { 492 /* create the child context in the image of its parent */ 493 ierr = PetscDeviceContextDuplicate(dctx,dsubTmp+i);CHKERRQ(ierr); 494 ierr = 
PetscDeviceContextWaitForContext(dsubTmp[i],dctx);CHKERRQ(ierr); 495 /* register the child with its parent */ 496 dctx->childIDs[i] = dsubTmp[i]->id; 497 #if PETSC_USE_DEBUG_AND_INFO 498 idList += std::to_string(dsubTmp[i]->id); 499 if (n != 1) idList += ", "; 500 #endif 501 --n; 502 } 503 ++i; 504 } 505 #if PETSC_USE_DEBUG_AND_INFO 506 ierr = PetscInfo3(PETSC_NULLPTR,"Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n",nBefore,dctx->id,idList.c_str());CHKERRQ(ierr); 507 /* resets the size but doesn't deallocate the memory */ 508 idList.clear(); 509 #endif 510 /* pass the children back to caller */ 511 *dsub = dsubTmp; 512 PetscFunctionReturn(0); 513 } 514 515 /*@C 516 PetscDeviceContextJoin - Converge a set of child contexts 517 518 Not Collective, Asynchronous 519 520 Input Parameters: 521 + dctx - A PetscDeviceContext to converge on 522 . n - The number of sub contexts to converge 523 . joinMode - The type of join to perform 524 - dsub - The sub contexts to converge 525 526 Notes: 527 If PetscDeviceContextFork() creates n edges from a source node which all depend on the 528 source node, then this routine is the exact mirror. That is, it creates a node 529 (represented in dctx) which recieves n edges (and optionally destroys them) which is 530 dependent on the completion of all incoming edges. 531 532 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed 533 by this routine. Thus all sub contexts must have been created with the dctx passed to 534 this routine. 535 536 if joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the 537 sub contexts do not wait for one another afterwards. 538 539 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally 540 wait on dctx after converging. This has the effect of "synchronizing" the outgoing 541 edges. 
542 543 DAG representations: 544 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY 545 .vb 546 time -> 547 548 -> dctx ---------/- dctx -> 549 -> dsub[0] -----/ 550 -> ... -------/ 551 -> dsub[n-1] -/ 552 .ve 553 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC 554 .vb 555 time -> 556 557 -> dctx ---------/- dctx -> 558 -> dsub[0] -----/---------> 559 -> ... -------/----------> 560 -> dsub[n-1] -/-----------> 561 .ve 562 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC 563 .vb 564 time -> 565 566 -> dctx ---------/- dctx -\----> dctx ------> 567 -> dsub[0] -----/ \---> dsub[0] ---> 568 -> ... -------/ \--> ... -------> 569 -> dsub[n-1] -/ \-> dsub[n-1] -> 570 .ve 571 572 Level: intermediate 573 574 .seealso: PetscDeviceContextFork(), PetscDeviceContextSynchronize(), PetscDeviceContextJoinMode 575 @*/ 576 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 577 { 578 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 579 static std::string idList; 580 #endif 581 PetscErrorCode ierr; 582 583 PetscFunctionBegin; 584 /* validity of dctx is checked in the wait-for loop */ 585 PetscValidPointer(dsub,4); 586 if (PetscUnlikelyDebug(n < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of contexts merged %" PetscInt_FMT " < 0",n); 587 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 588 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 589 idList.reserve(4*n); 590 #endif 591 /* first dctx waits on all the incoming edges */ 592 for (PetscInt i = 0; i < n; ++i) { 593 PetscCheckCompatibleDeviceContexts(dctx,1,(*dsub)[i],4); 594 ierr = PetscDeviceContextWaitForContext(dctx,(*dsub)[i]);CHKERRQ(ierr); 595 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 596 idList += std::to_string((*dsub)[i]->id); 597 if (i+1 < n) idList += ", "; 598 #endif 599 } 600 601 /* now we handle the aftermath */ 602 switch (joinMode) { 603 case 
PETSC_DEVICE_CONTEXT_JOIN_DESTROY: 604 { 605 PetscInt j = 0; 606 607 if (PetscUnlikelyDebug(n > dctx->numChildren)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent",n,dctx->numChildren); 608 /* update child count while it's still fresh in memory */ 609 dctx->numChildren -= n; 610 for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) { 611 if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) { 612 /* child is one of ours, can destroy it */ 613 ierr = PetscDeviceContextDestroy((*dsub)+j);CHKERRQ(ierr); 614 /* reset the child slot */ 615 dctx->childIDs[i] = 0; 616 if (++j == n) break; 617 } 618 } 619 /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */ 620 if (PetscUnlikelyDebug(j != n)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.",n-j); 621 ierr = PetscFree(*dsub);CHKERRQ(ierr); 622 } 623 break; 624 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 625 for (PetscInt i = 0; i < n; ++i) { 626 ierr = PetscDeviceContextWaitForContext((*dsub)[i],dctx);CHKERRQ(ierr); 627 } 628 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 629 break; 630 default: 631 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown PetscDeviceContextJoinMode given"); 632 } 633 634 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 635 ierr = PetscInfo4(PETSC_NULLPTR,"Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n",n,dctx->id,PetscDeviceContextJoinModes[joinMode],idList.c_str());CHKERRQ(ierr); 636 idList.clear(); 637 #endif 638 PetscFunctionReturn(0); 639 } 640 641 /*@C 642 PetscDeviceContextSynchronize - Block the host until 
all work queued on or associated with a PetscDeviceContext has finished 643 644 Not Collective, Synchronous 645 646 Input Parameters: 647 . dctx - The PetscDeviceContext to synchronize 648 649 Level: beginner 650 651 .seealso: PetscDeviceContextFork(), PetscDeviceContextJoin(), PetscDeviceContextQueryIdle() 652 @*/ 653 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 654 { 655 PetscErrorCode ierr; 656 657 PetscFunctionBegin; 658 PetscValidDeviceContext(dctx,1); 659 /* if it isn't setup there is nothing to sync on */ 660 if (dctx->setup) {ierr = (*dctx->ops->synchronize)(dctx);CHKERRQ(ierr);} 661 dctx->idle = PETSC_TRUE; 662 PetscFunctionReturn(0); 663 } 664 665 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT 666 // REMOVE ME (change) 667 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING 668 669 static PetscDeviceType rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 670 static PetscStreamType rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 671 static PetscDeviceContext globalContext = PETSC_NULLPTR; 672 673 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should 674 * match whatever device is eagerly intialized */ 675 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) 676 { 677 PetscFunctionBegin; 678 PetscValidDeviceType(type,1); 679 rootDeviceType = type; 680 PetscFunctionReturn(0); 681 } 682 683 #if 0 684 /* currently unused */ 685 PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type) 686 { 687 PetscFunctionBegin; 688 PetscValidStreamType(type,1); 689 rootStreamType = type; 690 PetscFunctionReturn(0); 691 } 692 #endif 693 694 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) 695 { 696 PetscErrorCode ierr; 697 static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode { 698 PetscErrorCode ierr; 699 700 PetscFunctionBegin; 701 ierr = 
PetscDeviceContextDestroy(&globalContext);CHKERRQ(ierr); 702 rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 703 rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 704 PetscFunctionReturn(0); 705 }; 706 707 PetscFunctionBegin; 708 if (globalContext) PetscFunctionReturn(0); 709 /* this exists purely as a valid device check. */ 710 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 711 ierr = PetscRegisterFinalize(PetscDeviceContextFinalizer);CHKERRQ(ierr); 712 ierr = PetscInfo(PETSC_NULLPTR,"Initializing global PetscDeviceContext\n");CHKERRQ(ierr); 713 /* we call the allocator directly here since the ObjectPool creates a PetscContainer which 714 * eventually tries to call logging functions. However, this routine may be purposefully 715 * called __before__ logging is initialized, so the logging function would PETSCABORT */ 716 ierr = PetscDeviceContextAllocator::create(&globalContext);CHKERRQ(ierr); 717 ierr = PetscDeviceContextSetStreamType(globalContext,rootStreamType);CHKERRQ(ierr); 718 ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext,rootDeviceType);CHKERRQ(ierr); 719 ierr = PetscDeviceContextSetUp(globalContext);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 /*@C 724 PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext 725 726 Not Collective, Asynchronous 727 728 Output Parameter: 729 . dctx - The PetscDeviceContext 730 731 Notes: 732 The user generally should not destroy contexts retrieved with this routine unless they 733 themselves have created them. There exists no protection against destroying the root 734 context. 735 736 Developer Notes: 737 Unless the user has set their own, this routine creates the "root" context the first time it 738 is called, registering its destructor to PetscFinalize(). 
739 740 Level: beginner 741 742 .seealso: PetscDeviceContextSetCurrentContext(), PetscDeviceContextFork(), 743 PetscDeviceContextJoin(), PetscDeviceContextCreate() 744 @*/ 745 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) 746 { 747 PetscErrorCode ierr; 748 749 PetscFunctionBegin; 750 PetscValidPointer(dctx,1); 751 ierr = PetscDeviceContextSetupGlobalContext_Private();CHKERRQ(ierr); 752 /* while the static analyzer can find global variables, it will throw a warning about not 753 * being able to connect this back to the function arguments */ 754 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext,-1)); 755 *dctx = globalContext; 756 PetscFunctionReturn(0); 757 } 758 759 /*@C 760 PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext 761 762 Not Collective, Asynchronous 763 764 Input Parameter: 765 . dctx - The PetscDeviceContext 766 767 Notes: 768 This routine can be used to set the defacto "root" PetscDeviceContext to a user-defined 769 implementation by calling this routine immediately after PetscInitialize() and ensuring that 770 PetscDevice is not greedily intialized. In this case the user is responsible for destroying 771 their PetscDeviceContext before PetscFinalize() returns. 772 773 The old context is not stored in any way by this routine; if one is overriding a context that 774 they themselves do not control, one should take care to temporarily store it by calling 775 PetscDeviceContextGetCurrentContext() before calling this routine. 
776 777 Level: beginner 778 779 .seealso: PetscDeviceContextGetCurrentContext(), PetscDeviceContextFork(), 780 PetscDeviceContextJoin(), PetscDeviceContextCreate() 781 @*/ 782 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) 783 { 784 PetscErrorCode ierr; 785 786 PetscFunctionBegin; 787 PetscValidDeviceContext(dctx,1); 788 if (PetscUnlikelyDebug(!dctx->setup)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context",dctx->id); 789 globalContext = dctx; 790 ierr = PetscInfo1(PETSC_NULLPTR,"Set global PetscDeviceContext id %" PetscInt_FMT "\n",dctx->id);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 794 /*@C 795 PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database 796 797 Collective on comm, Asynchronous 798 799 Input Parameters: 800 + comm - MPI communicator on which to query the options database 801 . prefix - prefix to prepend to all options database queries, NULL if not needed 802 - dctx - The PetscDeviceContext to configure 803 804 Output Parameter: 805 . 
dctx - The PetscDeviceContext

  Options Database:
+ -device_context_stream_type - type of stream to create inside the PetscDeviceContext -
  PetscDeviceContextSetStreamType()
- -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType

  Level: beginner

.seealso: PetscDeviceContextSetStreamType(), PetscDeviceContextSetDevice()
@*/
PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx)
{
  PetscBool      wasSet;
  PetscInt       streamIdx,deviceIdx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (prefix) PetscValidCharPointer(prefix,2);
  PetscValidDeviceContext(dctx,3);
  ierr = PetscOptionsBegin(comm,prefix,"PetscDeviceContext Options","Sys");CHKERRQ(ierr);
  /* the stream type, the current type of the context is offered as the default */
  ierr = PetscOptionsEList("-device_context_stream_type","PetscDeviceContext PetscStreamType","PetscDeviceContextSetStreamType",PetscStreamTypes,PETSC_STREAM_MAX,PetscStreamTypes[dctx->streamType],&streamIdx,&wasSet);CHKERRQ(ierr);
  if (wasSet) {
    ierr = PetscDeviceContextSetStreamType(dctx,static_cast<PetscStreamType>(streamIdx));CHKERRQ(ierr);
  }
  {
    /* the default is the type of the attached device, or the default device type if the context has no device */
    const char *const currentDevice = dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE];

    /* the first entry of PetscDeviceTypes is not offered as a choice, so the selected index is
       shifted back by one further down */
    ierr = PetscOptionsEList("-device_context_device_type","Underlying PetscDevice","PetscDeviceContextSetDevice",PetscDeviceTypes+1,PETSC_DEVICE_MAX-1,currentDevice,&deviceIdx,&wasSet);CHKERRQ(ierr);
  }
  if (wasSet) {
    ierr = PetscDeviceContextSetDefaultDeviceForType_Internal(dctx,static_cast<PetscDeviceType>(deviceIdx+1));CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}