1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/ 2 #include "objpool.hpp" 3 4 const char *const PetscStreamTypes[] = {"global_blocking", "default_blocking", "global_nonblocking", "max", "PetscStreamType", "PETSC_STREAM_", nullptr}; 5 6 const char *const PetscDeviceContextJoinModes[] = {"destroy", "sync", "no_sync", "PetscDeviceContextJoinMode", "PETSC_DEVICE_CONTEXT_JOIN_", nullptr}; 7 8 /* Define the allocator */ 9 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext> { 10 static PetscInt PetscDeviceContextID; 11 12 PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept { 13 PetscDeviceContext dc; 14 15 PetscFunctionBegin; 16 PetscCall(PetscNew(&dc)); 17 dc->id = PetscDeviceContextID++; 18 dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 19 *dctx = dc; 20 PetscFunctionReturn(0); 21 } 22 23 PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept { 24 PetscFunctionBegin; 25 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); 26 PetscTryTypeMethod(dctx, destroy); 27 PetscCall(PetscDeviceDestroy(&dctx->device)); 28 PetscCall(PetscFree(dctx->childIDs)); 29 PetscCall(PetscFree(dctx)); 30 PetscFunctionReturn(0); 31 } 32 33 PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept { 34 PetscFunctionBegin; 35 /* don't deallocate the child array, rather just zero it out */ 36 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); 37 dctx->setup = PETSC_FALSE; 38 dctx->numChildren = 0; 39 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 40 PetscFunctionReturn(0); 41 } 42 43 PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; } 44 }; 45 /* an ID = 0 is invalid */ 46 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1; 47 48 static Petsc::ObjectPool<PetscDeviceContext, PetscDeviceContextAllocator> contextPool; 49 50 /*@C 51 PetscDeviceContextCreate - Creates a PetscDeviceContext 52 53 Not Collective, Asynchronous 54 55 Output Paramemter: 56 . dctx - The PetscDeviceContext 57 58 Notes: 59 Unlike almost every other PETSc class it is advised that most users use 60 PetscDeviceContextDuplicate() rather than this routine to create new contexts. Contexts 61 of different types are incompatible with one another; using 62 PetscDeviceContextDuplicate() ensures compatible types. 63 64 Level: beginner 65 66 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, 67 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, 68 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextDestroy()` 69 @*/ 70 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) { 71 PetscFunctionBegin; 72 PetscValidPointer(dctx, 1); 73 PetscCall(PetscDeviceInitializePackage()); 74 PetscCall(contextPool.get(*dctx)); 75 PetscFunctionReturn(0); 76 } 77 78 /*@C 79 PetscDeviceContextDestroy - Frees a PetscDeviceContext 80 81 Not Collective, Asynchronous 82 83 Input Parameters: 84 . dctx - The PetscDeviceContext 85 86 Notes: 87 No implicit synchronization occurs due to this routine, all resources are released completely asynchronously 88 w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the 89 appropriate synchronization before calling this routine. 90 91 Developer Notes: 92 The context is never actually "destroyed", only returned to an ever growing pool of 93 contexts. There are currently no safeguards on the size of the pool, this should perhaps 94 be implemented. 95 96 Level: beginner 97 98 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` 99 @*/ 100 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) { 101 PetscFunctionBegin; 102 if (!*dctx) PetscFunctionReturn(0); 103 PetscCall(contextPool.reclaim(std::move(*dctx))); 104 *dctx = nullptr; 105 PetscFunctionReturn(0); 106 } 107 108 /*@C 109 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a PetscDeviceContext 110 111 Not Collective, Asynchronous 112 113 Input Parameters: 114 + dctx - The PetscDeviceContext 115 - type - The PetscStreamType 116 117 Notes: 118 See PetscStreamType in include/petscdevicetypes.h for more information on the available 119 types and their interactions. If the PetscDeviceContext was previously set up and stream 120 type was changed, you must call PetscDeviceContextSetUp() again after this routine. 121 122 Level: intermediate 123 124 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` 125 @*/ 126 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) { 127 PetscFunctionBegin; 128 PetscValidDeviceContext(dctx, 1); 129 PetscValidStreamType(type, 2); 130 /* only need to do complex swapping if the object has already been setup */ 131 if (dctx->setup && (dctx->streamType != type)) { 132 PetscUseTypeMethod(dctx, changestreamtype, type); 133 dctx->setup = PETSC_FALSE; 134 } 135 dctx->streamType = type; 136 PetscFunctionReturn(0); 137 } 138 139 /*@C 140 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a PetscDeviceContext 141 142 Not Collective, Asynchronous 143 144 Input Parameter: 145 . dctx - The PetscDeviceContext 146 147 Output Parameter: 148 . type - The PetscStreamType 149 150 Notes: 151 See PetscStreamType in include/petscdevicetypes.h for more information on the available types and their interactions 152 153 Level: intermediate 154 155 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetFromOptions()` 156 @*/ 157 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) { 158 PetscFunctionBegin; 159 PetscValidDeviceContext(dctx, 1); 160 PetscValidIntPointer(type, 2); 161 *type = dctx->streamType; 162 PetscFunctionReturn(0); 163 } 164 165 /*@C 166 PetscDeviceContextSetDevice - Set the underlying device for the PetscDeviceContext 167 168 Not Collective, Possibly Synchronous 169 170 Input Parameters: 171 + dctx - The PetscDeviceContext 172 - device - The PetscDevice 173 174 Notes: 175 This routine is effectively PetscDeviceContext's "set-type" (so every PetscDeviceContext 176 must also have an attached PetscDevice). Unlike the usual set-type semantics, it is 177 not stricly necessary to set a contexts device to enable usage, any created device 178 contexts will always come equipped with the "default" device. 179 180 This routine is a no-op if dctx is already attached to device. 181 182 This routine may initialize the backend device and incur synchronization. 183 184 Level: intermediate 185 186 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()` 187 @*/ 188 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) { 189 PetscFunctionBegin; 190 PetscValidDeviceContext(dctx, 1); 191 PetscValidDevice(device, 2); 192 if (dctx->device) { 193 /* can't do a strict pointer equality check since PetscDevice's are reused */ 194 if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0); 195 } 196 PetscCall(PetscDeviceDestroy(&dctx->device)); 197 PetscTryTypeMethod(dctx, destroy); 198 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); 199 PetscCall((*device->ops->createcontext)(dctx)); 200 PetscCall(PetscDeviceReference_Internal(device)); 201 dctx->device = device; 202 dctx->setup = PETSC_FALSE; 203 PetscFunctionReturn(0); 204 } 205 206 /*@C 207 PetscDeviceContextGetDevice - Get the underlying PetscDevice for a PetscDeviceContext 208 209 Not Collective, Asynchronous 210 211 Input Parameter: 212 . dctx - the PetscDeviceContext 213 214 Output Parameter: 215 . device - The PetscDevice 216 217 Notes: 218 This is a borrowed reference, the user should not destroy the device. 219 220 Level: intermediate 221 222 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice` 223 @*/ 224 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) { 225 PetscFunctionBegin; 226 PetscValidDeviceContext(dctx, 1); 227 PetscValidPointer(device, 2); 228 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get", dctx->id); 229 *device = dctx->device; 230 PetscFunctionReturn(0); 231 } 232 233 /*@C 234 PetscDeviceContextSetUp - Prepares a PetscDeviceContext for use 235 236 Not Collective, Asynchronous 237 238 Input Parameter: 239 . dctx - The PetscDeviceContext 240 241 Developer Notes: 242 This routine is usually the stage where a PetscDeviceContext acquires device-side data structures such as streams, 243 events, and (possibly) handles. 244 245 Level: beginner 246 247 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` 248 @*/ 249 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) { 250 PetscFunctionBegin; 251 PetscValidDeviceContext(dctx, 1); 252 if (!dctx->device) { 253 PetscCall(PetscInfo(nullptr, "PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", dctx->id, PetscDeviceTypes[PETSC_DEVICE_DEFAULT])); 254 PetscCall(PetscDeviceContextSetDefaultDevice_Internal(dctx)); 255 } 256 if (dctx->setup) PetscFunctionReturn(0); 257 PetscUseTypeMethod(dctx, setup); 258 dctx->setup = PETSC_TRUE; 259 PetscFunctionReturn(0); 260 } 261 262 /*@C 263 PetscDeviceContextDuplicate - Duplicates a PetscDeviceContext object 264 265 Not Collective, Asynchronous 266 267 Input Parameter: 268 . dctx - The PetscDeviceContext to duplicate 269 270 Output Parameter: 271 . dctxdup - The duplicated PetscDeviceContext 272 273 Notes: 274 This is a shorthand method for creating a PetscDeviceContext with the exact same 275 settings as another. Note however that the duplicated PetscDeviceContext does not "share" 276 any of the underlying data with the original, (including its current stream-state) they 277 are completely separate objects. 278 279 Level: beginner 280 281 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetStreamType()` 282 @*/ 283 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) { 284 PetscDeviceContext dup; 285 286 PetscFunctionBegin; 287 PetscValidDeviceContext(dctx, 1); 288 PetscValidPointer(dctxdup, 2); 289 PetscCall(PetscDeviceContextCreate(&dup)); 290 PetscCall(PetscDeviceContextSetStreamType(dup, dctx->streamType)); 291 if (dctx->device) PetscCall(PetscDeviceContextSetDevice(dup, dctx->device)); 292 PetscCall(PetscDeviceContextSetUp(dup)); 293 *dctxdup = dup; 294 PetscFunctionReturn(0); 295 } 296 297 /*@C 298 PetscDeviceContextQueryIdle - Returns whether or not a PetscDeviceContext is idle 299 300 Not Collective, Asynchronous 301 302 Input Parameter: 303 . dctx - The PetscDeviceContext object 304 305 Output Parameter: 306 . idle - PETSC_TRUE if PetscDeviceContext has NO work, PETSC_FALSE if it has work 307 308 Notes: 309 This routine only refers a singular context and does NOT take any of its children into 310 account. That is, if dctx is idle but has dependents who do have work, this routine still 311 returns PETSC_TRUE. 312 313 Level: intermediate 314 315 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` 316 @*/ 317 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) { 318 PetscFunctionBegin; 319 PetscValidDeviceContext(dctx, 1); 320 PetscValidBoolPointer(idle, 2); 321 PetscUseTypeMethod(dctx, query, idle); 322 PetscCall(PetscInfo(nullptr, "PetscDeviceContext id %" PetscInt_FMT " %s idle\n", dctx->id, *idle ? "was" : "was not")); 323 PetscFunctionReturn(0); 324 } 325 326 /*@C 327 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 328 329 Not Collective, Asynchronous 330 331 Input Parameters: 332 + dctxa - The PetscDeviceContext object that is waiting 333 - dctxb - The PetscDeviceContext object that is being waited on 334 335 Notes: 336 Serializes two PetscDeviceContexts. This routine uses only the state of dctxb at the moment this routine was 337 called, so any future work queued will not affect dctxa. It is safe to pass the same context to both arguments. 338 339 Level: beginner 340 341 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` 342 @*/ 343 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) { 344 PetscFunctionBegin; 345 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); 346 if (dctxa == dctxb) PetscFunctionReturn(0); 347 PetscUseTypeMethod(dctxa, waitforcontext, dctxb); 348 PetscFunctionReturn(0); 349 } 350 351 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO)) 352 #if PETSC_USE_DEBUG_AND_INFO 353 #include <string> 354 #endif 355 /*@C 356 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 357 358 Not Collective, Asynchronous 359 360 Input Parameters: 361 + dctx - The parent PetscDeviceContext 362 - n - The number of children to create 363 364 Output Parameter: 365 . dsub - The created child context(s) 366 367 Notes: 368 This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning 369 that work queued on child contexts will not start until the parent context finishes its work. This accounts for work 370 queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children. 371 372 Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects 373 to free all of it's children (and ONLY its children) before itself is freed. 374 375 DAG representation: 376 .vb 377 time -> 378 379 -> dctx \----> dctx ------> 380 \---> dsub[0] ---> 381 \--> ... -------> 382 \-> dsub[n-1] -> 383 .ve 384 385 Level: intermediate 386 387 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` 388 @*/ 389 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) { 390 #if PETSC_USE_DEBUG_AND_INFO 391 const PetscInt nBefore = n; 392 static std::string idList; 393 #endif 394 PetscDeviceContext *dsubTmp = nullptr; 395 PetscInt i = 0; 396 397 PetscFunctionBegin; 398 PetscValidDeviceContext(dctx, 1); 399 PetscValidPointer(dsub, 3); 400 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); 401 #if PETSC_USE_DEBUG_AND_INFO 402 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 403 idList.reserve(4 * n); 404 #endif 405 /* update child totals */ 406 dctx->numChildren += n; 407 /* now to find out if we have room */ 408 if (dctx->numChildren > dctx->maxNumChildren) { 409 /* no room, either from having too many kids or not having any */ 410 if (dctx->childIDs) { 411 /* have existing children, must reallocate them */ 412 PetscCall(PetscRealloc(dctx->numChildren * sizeof(*dctx->childIDs), &dctx->childIDs)); 413 /* clear the extra memory since realloc doesn't do it for us */ 414 PetscCall(PetscArrayzero((dctx->childIDs) + (dctx->maxNumChildren), (dctx->numChildren) - (dctx->maxNumChildren))); 415 } else { 416 /* have no children */ 417 PetscCall(PetscCalloc1(dctx->numChildren, &dctx->childIDs)); 418 } 419 /* update total number of children */ 420 dctx->maxNumChildren = dctx->numChildren; 421 } 422 PetscCall(PetscMalloc1(n, &dsubTmp)); 423 while (n) { 424 /* empty child slot */ 425 if (!(dctx->childIDs[i])) { 426 /* create the child context in the image of its parent */ 427 PetscCall(PetscDeviceContextDuplicate(dctx, dsubTmp + i)); 428 PetscCall(PetscDeviceContextWaitForContext(dsubTmp[i], dctx)); 429 /* register the child with its parent */ 430 dctx->childIDs[i] = dsubTmp[i]->id; 431 #if PETSC_USE_DEBUG_AND_INFO 432 idList += std::to_string(dsubTmp[i]->id); 433 if (n != 1) idList += ", "; 434 #endif 435 --n; 436 } 437 ++i; 438 } 439 #if PETSC_USE_DEBUG_AND_INFO 440 PetscCall(PetscInfo(nullptr, "Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n", nBefore, dctx->id, idList.c_str())); 441 /* resets the size but doesn't deallocate the memory */ 442 idList.clear(); 443 #endif 444 /* pass the children back to caller */ 445 *dsub = dsubTmp; 446 PetscFunctionReturn(0); 447 } 448 449 /*@C 450 PetscDeviceContextJoin - Converge a set of child contexts 451 452 Not Collective, Asynchronous 453 454 Input Parameters: 455 + dctx - A PetscDeviceContext to converge on 456 . n - The number of sub contexts to converge 457 . joinMode - The type of join to perform 458 - dsub - The sub contexts to converge 459 460 Notes: 461 If PetscDeviceContextFork() creates n edges from a source node which all depend on the 462 source node, then this routine is the exact mirror. That is, it creates a node 463 (represented in dctx) which recieves n edges (and optionally destroys them) which is 464 dependent on the completion of all incoming edges. 465 466 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY all contexts in dsub will be destroyed 467 by this routine. Thus all sub contexts must have been created with the dctx passed to 468 this routine. 469 470 if joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC dctx waits for all sub contexts but the 471 sub contexts do not wait for one another afterwards. 472 473 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC all sub contexts will additionally 474 wait on dctx after converging. This has the effect of "synchronizing" the outgoing 475 edges. 476 477 DAG representations: 478 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_DESTROY 479 .vb 480 time -> 481 482 -> dctx ---------/- dctx -> 483 -> dsub[0] -----/ 484 -> ... -------/ 485 -> dsub[n-1] -/ 486 .ve 487 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC 488 .vb 489 time -> 490 491 -> dctx ---------/- dctx -> 492 -> dsub[0] -----/---------> 493 -> ... -------/----------> 494 -> dsub[n-1] -/-----------> 495 .ve 496 If joinMode is PETSC_DEVICE_CONTEXT_JOIN_SYNC 497 .vb 498 time -> 499 500 -> dctx ---------/- dctx -\----> dctx ------> 501 -> dsub[0] -----/ \---> dsub[0] ---> 502 -> ... -------/ \--> ... -------> 503 -> dsub[n-1] -/ \-> dsub[n-1] -> 504 .ve 505 506 Level: intermediate 507 508 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` 509 @*/ 510 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) { 511 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 512 static std::string idList; 513 #endif 514 515 PetscFunctionBegin; 516 /* validity of dctx is checked in the wait-for loop */ 517 PetscValidPointer(dsub, 4); 518 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); 519 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 520 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 521 idList.reserve(4 * n); 522 #endif 523 /* first dctx waits on all the incoming edges */ 524 for (PetscInt i = 0; i < n; ++i) { 525 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); 526 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); 527 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 528 idList += std::to_string((*dsub)[i]->id); 529 if (i + 1 < n) idList += ", "; 530 #endif 531 } 532 533 /* now we handle the aftermath */ 534 switch (joinMode) { 535 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { 536 PetscInt j = 0; 537 538 PetscAssert(n <= dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, dctx->numChildren); 539 /* update child count while it's still fresh in memory */ 540 dctx->numChildren -= n; 541 for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) { 542 if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) { 543 /* child is one of ours, can destroy it */ 544 PetscCall(PetscDeviceContextDestroy((*dsub) + j)); 545 /* reset the child slot */ 546 dctx->childIDs[i] = 0; 547 if (++j == n) break; 548 } 549 } 550 /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */ 551 PetscAssert(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.", n - j); 552 PetscCall(PetscFree(*dsub)); 553 } break; 554 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 555 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); 556 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: break; 557 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); 558 } 559 560 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 561 PetscCall(PetscInfo(nullptr, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n", n, dctx->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); 562 idList.clear(); 563 #endif 564 PetscFunctionReturn(0); 565 } 566 567 /*@C 568 PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a PetscDeviceContext has finished 569 570 Not Collective, Synchronous 571 572 Input Parameters: 573 . dctx - The PetscDeviceContext to synchronize 574 575 Level: beginner 576 577 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` 578 @*/ 579 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) { 580 PetscFunctionBegin; 581 PetscValidDeviceContext(dctx, 1); 582 /* if it isn't setup there is nothing to sync on */ 583 if (dctx->setup) PetscUseTypeMethod(dctx, synchronize); 584 PetscFunctionReturn(0); 585 } 586 587 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT 588 // REMOVE ME (change) 589 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING 590 591 static PetscDeviceType rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 592 static PetscStreamType rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 593 static PetscDeviceContext globalContext = nullptr; 594 595 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should 596 * match whatever device is eagerly intialized */ 597 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) { 598 PetscFunctionBegin; 599 PetscValidDeviceType(type, 1); 600 rootDeviceType = type; 601 PetscFunctionReturn(0); 602 } 603 604 #if 0 605 /* currently unused */ 606 PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type) 607 { 608 PetscFunctionBegin; 609 PetscValidStreamType(type,1); 610 rootStreamType = type; 611 PetscFunctionReturn(0); 612 } 613 #endif 614 615 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) { 616 static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode { 617 PetscFunctionBegin; 618 PetscCall(PetscDeviceContextDestroy(&globalContext)); 619 rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 620 rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 621 PetscFunctionReturn(0); 622 }; 623 624 PetscFunctionBegin; 625 if (globalContext) PetscFunctionReturn(0); 626 /* this exists purely as a valid device check. */ 627 PetscCall(PetscDeviceInitializePackage()); 628 PetscCall(PetscRegisterFinalize(PetscDeviceContextFinalizer)); 629 PetscCall(PetscInfo(nullptr, "Initializing global PetscDeviceContext\n")); 630 /* we call the allocator directly here since the ObjectPool creates a PetscContainer which 631 * eventually tries to call logging functions. However, this routine may be purposefully 632 * called __before__ logging is initialized, so the logging function would PETSCABORT */ 633 PetscCall(contextPool.allocator().create(&globalContext)); 634 PetscCall(PetscDeviceContextSetStreamType(globalContext, rootStreamType)); 635 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext, rootDeviceType)); 636 PetscCall(PetscDeviceContextSetUp(globalContext)); 637 PetscFunctionReturn(0); 638 } 639 640 /*@C 641 PetscDeviceContextGetCurrentContext - Get the current active PetscDeviceContext 642 643 Not Collective, Asynchronous 644 645 Output Parameter: 646 . dctx - The PetscDeviceContext 647 648 Notes: 649 The user generally should not destroy contexts retrieved with this routine unless they 650 themselves have created them. There exists no protection against destroying the root 651 context. 652 653 Developer Notes: 654 Unless the user has set their own, this routine creates the "root" context the first time it 655 is called, registering its destructor to PetscFinalize(). 656 657 Level: beginner 658 659 .seealso: `PetscDeviceContextSetCurrentContext()`, `PetscDeviceContextFork()`, 660 `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()` 661 @*/ 662 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) { 663 PetscFunctionBegin; 664 PetscValidPointer(dctx, 1); 665 PetscCall(PetscDeviceContextSetupGlobalContext_Private()); 666 /* while the static analyzer can find global variables, it will throw a warning about not 667 * being able to connect this back to the function arguments */ 668 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext, -1)); 669 *dctx = globalContext; 670 PetscFunctionReturn(0); 671 } 672 673 /*@C 674 PetscDeviceContextSetCurrentContext - Set the current active PetscDeviceContext 675 676 Not Collective, Asynchronous 677 678 Input Parameter: 679 . dctx - The PetscDeviceContext 680 681 Notes: 682 This routine can be used to set the defacto "root" PetscDeviceContext to a user-defined 683 implementation by calling this routine immediately after PetscInitialize() and ensuring that 684 PetscDevice is not greedily intialized. In this case the user is responsible for destroying 685 their PetscDeviceContext before PetscFinalize() returns. 686 687 The old context is not stored in any way by this routine; if one is overriding a context that 688 they themselves do not control, one should take care to temporarily store it by calling 689 PetscDeviceContextGetCurrentContext() before calling this routine. 690 691 Level: beginner 692 693 .seealso: `PetscDeviceContextGetCurrentContext()`, `PetscDeviceContextFork()`, 694 `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()` 695 @*/ 696 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) { 697 PetscFunctionBegin; 698 PetscValidDeviceContext(dctx, 1); 699 PetscAssert(dctx->setup, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context", dctx->id); 700 globalContext = dctx; 701 PetscCall(PetscInfo(nullptr, "Set global PetscDeviceContext id %" PetscInt_FMT "\n", dctx->id)); 702 PetscFunctionReturn(0); 703 } 704 705 /*@C 706 PetscDeviceContextSetFromOptions - Configure a PetscDeviceContext from the options database 707 708 Collective on comm, Asynchronous 709 710 Input Parameters: 711 + comm - MPI communicator on which to query the options database 712 . prefix - prefix to prepend to all options database queries, NULL if not needed 713 - dctx - The PetscDeviceContext to configure 714 715 Output Parameter: 716 . dctx - The PetscDeviceContext 717 718 Options Database: 719 + -device_context_stream_type - type of stream to create inside the PetscDeviceContext - 720 PetscDeviceContextSetStreamType() 721 - -device_context_device_type - the type of PetscDevice to attach by default - PetscDeviceType 722 723 Level: beginner 724 725 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()` 726 @*/ 727 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx) { 728 PetscBool flag; 729 PetscInt stype, dtype; 730 731 PetscFunctionBegin; 732 if (prefix) PetscValidCharPointer(prefix, 2); 733 PetscValidDeviceContext(dctx, 3); 734 PetscOptionsBegin(comm, prefix, "PetscDeviceContext Options", "Sys"); 735 PetscCall(PetscOptionsEList("-device_context_stream_type", "PetscDeviceContext PetscStreamType", "PetscDeviceContextSetStreamType", PetscStreamTypes, PETSC_STREAM_MAX, PetscStreamTypes[dctx->streamType], &stype, &flag)); 736 if (flag) PetscCall(PetscDeviceContextSetStreamType(dctx, static_cast<PetscStreamType>(stype))); 737 PetscCall(PetscOptionsEList("-device_context_device_type", "Underlying PetscDevice", "PetscDeviceContextSetDevice", PetscDeviceTypes + 1, PETSC_DEVICE_MAX - 1, dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE], &dtype, &flag)); 738 if (flag) { PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, static_cast<PetscDeviceType>(dtype + 1))); } 739 PetscOptionsEnd(); 740 PetscFunctionReturn(0); 741 } 742