1 #include <petsc/private/deviceimpl.h> /*I "petscdevice.h" I*/ 2 #include "objpool.hpp" 3 4 const char *const PetscStreamTypes[] = {"global_blocking", "default_blocking", "global_nonblocking", "max", "PetscStreamType", "PETSC_STREAM_", nullptr}; 5 6 const char *const PetscDeviceContextJoinModes[] = {"destroy", "sync", "no_sync", "PetscDeviceContextJoinMode", "PETSC_DEVICE_CONTEXT_JOIN_", nullptr}; 7 8 /* Define the allocator */ 9 struct PetscDeviceContextAllocator : Petsc::AllocatorBase<PetscDeviceContext> { 10 static PetscInt PetscDeviceContextID; 11 12 PETSC_NODISCARD static PetscErrorCode create(PetscDeviceContext *dctx) noexcept { 13 PetscDeviceContext dc; 14 15 PetscFunctionBegin; 16 PetscCall(PetscNew(&dc)); 17 dc->id = PetscDeviceContextID++; 18 dc->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 19 *dctx = dc; 20 PetscFunctionReturn(0); 21 } 22 23 PETSC_NODISCARD static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept { 24 PetscFunctionBegin; 25 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); 26 PetscTryTypeMethod(dctx, destroy); 27 PetscCall(PetscDeviceDestroy(&dctx->device)); 28 PetscCall(PetscFree(dctx->childIDs)); 29 PetscCall(PetscFree(dctx)); 30 PetscFunctionReturn(0); 31 } 32 33 PETSC_NODISCARD static PetscErrorCode reset(PetscDeviceContext dctx) noexcept { 34 PetscFunctionBegin; 35 /* don't deallocate the child array, rather just zero it out */ 36 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); 37 dctx->setup = PETSC_FALSE; 38 dctx->numChildren = 0; 39 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 40 PetscFunctionReturn(0); 41 } 42 43 PETSC_NODISCARD static constexpr PetscErrorCode finalize() noexcept { return 0; } 44 }; 45 /* an ID = 0 is invalid */ 46 PetscInt PetscDeviceContextAllocator::PetscDeviceContextID = 1; 47 48 static Petsc::ObjectPool<PetscDeviceContext, PetscDeviceContextAllocator> contextPool; 49 50 /*@C 51 PetscDeviceContextCreate - Creates a `PetscDeviceContext` 52 53 Not Collective, Asynchronous 54 55 Output Paramemter: 56 . dctx - The `PetscDeviceContext` 57 58 Note: 59 Unlike almost every other PETSc class it is advised that most users use 60 `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts 61 of different types are incompatible with one another; using 62 `PetscDeviceContextDuplicate()` ensures compatible types. 63 64 Level: beginner 65 66 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, 67 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, 68 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextDestroy()` 69 @*/ 70 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) { 71 PetscFunctionBegin; 72 PetscValidPointer(dctx, 1); 73 PetscCall(PetscDeviceInitializePackage()); 74 PetscCall(contextPool.get(*dctx)); 75 PetscFunctionReturn(0); 76 } 77 78 /*@C 79 PetscDeviceContextDestroy - Frees a `PetscDeviceContext` 80 81 Not Collective, Asynchronous 82 83 Input Parameters: 84 . dctx - The `PetscDeviceContext` 85 86 Note: 87 No implicit synchronization occurs due to this routine, all resources are released completely asynchronously 88 w.r.t. the host. If one needs to guarantee access to the data produced on this contexts stream one should perform the 89 appropriate synchronization before calling this routine. 90 91 Developer Note: 92 The context is never actually "destroyed", only returned to an ever growing pool of 93 contexts. There are currently no safeguards on the size of the pool, this should perhaps 94 be implemented. 95 96 Level: beginner 97 98 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` 99 @*/ 100 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) { 101 PetscFunctionBegin; 102 if (!*dctx) PetscFunctionReturn(0); 103 PetscCall(contextPool.reclaim(std::move(*dctx))); 104 *dctx = nullptr; 105 PetscFunctionReturn(0); 106 } 107 108 /*@C 109 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a `PetscDeviceContext` 110 111 Not Collective, Asynchronous 112 113 Input Parameters: 114 + dctx - The `PetscDeviceContext` 115 - type - The `PetscStreamType` 116 117 Notes: 118 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 119 types and their interactions. 120 121 If the `PetscDeviceContext` was previously set up and stream 122 type was changed, you must call `PetscDeviceContextSetUp()` again after this routine. 123 124 Level: intermediate 125 126 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` 127 @*/ 128 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) { 129 PetscFunctionBegin; 130 PetscValidDeviceContext(dctx, 1); 131 PetscValidStreamType(type, 2); 132 /* only need to do complex swapping if the object has already been setup */ 133 if (dctx->setup && (dctx->streamType != type)) { 134 PetscUseTypeMethod(dctx, changestreamtype, type); 135 dctx->setup = PETSC_FALSE; 136 } 137 dctx->streamType = type; 138 PetscFunctionReturn(0); 139 } 140 141 /*@C 142 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a `PetscDeviceContext` 143 144 Not Collective, Asynchronous 145 146 Input Parameter: 147 . dctx - The `PetscDeviceContext` 148 149 Output Parameter: 150 . type - The `PetscStreamType` 151 152 Note: 153 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available types and their interactions 154 155 Level: intermediate 156 157 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetFromOptions()` 158 @*/ 159 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) { 160 PetscFunctionBegin; 161 PetscValidDeviceContext(dctx, 1); 162 PetscValidIntPointer(type, 2); 163 *type = dctx->streamType; 164 PetscFunctionReturn(0); 165 } 166 167 /*@C 168 PetscDeviceContextSetDevice - Set the underlying device for the `PetscDeviceContext` 169 170 Not Collective, Possibly Synchronous 171 172 Input Parameters: 173 + dctx - The `PetscDeviceContext` 174 - device - The `PetscDevice` 175 176 Notes: 177 This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` 178 must also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is 179 not stricly necessary to set a contexts device to enable usage, any created device 180 contexts will always come equipped with the "default" device. 181 182 This routine is a no-op if dctx is already attached to device. 183 184 This routine may initialize the backend device and incur synchronization. 185 186 Level: intermediate 187 188 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()` 189 @*/ 190 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) { 191 PetscFunctionBegin; 192 PetscValidDeviceContext(dctx, 1); 193 PetscValidDevice(device, 2); 194 if (dctx->device) { 195 /* can't do a strict pointer equality check since PetscDevice's are reused */ 196 if (dctx->device->ops->createcontext == device->ops->createcontext) PetscFunctionReturn(0); 197 } 198 PetscCall(PetscDeviceDestroy(&dctx->device)); 199 PetscTryTypeMethod(dctx, destroy); 200 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); 201 PetscCall((*device->ops->createcontext)(dctx)); 202 PetscCall(PetscDeviceReference_Internal(device)); 203 dctx->device = device; 204 dctx->setup = PETSC_FALSE; 205 PetscFunctionReturn(0); 206 } 207 208 /*@C 209 PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext` 210 211 Not Collective, Asynchronous 212 213 Input Parameter: 214 . dctx - the `PetscDeviceContext` 215 216 Output Parameter: 217 . device - The `PetscDevice` 218 219 Note: 220 This is a borrowed reference, the user should not destroy `device`. 221 222 Level: intermediate 223 224 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice` 225 @*/ 226 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) { 227 PetscFunctionBegin; 228 PetscValidDeviceContext(dctx, 1); 229 PetscValidPointer(device, 2); 230 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " has no attached PetscDevice to get", dctx->id); 231 *device = dctx->device; 232 PetscFunctionReturn(0); 233 } 234 235 /*@C 236 PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use 237 238 Not Collective, Asynchronous 239 240 Input Parameter: 241 . dctx - The `PetscDeviceContext` 242 243 Developer Note: 244 This routine is usually the stage where a `PetscDeviceContext` acquires device-side data structures such as streams, 245 events, and (possibly) handles. 246 247 Level: beginner 248 249 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` 250 @*/ 251 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) { 252 PetscFunctionBegin; 253 PetscValidDeviceContext(dctx, 1); 254 if (!dctx->device) { 255 PetscCall(PetscInfo(nullptr, "PetscDeviceContext %" PetscInt_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", dctx->id, PetscDeviceTypes[PETSC_DEVICE_DEFAULT])); 256 PetscCall(PetscDeviceContextSetDefaultDevice_Internal(dctx)); 257 } 258 if (dctx->setup) PetscFunctionReturn(0); 259 PetscUseTypeMethod(dctx, setup); 260 dctx->setup = PETSC_TRUE; 261 PetscFunctionReturn(0); 262 } 263 264 /*@C 265 PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object 266 267 Not Collective, Asynchronous 268 269 Input Parameter: 270 . dctx - The `PetscDeviceContext` to duplicate 271 272 Output Parameter: 273 . dctxdup - The duplicated `PetscDeviceContext` 274 275 Note: 276 This is a shorthand method for creating a `PetscDeviceContext` with the exact same 277 settings as another. However the `dctxdup` does not "share" 278 any of the underlying data with the original, (including its current stream-state) they 279 are completely separate objects. 280 281 Level: beginner 282 283 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetStreamType()` 284 @*/ 285 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) { 286 PetscDeviceContext dup; 287 288 PetscFunctionBegin; 289 PetscValidDeviceContext(dctx, 1); 290 PetscValidPointer(dctxdup, 2); 291 PetscCall(PetscDeviceContextCreate(&dup)); 292 PetscCall(PetscDeviceContextSetStreamType(dup, dctx->streamType)); 293 if (dctx->device) PetscCall(PetscDeviceContextSetDevice(dup, dctx->device)); 294 PetscCall(PetscDeviceContextSetUp(dup)); 295 *dctxdup = dup; 296 PetscFunctionReturn(0); 297 } 298 299 /*@C 300 PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle 301 302 Not Collective, Asynchronous 303 304 Input Parameter: 305 . dctx - The `PetscDeviceContext` object 306 307 Output Parameter: 308 . idle - `PETSC_TRUE` if `PetscDeviceContext` has NO work, `PETSC_FALSE` if it has work 309 310 Note: 311 This routine only refers a singular context and does NOT take any of its children into 312 account. That is, if `dctx` is idle but has dependents who do have work, this routine still 313 returns `PETSC_TRUE`. 314 315 Level: intermediate 316 317 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` 318 @*/ 319 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) { 320 PetscFunctionBegin; 321 PetscValidDeviceContext(dctx, 1); 322 PetscValidBoolPointer(idle, 2); 323 PetscUseTypeMethod(dctx, query, idle); 324 PetscCall(PetscInfo(nullptr, "PetscDeviceContext id %" PetscInt_FMT " %s idle\n", dctx->id, *idle ? "was" : "was not")); 325 PetscFunctionReturn(0); 326 } 327 328 /*@C 329 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 330 331 Not Collective, Asynchronous 332 333 Input Parameters: 334 + dctxa - The `PetscDeviceContext` object that is waiting 335 - dctxb - The `PetscDeviceContext` object that is being waited on 336 337 Notes: 338 Serializes two `PetscDeviceContexts`. This routine uses only the state of `dctxb` at the moment this routine was 339 called, so any future work queued will not affect `dctxa`. 340 341 It is safe to pass the same context to both arguments. 342 343 Level: beginner 344 345 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` 346 @*/ 347 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) { 348 PetscFunctionBegin; 349 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); 350 if (dctxa == dctxb) PetscFunctionReturn(0); 351 PetscUseTypeMethod(dctxa, waitforcontext, dctxb); 352 PetscFunctionReturn(0); 353 } 354 355 #define PETSC_USE_DEBUG_AND_INFO (PetscDefined(USE_DEBUG) && PetscDefined(USE_INFO)) 356 #if PETSC_USE_DEBUG_AND_INFO 357 #include <string> 358 #endif 359 /*@C 360 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 361 362 Not Collective, Asynchronous 363 364 Input Parameters: 365 + dctx - The parent `PetscDeviceContext` 366 - n - The number of children to create 367 368 Output Parameter: 369 . dsub - The created child context(s) 370 371 Notes: 372 This routine creates n edges of a DAG from a source node which are causally dependent on the source node, meaning 373 that work queued on child contexts will not start until the parent context finishes its work. This accounts for work 374 queued on the parent up until calling this function, any subsequent work enqueued on the parent has no effect on the children. 375 376 Any children created with this routine have their lifetimes bounded by the parent. That is, the parent context expects 377 to free all of it's children (and ONLY its children) before itself is freed. 378 379 DAG representation: 380 .vb 381 time -> 382 383 -> dctx \----> dctx ------> 384 \---> dsub[0] ---> 385 \--> ... -------> 386 \-> dsub[n-1] -> 387 .ve 388 389 Level: intermediate 390 391 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` 392 @*/ 393 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) { 394 #if PETSC_USE_DEBUG_AND_INFO 395 const PetscInt nBefore = n; 396 static std::string idList; 397 #endif 398 PetscDeviceContext *dsubTmp = nullptr; 399 PetscInt i = 0; 400 401 PetscFunctionBegin; 402 PetscValidDeviceContext(dctx, 1); 403 PetscValidPointer(dsub, 3); 404 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); 405 #if PETSC_USE_DEBUG_AND_INFO 406 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 407 idList.reserve(4 * n); 408 #endif 409 /* update child totals */ 410 dctx->numChildren += n; 411 /* now to find out if we have room */ 412 if (dctx->numChildren > dctx->maxNumChildren) { 413 /* no room, either from having too many kids or not having any */ 414 if (dctx->childIDs) { 415 /* have existing children, must reallocate them */ 416 PetscCall(PetscRealloc(dctx->numChildren * sizeof(*dctx->childIDs), &dctx->childIDs)); 417 /* clear the extra memory since realloc doesn't do it for us */ 418 PetscCall(PetscArrayzero((dctx->childIDs) + (dctx->maxNumChildren), (dctx->numChildren) - (dctx->maxNumChildren))); 419 } else { 420 /* have no children */ 421 PetscCall(PetscCalloc1(dctx->numChildren, &dctx->childIDs)); 422 } 423 /* update total number of children */ 424 dctx->maxNumChildren = dctx->numChildren; 425 } 426 PetscCall(PetscMalloc1(n, &dsubTmp)); 427 while (n) { 428 /* empty child slot */ 429 if (!(dctx->childIDs[i])) { 430 /* create the child context in the image of its parent */ 431 PetscCall(PetscDeviceContextDuplicate(dctx, dsubTmp + i)); 432 PetscCall(PetscDeviceContextWaitForContext(dsubTmp[i], dctx)); 433 /* register the child with its parent */ 434 dctx->childIDs[i] = dsubTmp[i]->id; 435 #if PETSC_USE_DEBUG_AND_INFO 436 idList += std::to_string(dsubTmp[i]->id); 437 if (n != 1) idList += ", "; 438 #endif 439 --n; 440 } 441 ++i; 442 } 443 #if PETSC_USE_DEBUG_AND_INFO 444 PetscCall(PetscInfo(nullptr, "Forked %" PetscInt_FMT " children from parent %" PetscInt_FMT " with IDs: %s\n", nBefore, dctx->id, idList.c_str())); 445 /* resets the size but doesn't deallocate the memory */ 446 idList.clear(); 447 #endif 448 /* pass the children back to caller */ 449 *dsub = dsubTmp; 450 PetscFunctionReturn(0); 451 } 452 453 /*@C 454 PetscDeviceContextJoin - Converge a set of child contexts 455 456 Not Collective, Asynchronous 457 458 Input Parameters: 459 + dctx - A `PetscDeviceContext` to converge on 460 . n - The number of sub contexts to converge 461 . joinMode - The type of join to perform 462 - dsub - The sub contexts to converge 463 464 Notes: 465 If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the 466 source node, then this routine is the exact mirror. That is, it creates a node 467 (represented in `dctx`) which receives `n` edges (and optionally destroys them) which is 468 dependent on the completion of all incoming edges. 469 470 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` all contexts in `dsub` will be destroyed 471 by this routine. Thus all sub contexts must have been created with the `dctx` passed to 472 this routine. 473 474 if `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` `dctx` waits for all sub contexts but the 475 sub contexts do not wait for one another afterwards. 476 477 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` all sub contexts will additionally 478 wait on `dctx` after converging. This has the effect of "synchronizing" the outgoing 479 edges. 480 481 DAG representations: 482 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` 483 .vb 484 time -> 485 486 -> dctx ---------/- dctx -> 487 -> dsub[0] -----/ 488 -> ... -------/ 489 -> dsub[n-1] -/ 490 .ve 491 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` 492 .vb 493 time -> 494 495 -> dctx ---------/- dctx -> 496 -> dsub[0] -----/---------> 497 -> ... -------/----------> 498 -> dsub[n-1] -/-----------> 499 .ve 500 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` 501 .vb 502 time -> 503 504 -> dctx ---------/- dctx -\----> dctx ------> 505 -> dsub[0] -----/ \---> dsub[0] ---> 506 -> ... -------/ \--> ... -------> 507 -> dsub[n-1] -/ \-> dsub[n-1] -> 508 .ve 509 510 Level: intermediate 511 512 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` 513 @*/ 514 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) { 515 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 516 static std::string idList; 517 #endif 518 519 PetscFunctionBegin; 520 /* validity of dctx is checked in the wait-for loop */ 521 PetscValidPointer(dsub, 4); 522 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); 523 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 524 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 525 idList.reserve(4 * n); 526 #endif 527 /* first dctx waits on all the incoming edges */ 528 for (PetscInt i = 0; i < n; ++i) { 529 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); 530 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); 531 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 532 idList += std::to_string((*dsub)[i]->id); 533 if (i + 1 < n) idList += ", "; 534 #endif 535 } 536 537 /* now we handle the aftermath */ 538 switch (joinMode) { 539 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { 540 PetscInt j = 0; 541 542 PetscAssert(n <= dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, dctx->numChildren); 543 /* update child count while it's still fresh in memory */ 544 dctx->numChildren -= n; 545 for (PetscInt i = 0; i < dctx->maxNumChildren; ++i) { 546 if (dctx->childIDs[i] && (dctx->childIDs[i] == (*dsub)[j]->id)) { 547 /* child is one of ours, can destroy it */ 548 PetscCall(PetscDeviceContextDestroy((*dsub) + j)); 549 /* reset the child slot */ 550 dctx->childIDs[i] = 0; 551 if (++j == n) break; 552 } 553 } 554 /* gone through the loop but did not find every child, if this triggers (or well, doesn't) on perf-builds we leak the remaining contexts memory */ 555 PetscAssert(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in.", n - j); 556 PetscCall(PetscFree(*dsub)); 557 } break; 558 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 559 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); 560 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: break; 561 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); 562 } 563 564 #if defined(PETSC_USE_DEBUG) && defined(PETSC_USE_INFO) 565 PetscCall(PetscInfo(nullptr, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt_FMT ", mode %s with IDs: %s\n", n, dctx->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); 566 idList.clear(); 567 #endif 568 PetscFunctionReturn(0); 569 } 570 571 /*@C 572 PetscDeviceContextSynchronize - Block the host until all work queued on or associated with a `PetscDeviceContext` has finished 573 574 Not Collective, Synchronous 575 576 Input Parameters: 577 . dctx - The `PetscDeviceContext` to synchronize 578 579 Level: beginner 580 581 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` 582 @*/ 583 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) { 584 PetscFunctionBegin; 585 PetscValidDeviceContext(dctx, 1); 586 /* if it isn't setup there is nothing to sync on */ 587 if (dctx->setup) PetscUseTypeMethod(dctx, synchronize); 588 PetscFunctionReturn(0); 589 } 590 591 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE PETSC_DEVICE_DEFAULT 592 // REMOVE ME (change) 593 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM PETSC_STREAM_GLOBAL_BLOCKING 594 595 static PetscDeviceType rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 596 static PetscStreamType rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 597 static PetscDeviceContext globalContext = nullptr; 598 599 /* when PetsDevice initializes PetscDeviceContext eagerly the type of device created should 600 * match whatever device is eagerly intialized */ 601 PetscErrorCode PetscDeviceContextSetRootDeviceType_Internal(PetscDeviceType type) { 602 PetscFunctionBegin; 603 PetscValidDeviceType(type, 1); 604 rootDeviceType = type; 605 PetscFunctionReturn(0); 606 } 607 608 #if 0 609 /* currently unused */ 610 PetscErrorCode PetscDeviceContextSetRootStreamType_Internal(PetscStreamType type) 611 { 612 PetscFunctionBegin; 613 PetscValidStreamType(type,1); 614 rootStreamType = type; 615 PetscFunctionReturn(0); 616 } 617 #endif 618 619 static PetscErrorCode PetscDeviceContextSetupGlobalContext_Private(void) { 620 static const auto PetscDeviceContextFinalizer = []() -> PetscErrorCode { 621 PetscFunctionBegin; 622 PetscCall(PetscDeviceContextDestroy(&globalContext)); 623 rootDeviceType = PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE; 624 rootStreamType = PETSC_DEVICE_CONTEXT_DEFAULT_STREAM; 625 PetscFunctionReturn(0); 626 }; 627 628 PetscFunctionBegin; 629 if (globalContext) PetscFunctionReturn(0); 630 /* this exists purely as a valid device check. */ 631 PetscCall(PetscDeviceInitializePackage()); 632 PetscCall(PetscRegisterFinalize(PetscDeviceContextFinalizer)); 633 PetscCall(PetscInfo(nullptr, "Initializing global PetscDeviceContext\n")); 634 /* we call the allocator directly here since the ObjectPool creates a PetscContainer which 635 * eventually tries to call logging functions. However, this routine may be purposefully 636 * called __before__ logging is initialized, so the logging function would PETSCABORT */ 637 PetscCall(contextPool.allocator().create(&globalContext)); 638 PetscCall(PetscDeviceContextSetStreamType(globalContext, rootStreamType)); 639 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(globalContext, rootDeviceType)); 640 PetscCall(PetscDeviceContextSetUp(globalContext)); 641 PetscFunctionReturn(0); 642 } 643 644 /*@C 645 PetscDeviceContextGetCurrentContext - Get the current active `PetscDeviceContext` 646 647 Not Collective, Asynchronous 648 649 Output Parameter: 650 . dctx - The `PetscDeviceContext` 651 652 Note: 653 The user generally should not destroy contexts retrieved with this routine unless they 654 themselves have created them. There exists no protection against destroying the root 655 context. 656 657 Developer Note: 658 Unless the user has set their own, this routine creates the "root" context the first time it 659 is called, registering its destructor to `PetscFinalize()`. 660 661 Level: beginner 662 663 .seealso: `PetscDeviceContextSetCurrentContext()`, `PetscDeviceContextFork()`, 664 `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()` 665 @*/ 666 PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *dctx) { 667 PetscFunctionBegin; 668 PetscValidPointer(dctx, 1); 669 PetscCall(PetscDeviceContextSetupGlobalContext_Private()); 670 /* while the static analyzer can find global variables, it will throw a warning about not 671 * being able to connect this back to the function arguments */ 672 PetscDisableStaticAnalyzerForExpressionUnderstandingThatThisIsDangerousAndBugprone(PetscValidDeviceContext(globalContext, -1)); 673 *dctx = globalContext; 674 PetscFunctionReturn(0); 675 } 676 677 /*@C 678 PetscDeviceContextSetCurrentContext - Set the current active `PetscDeviceContext` 679 680 Not Collective, Asynchronous 681 682 Input Parameter: 683 . dctx - The `PetscDeviceContext` 684 685 Notes: 686 This routine can be used to set the defacto "root" `PetscDeviceContext` to a user-defined 687 implementation by calling this routine immediately after `PetscInitialize()` and ensuring that 688 `PetscDevice` is not eagerly initialized. In this case the user is responsible for destroying 689 their `PetscDeviceContext` before `PetscFinalize()` returns. 690 691 The old context is not stored in any way by this routine; if one is overriding a context that 692 they themselves do not control, one should take care to temporarily store it by calling 693 `PetscDeviceContextGetCurrentContext()` before calling this routine. 694 695 Level: beginner 696 697 .seealso: `PetscDeviceContextGetCurrentContext()`, `PetscDeviceContextFork()`, 698 `PetscDeviceContextJoin()`, `PetscDeviceContextCreate()` 699 @*/ 700 PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext dctx) { 701 PetscFunctionBegin; 702 PetscValidDeviceContext(dctx, 1); 703 PetscAssert(dctx->setup, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt_FMT " must be set up before being set as global context", dctx->id); 704 globalContext = dctx; 705 PetscCall(PetscInfo(nullptr, "Set global PetscDeviceContext id %" PetscInt_FMT "\n", dctx->id)); 706 PetscFunctionReturn(0); 707 } 708 709 /*@C 710 PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database 711 712 Collective on comm, Asynchronous 713 714 Input Parameters: 715 + comm - MPI communicator on which to query the options database 716 . prefix - prefix to prepend to all options database queries, NULL if not needed 717 - dctx - The `PetscDeviceContext` to configure 718 719 Output Parameter: 720 . dctx - The `PetscDeviceContext` 721 722 Options Database Keys: 723 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - `PetscDeviceContextSetStreamType()` 724 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType` 725 726 Level: beginner 727 728 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()` 729 @*/ 730 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, const char prefix[], PetscDeviceContext dctx) { 731 PetscBool flag; 732 PetscInt stype, dtype; 733 734 PetscFunctionBegin; 735 if (prefix) PetscValidCharPointer(prefix, 2); 736 PetscValidDeviceContext(dctx, 3); 737 PetscOptionsBegin(comm, prefix, "PetscDeviceContext Options", "Sys"); 738 PetscCall(PetscOptionsEList("-device_context_stream_type", "PetscDeviceContext PetscStreamType", "PetscDeviceContextSetStreamType", PetscStreamTypes, PETSC_STREAM_MAX, PetscStreamTypes[dctx->streamType], &stype, &flag)); 739 if (flag) PetscCall(PetscDeviceContextSetStreamType(dctx, static_cast<PetscStreamType>(stype))); 740 PetscCall(PetscOptionsEList("-device_context_device_type", "Underlying PetscDevice", "PetscDeviceContextSetDevice", PetscDeviceTypes + 1, PETSC_DEVICE_MAX - 1, dctx->device ? PetscDeviceTypes[dctx->device->type] : PetscDeviceTypes[PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE], &dtype, &flag)); 741 if (flag) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, static_cast<PetscDeviceType>(dtype + 1))); 742 PetscOptionsEnd(); 743 PetscFunctionReturn(0); 744 } 745