1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/ 2 #include <petsc/private/viewerimpl.h> // _p_PetscViewer for PetscObjectCast() 3 4 #include <petsc/private/cpp/object_pool.hpp> 5 #include <petsc/private/cpp/utility.hpp> 6 #include <petsc/private/cpp/array.hpp> 7 8 #include <vector> 9 #include <string> // std::to_string among other things 10 11 /* Define the allocator */ 12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> { 13 public: 14 PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept 15 { 16 PetscFunctionBegin; 17 PetscCall(PetscArrayzero(dctx, 1)); 18 PetscCall(PetscHeaderCreate_Private((PetscObject)dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, (PetscObjectDestroyFn *)PetscDeviceContextDestroy, (PetscObjectViewFn *)PetscDeviceContextView)); 19 PetscCall(PetscLogObjectCreate((PetscObject)dctx)); 20 21 PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx}); 22 PetscCall(underlying().reset(dctx, false)); 23 PetscFunctionReturn(PETSC_SUCCESS); 24 } 25 26 static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept 27 { 28 PetscFunctionBegin; 29 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); 30 PetscTryTypeMethod(dctx, destroy); 31 PetscCall(PetscDeviceDestroy(&dctx->device)); 32 PetscCall(PetscFree(dctx->childIDs)); 33 delete CxxDataCast(dctx); 34 PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE)); 35 PetscFunctionReturn(PETSC_SUCCESS); 36 } 37 38 static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept 39 { 40 PetscFunctionBegin; 41 if (zero) { 42 // reset the device if the user set it 43 if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) { 44 PetscTryTypeMethod(dctx, destroy); 45 PetscCall(PetscDeviceDestroy(&dctx->device)); 46 PetscCall(PetscArrayzero(dctx->ops, 1)); 47 dctx->data = nullptr; 48 } 49 PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx))); 50 dctx->numChildren = 0; 51 dctx->setup = PETSC_FALSE; 52 // don't deallocate the child array, rather just zero it out 53 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); 54 PetscCall(CxxDataCast(dctx)->clear()); 55 PetscCall(CxxDataCast(dctx)->reset_self(dctx)); 56 } 57 dctx->streamType = PETSC_STREAM_DEFAULT; 58 PetscFunctionReturn(PETSC_SUCCESS); 59 } 60 61 static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept 62 { 63 PetscFunctionBegin; 64 PetscCall(CxxDataCast(dctx)->reset_self(dctx)); 65 PetscFunctionReturn(PETSC_SUCCESS); 66 } 67 }; 68 69 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool; 70 71 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 72 /*@C 73 PetscDeviceContextCreate - Creates a `PetscDeviceContext` 74 75 Not Collective 76 77 Output Parameter: 78 . dctx - The `PetscDeviceContext` 79 80 Level: beginner 81 82 Note: 83 Unlike almost every other PETSc class it is advised that most users use 84 `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of 85 different types are incompatible with one another; using `PetscDeviceContextDuplicate()` 86 ensures compatible types. 87 88 DAG representation: 89 .vb 90 time -> 91 92 |= CALL =| - dctx -> 93 .ve 94 95 .N ASYNC_API 96 97 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, 98 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, 99 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()` 100 @*/ 101 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 102 { 103 PetscFunctionBegin; 104 PetscAssertPointer(dctx, 1); 105 PetscCall(PetscDeviceInitializePackage()); 106 PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 107 PetscCall(contextPool.allocate(dctx)); 108 PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 113 /*@C 114 PetscDeviceContextDestroy - Frees a `PetscDeviceContext` 115 116 Not Collective 117 118 Input Parameter: 119 . dctx - The `PetscDeviceContext` 120 121 Level: beginner 122 123 Notes: 124 No implicit synchronization occurs due to this routine, all resources are released completely 125 asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on 126 `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before 127 calling this routine. 128 129 DAG representation: 130 .vb 131 time -> 132 133 -> dctx - |= CALL =| 134 .ve 135 136 Developer Notes: 137 `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever 138 growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool, 139 this should perhaps be implemented. 140 141 .N ASYNC_API 142 143 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 144 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` 145 @*/ 146 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 147 { 148 PetscFunctionBegin; 149 PetscAssertPointer(dctx, 1); 150 if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS); 151 PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 152 if (--(PetscObjectCast(*dctx)->refct) <= 0) { 153 PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx)); 154 PetscCall(contextPool.deallocate(dctx)); 155 } 156 PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 157 *dctx = nullptr; 158 PetscFunctionReturn(PETSC_SUCCESS); 159 } 160 161 /*@C 162 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a 163 `PetscDeviceContext` 164 165 Not Collective 166 167 Input Parameters: 168 + dctx - The `PetscDeviceContext` 169 - type - The `PetscStreamType` 170 171 Level: beginner 172 173 Note: 174 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 175 types and their interactions. If the `PetscDeviceContext` was previously set up and stream 176 type was changed, you must call `PetscDeviceContextSetUp()` again after this routine. 177 178 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, 179 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` 180 @*/ 181 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 182 { 183 PetscFunctionBegin; 184 // do not use getoptionalnullcontext here since we do not want the user to change the stream 185 // type 186 PetscValidDeviceContext(dctx, 1); 187 PetscValidStreamType(type, 2); 188 // only need to do complex swapping if the object has already been setup 189 if (dctx->setup && (dctx->streamType != type)) { 190 dctx->setup = PETSC_FALSE; 191 PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 192 PetscUseTypeMethod(dctx, changestreamtype, type); 193 PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 194 } 195 dctx->streamType = type; 196 PetscFunctionReturn(PETSC_SUCCESS); 197 } 198 199 /*@C 200 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a 201 `PetscDeviceContext` 202 203 Not Collective 204 205 Input Parameter: 206 . dctx - The `PetscDeviceContext` 207 208 Output Parameter: 209 . type - The `PetscStreamType` 210 211 Level: beginner 212 213 Note: 214 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 215 types and their interactions 216 217 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, 218 `PetscDeviceContextSetFromOptions()` 219 @*/ 220 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 221 { 222 PetscFunctionBegin; 223 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 224 PetscAssertPointer(type, 2); 225 *type = dctx->streamType; 226 PetscFunctionReturn(PETSC_SUCCESS); 227 } 228 229 /* 230 Actual function to set the device. 231 232 1. Repeatedly destroying and recreating internal data structures (like streams and events) 233 for recycled PetscDeviceContexts is not free. If done often, it does add up. 234 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or 235 default contexts. The default contexts *never* change type, and the children are extremely 236 unlikely to (chances are if you fork once, you will fork again very soon). 237 3. The only time this calculus changes is if the user themselves sets the device type. In 238 this case we do not know what the user has changed, so must always wipe the slate clean. 239 240 Thus we need to keep track whether the user explicitly sets the device contexts device. 241 */ 242 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set) 243 { 244 PetscFunctionBegin; 245 // do not use getoptionalnullcontext here since we do not want the user to change its device 246 PetscValidDeviceContext(dctx, 1); 247 PetscValidDevice(device, 2); 248 if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS); 249 PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 250 PetscTryTypeMethod(dctx, destroy); 251 PetscCall(PetscDeviceDestroy(&dctx->device)); 252 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); 253 PetscCall(PetscDeviceReference_Internal(device)); 254 // set it before calling the method 255 dctx->device = device; 256 PetscCall((*device->ops->createcontext)(dctx)); 257 PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 258 dctx->setup = PETSC_FALSE; 259 dctx->usersetdevice = user_set; 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type) 264 { 265 PetscDevice device; 266 267 PetscFunctionBegin; 268 PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device)); 269 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE)); 270 PetscFunctionReturn(PETSC_SUCCESS); 271 } 272 273 /*@C 274 PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext` 275 276 Not Collective 277 278 Input Parameters: 279 + dctx - The `PetscDeviceContext` 280 - device - The `PetscDevice` 281 282 Level: intermediate 283 284 Notes: 285 This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must 286 also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly 287 necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will 288 always come equipped with the "default" device. 289 290 This routine is a no-op if `device` is already attached to `dctx`. 291 292 This routine may (but is very unlikely to) initialize the backend device and may incur 293 synchronization. 294 295 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`, 296 `PetscDeviceContextGetDeviceType()` 297 @*/ 298 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 299 { 300 PetscFunctionBegin; 301 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE)); 302 PetscFunctionReturn(PETSC_SUCCESS); 303 } 304 305 /*@C 306 PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext` 307 308 Not Collective 309 310 Input Parameter: 311 . dctx - the `PetscDeviceContext` 312 313 Output Parameter: 314 . device - The `PetscDevice` 315 316 Level: intermediate 317 318 Note: 319 This is a borrowed reference, the user should not destroy `device`. 320 321 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()` 322 @*/ 323 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 324 { 325 PetscFunctionBegin; 326 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 327 PetscAssertPointer(device, 2); 328 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id); 329 *device = dctx->device; 330 PetscFunctionReturn(PETSC_SUCCESS); 331 } 332 333 /*@C 334 PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext` 335 336 Not Collective 337 338 Input Parameter: 339 . dctx - The `PetscDeviceContext` 340 341 Output Parameter: 342 . type - The `PetscDeviceType` 343 344 Level: beginner 345 346 Note: 347 This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` -> 348 `PetscDeviceGetType()`. 349 350 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice` 351 @*/ 352 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type) 353 { 354 PetscDevice device = nullptr; 355 356 PetscFunctionBegin; 357 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 358 PetscAssertPointer(type, 2); 359 PetscCall(PetscDeviceContextGetDevice(dctx, &device)); 360 PetscCall(PetscDeviceGetType(device, type)); 361 PetscFunctionReturn(PETSC_SUCCESS); 362 } 363 364 /*@C 365 PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use 366 367 Not Collective 368 369 Input Parameter: 370 . dctx - The `PetscDeviceContext` 371 372 Level: beginner 373 374 Developer Notes: 375 This routine is usually the stage where a `PetscDeviceContext` acquires device-side data 376 structures such as streams, events, and (possibly) handles. 377 378 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 379 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` 380 @*/ 381 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 382 { 383 PetscFunctionBegin; 384 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 385 if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS); 386 if (!dctx->device) { 387 const auto default_dtype = PETSC_DEVICE_DEFAULT(); 388 389 PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype])); 390 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype)); 391 } 392 PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 393 PetscUseTypeMethod(dctx, setup); 394 PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 395 dctx->setup = PETSC_TRUE; 396 PetscFunctionReturn(PETSC_SUCCESS); 397 } 398 399 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup) 400 { 401 PetscFunctionBegin; 402 PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 403 PetscCall(PetscDeviceContextCreate(dctxdup)); 404 PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype)); 405 if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice)); 406 PetscCall(PetscDeviceContextSetUp(*dctxdup)); 407 PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 408 PetscFunctionReturn(PETSC_SUCCESS); 409 } 410 411 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 412 /*@C 413 PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object 414 415 Not Collective 416 417 Input Parameter: 418 . dctx - The `PetscDeviceContext` to duplicate 419 420 Output Parameter: 421 . dctxdup - The duplicated `PetscDeviceContext` 422 423 Level: beginner 424 425 Notes: 426 This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as 427 another. Note however that `dctxdup` does not share any of the underlying data with `dctx`, 428 (including its current stream-state) they are completely separate objects. 429 430 There is no implied ordering between `dctx` or `dctxdup`. 431 432 DAG representation: 433 .vb 434 time -> 435 436 -> dctx - |= CALL =| - dctx ----> 437 - dctxdup -> 438 .ve 439 440 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 441 `PetscDeviceContextSetStreamType()` 442 @*/ 443 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) 444 { 445 auto stype = PETSC_STREAM_DEFAULT; 446 447 PetscFunctionBegin; 448 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 449 PetscAssertPointer(dctxdup, 2); 450 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 451 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup)); 452 PetscFunctionReturn(PETSC_SUCCESS); 453 } 454 455 /*@C 456 PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle 457 458 Not Collective 459 460 Input Parameter: 461 . dctx - The `PetscDeviceContext` 462 463 Output Parameter: 464 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work 465 466 Level: intermediate 467 468 Note: 469 This routine only refers a singular context and does NOT take any of its children into 470 account. That is, if `dctx` is idle but has dependents who do have work this routine still 471 returns `PETSC_TRUE`. 472 473 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` 474 @*/ 475 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 476 { 477 PetscFunctionBegin; 478 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 479 PetscAssertPointer(idle, 2); 480 PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 481 PetscUseTypeMethod(dctx, query, idle); 482 PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 483 PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not")); 484 PetscFunctionReturn(PETSC_SUCCESS); 485 } 486 487 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 488 /*@C 489 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 490 491 Not Collective 492 493 Input Parameters: 494 + dctxa - The `PetscDeviceContext` object that is waiting 495 - dctxb - The `PetscDeviceContext` object that is being waited on 496 497 Level: beginner 498 499 Notes: 500 Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host 501 does not wait for the serialization to actually occur. 502 503 This routine uses only the state of `dctxb` at the moment this routine was called, so any 504 future work queued will not affect `dctxa`. It is safe to pass the same context to both 505 arguments (in which case this routine does nothing). 506 507 DAG representation: 508 .vb 509 time -> 510 511 -> dctxa ---/- |= CALL =| - dctxa -> 512 / 513 -> dctxb -/------------------------> 514 .ve 515 516 .N ASYNC_API 517 518 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` 519 @*/ 520 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 521 { 522 PetscObjectId bid; 523 524 PetscFunctionBegin; 525 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa)); 526 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb)); 527 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); 528 if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS); 529 bid = PetscObjectCast(dctxb)->id; 530 PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 531 PetscUseTypeMethod(dctxa, waitforcontext, dctxb); 532 PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot()); 533 PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 534 PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid)); 535 PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa))); 536 PetscFunctionReturn(PETSC_SUCCESS); 537 } 538 539 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 540 /*@C 541 PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent 542 context with a prescribed `PetscStreamType` 543 544 Not Collective, Asynchronous 545 546 Input Parameters: 547 + dctx - The parent `PetscDeviceContext` 548 . stype - The prescribed `PetscStreamType` 549 - n - The number of children to create 550 551 Output Parameter: 552 . dsub - The created child context(s) 553 554 Level: intermediate 555 556 Notes: 557 This routine creates `n` edges of a DAG from a source node which are causally dependent on the 558 source node. This causal dependency is established as-if by calling 559 `PetscDeviceContextWaitForContext()` on every child. 560 561 `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx` 562 expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed. 563 564 This routine only accounts for work queued on `dctx` up until calling this routine, any 565 subsequent work enqueued on `dctx` has no effect on `dsub`. 566 567 The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case 568 that they are different. This is useful in cases where a routine can locally exploit stream 569 parallelism without needing to worry about what stream type the incoming `PetscDeviceContext` 570 carries. 571 572 DAG representation: 573 .vb 574 time -> 575 576 -> dctx - |= CALL =| -\----> dctx ------> 577 \---> dsub[0] ---> 578 \--> ... -------> 579 \-> dsub[n-1] -> 580 .ve 581 582 .N ASYNC_API 583 584 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, 585 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()` 586 @*/ 587 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub) 588 { 589 // debugging only 590 std::string idList; 591 auto ninput = n; 592 593 PetscFunctionBegin; 594 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 595 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); 596 PetscAssertPointer(dsub, 4); 597 *dsub = nullptr; 598 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 599 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 600 PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 601 /* update child totals */ 602 dctx->numChildren += n; 603 /* now to find out if we have room */ 604 if (dctx->numChildren > dctx->maxNumChildren) { 605 const auto numChildren = dctx->numChildren; 606 auto &maxNumChildren = dctx->maxNumChildren; 607 auto numAllocated = numChildren; 608 609 /* no room, either from having too many kids or not having any */ 610 if (auto &childIDs = dctx->childIDs) { 611 // the difference is backwards because we have not updated maxNumChildren yet 612 numAllocated -= maxNumChildren; 613 /* have existing children, must reallocate them */ 614 PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs)); 615 /* clear the extra memory since realloc doesn't do it for us */ 616 PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated)); 617 } else { 618 /* have no children */ 619 PetscCall(PetscCalloc1(numChildren, &childIDs)); 620 } 621 /* update total number of children */ 622 maxNumChildren = numChildren; 623 } 624 PetscCall(PetscMalloc1(n, dsub)); 625 for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) { 626 auto &childID = dctx->childIDs[i]; 627 /* empty child slot */ 628 if (!childID) { 629 auto &childctx = (*dsub)[i]; 630 631 /* create the child context in the image of its parent */ 632 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx)); 633 PetscCall(PetscDeviceContextWaitForContext(childctx, dctx)); 634 /* register the child with its parent */ 635 PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID)); 636 if (PetscDefined(USE_DEBUG_AND_INFO)) { 637 PetscCallCXX(idList += std::to_string(childID)); 638 if (ninput != 1) PetscCallCXX(idList += ", "); 639 } 640 --ninput; 641 } 642 } 643 PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 644 PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str())); 645 PetscFunctionReturn(PETSC_SUCCESS); 646 } 647 648 /*@C 649 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 650 651 Not Collective, Asynchronous 652 653 Input Parameters: 654 + dctx - The parent `PetscDeviceContext` 655 - n - The number of children to create 656 657 Output Parameter: 658 . dsub - The created child context(s) 659 660 Level: beginner 661 662 Notes: 663 Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed 664 `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\: 665 666 .vb 667 PetscStreamType stype; 668 669 PetscDeviceContextGetStreamType(dctx, &stype); 670 PetscDeviceContextForkWithStreamType(dctx, stype, ...); 671 .ve 672 673 .N ASYNC_API 674 675 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`, 676 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` 677 @*/ 678 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 679 { 680 auto stype = PETSC_STREAM_DEFAULT; 681 682 PetscFunctionBegin; 683 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 684 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 685 PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub)); 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 690 /*@C 691 PetscDeviceContextJoin - Converge a set of child contexts 692 693 Not Collective, Asynchronous 694 695 Input Parameters: 696 + dctx - A `PetscDeviceContext` to converge on 697 . n - The number of sub contexts to converge 698 . joinMode - The type of join to perform 699 - dsub - The sub contexts to converge 700 701 Level: beginner 702 703 Notes: 704 If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source 705 node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`) 706 which receives `n` edges (and optionally destroys them) which is dependent on the completion 707 of all incoming edges. 708 709 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be 710 destroyed by this routine. Thus all sub contexts must have been created with the `dctx` 711 passed to this routine. 712 713 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on 714 `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the 715 sync suffix does NOT refer to the host, i.e. this routine does NOT call 716 `PetscDeviceSynchronize()`. 717 718 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but 719 the sub contexts do not wait for one another or `dctx` afterwards. 720 721 DAG representations: 722 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` 723 .vb 724 time -> 725 726 -> dctx ---------/- |= CALL =| - dctx -> 727 -> dsub[0] -----/ 728 -> ... -------/ 729 -> dsub[n-1] -/ 730 .ve 731 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` 732 .vb 733 time -> 734 735 -> dctx ---------/- |= CALL =| -\----> dctx ------> 736 -> dsub[0] -----/ \---> dsub[0] ---> 737 -> ... -------/ \--> ... -------> 738 -> dsub[n-1] -/ \-> dsub[n-1] -> 739 .ve 740 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` 741 .vb 742 time -> 743 744 -> dctx ----------/- |= CALL =| - dctx -> 745 -> dsub[0] ------/-----------------------> 746 -> ... --------/------------------------> 747 -> dsub[n-1] --/-------------------------> 748 .ve 749 750 .N ASYNC_API 751 752 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`, 753 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` 754 @*/ 755 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 756 { 757 // debugging only 758 std::string idList; 759 760 PetscFunctionBegin; 761 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 762 /* validity of dctx is checked in the wait-for loop */ 763 PetscAssertPointer(dsub, 4); 764 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); 765 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 766 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 767 /* first dctx waits on all the incoming edges */ 768 PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 769 for (PetscInt i = 0; i < n; ++i) { 770 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); 771 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); 772 if (PetscDefined(USE_DEBUG_AND_INFO)) { 773 PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id)); 774 if (i + 1 < n) PetscCallCXX(idList += ", "); 775 } 776 } 777 778 /* now we handle the aftermath */ 779 switch (joinMode) { 780 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { 781 const auto children = dctx->childIDs; 782 const auto maxchild = dctx->maxNumChildren; 783 auto &nchild = dctx->numChildren; 784 PetscInt j = 0; 785 786 PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild); 787 /* update child count while it's still fresh in memory */ 788 nchild -= n; 789 for (PetscInt i = 0; i < maxchild; ++i) { 790 if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) { 791 /* child is one of ours, can destroy it */ 792 PetscCall(PetscDeviceContextDestroy((*dsub) + j)); 793 /* reset the child slot */ 794 children[i] = 0; 795 if (++j == n) break; 796 } 797 } 798 /* gone through the loop but did not find every child */ 799 PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out in", n - j); 800 PetscCall(PetscFree(*dsub)); 801 } break; 802 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 803 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); 804 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 805 break; 806 default: 807 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); 808 } 809 PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 810 811 PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); 812 PetscFunctionReturn(PETSC_SUCCESS); 813 } 814 815 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 816 /*@C 817 PetscDeviceContextSynchronize - Block the host until all work queued on a 818 `PetscDeviceContext` has finished 819 820 Not Collective 821 822 Input Parameter: 823 . dctx - The `PetscDeviceContext` to synchronize 824 825 Level: beginner 826 827 Notes: 828 The host will not return from this routine until `dctx` is idle. Any and all memory 829 operations queued on or otherwise associated with (either explicitly or implicitly via 830 dependencies) are guaranteed to have finished and be globally visible on return. 831 832 In effect, this routine serves as memory and execution barrier. 833 834 DAG representation: 835 .vb 836 time -> 837 838 -> dctx - |= CALL =| - dctx -> 839 .ve 840 841 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` 842 @*/ 843 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 844 { 845 PetscFunctionBegin; 846 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 847 PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 848 /* if it isn't setup there is nothing to sync on */ 849 if (dctx->setup) { 850 PetscUseTypeMethod(dctx, synchronize); 851 PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx)); 852 } 853 PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 854 PetscFunctionReturn(PETSC_SUCCESS); 855 } 856 857 /* every device type has a vector of null PetscDeviceContexts -- one for each device */ 858 static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{}; 859 static auto nullContextsFinalizer = false; 860 861 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx) 862 { 863 PetscInt devid; 864 PetscDeviceType dtype; 865 866 PetscFunctionBegin; 867 PetscValidDevice(device, 2); 868 PetscAssertPointer(dctx, 3); 869 if (PetscUnlikely(!nullContextsFinalizer)) { 870 nullContextsFinalizer = true; 871 PetscCall(PetscRegisterFinalize([] { 872 PetscFunctionBegin; 873 for (auto &&dvec : nullContexts) { 874 for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx)); 875 PetscCallCXX(dvec.clear()); 876 } 877 nullContextsFinalizer = false; 878 PetscFunctionReturn(PETSC_SUCCESS); 879 })); 880 } 881 PetscCall(PetscDeviceGetDeviceId(device, &devid)); 882 PetscCall(PetscDeviceGetType(device, &dtype)); 883 { 884 auto &ctxlist = nullContexts[dtype]; 885 886 PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid); 887 // need to resize the container if not big enough because incrementing the iterator in 888 // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the 889 // current size of the container. 890 if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1)); 891 if (PetscUnlikely(!ctxlist[devid])) { 892 // we have not seen this device before 893 PetscCall(PetscDeviceContextCreate(dctx)); 894 PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid)); 895 { 896 const auto pobj = PetscObjectCast(*dctx); 897 const auto name = "null context " + std::to_string(devid); 898 const auto prefix = "null_context_" + std::to_string(devid) + '_'; 899 900 PetscCall(PetscObjectSetName(pobj, name.c_str())); 901 PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str())); 902 } 903 PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_DEFAULT)); 904 PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device)); 905 PetscCall(PetscDeviceContextSetUp(*dctx)); 906 // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert! 907 PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx)); 908 } else *dctx = ctxlist[devid]; 909 } 910 PetscFunctionReturn(PETSC_SUCCESS); 911 } 912 913 /* 914 Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are 915 guaranteed to always be globally blocking. 916 */ 917 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx) 918 { 919 PetscDeviceContext gctx; 920 PetscDevice gdev = nullptr; 921 922 PetscFunctionBegin; 923 PetscAssertPointer(dctx, 1); 924 PetscCall(PetscDeviceContextGetCurrentContext(&gctx)); 925 PetscCall(PetscDeviceContextGetDevice(gctx, &gdev)); 926 PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx)); 927 PetscFunctionReturn(PETSC_SUCCESS); 928 } 929 930 /*@C 931 PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database 932 933 Collective on `comm` or `dctx` 934 935 Input Parameters: 936 + comm - MPI communicator on which to query the options database (optional) 937 - dctx - The `PetscDeviceContext` to configure 938 939 Output Parameter: 940 . dctx - The `PetscDeviceContext` 941 942 Options Database Keys: 943 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - 944 `PetscDeviceContextSetStreamType()` 945 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType` 946 947 Level: beginner 948 949 Note: 950 The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is 951 used (which is always `PETSC_COMM_SELF`). 952 953 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`, 954 `PetscDeviceContextView()` 955 @*/ 956 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx) 957 { 958 const auto pobj = PetscObjectCast(dctx); 959 auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE); 960 auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE); 961 MPI_Comm old_comm = PETSC_COMM_SELF; 962 963 PetscFunctionBegin; 964 // do not user getoptionalnullcontext here, the user is not allowed to set it from options! 965 PetscValidDeviceContext(dctx, 2); 966 /* set the device type first */ 967 if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first)); 968 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first)); 969 970 if (comm == MPI_COMM_NULL) { 971 PetscCall(PetscObjectGetComm(pobj, &comm)); 972 } else { 973 // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so 974 // PetscObjectOptionsBegin() behaves as if dctx had comm 975 old_comm = Petsc::util::exchange(pobj->comm, comm); 976 } 977 978 PetscObjectOptionsBegin(pobj); 979 PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype)); 980 PetscOptionsEnd(); 981 // reset the comm (should be PETSC_COMM_SELF) 982 if (comm != MPI_COMM_NULL) pobj->comm = old_comm; 983 if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first)); 984 if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first)); 985 PetscCall(PetscDeviceContextSetUp(dctx)); 986 PetscFunctionReturn(PETSC_SUCCESS); 987 } 988 989 /*@C 990 PetscDeviceContextView - View a `PetscDeviceContext` 991 992 Collective on `viewer` 993 994 Input Parameters: 995 + dctx - The `PetscDeviceContext` 996 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`) 997 998 Level: beginner 999 1000 Note: 1001 If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this 1002 routine is collective on `PETSC_COMM_WORLD`. 1003 1004 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()` 1005 @*/ 1006 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer) 1007 { 1008 PetscBool iascii; 1009 1010 PetscFunctionBegin; 1011 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1012 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer)); 1013 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1014 PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii)); 1015 if (iascii) { 1016 auto stype = PETSC_STREAM_DEFAULT; 1017 PetscViewer sub; 1018 1019 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1020 PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub)); 1021 PetscCall(PetscViewerASCIIPushTab(sub)); 1022 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 1023 PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype])); 1024 PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren)); 1025 if (const auto nchild = dctx->numChildren) { 1026 PetscCall(PetscViewerASCIIPushTab(sub)); 1027 for (PetscInt i = 0; i < nchild; ++i) { 1028 if (i == nchild - 1) { 1029 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i])); 1030 } else { 1031 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i])); 1032 } 1033 } 1034 } 1035 PetscCall(PetscViewerASCIIPopTab(sub)); 1036 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1037 PetscCall(PetscViewerASCIIPushTab(viewer)); 1038 } 1039 if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer)); 1040 if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer)); 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } 1043 1044 /*@C 1045 PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options 1046 1047 Input Parameters: 1048 + dctx - The `PetscDeviceContext` to view 1049 . obj - Optional `PetscObject` to associate (may be `NULL`) 1050 - name - The command line option 1051 1052 Level: beginner 1053 1054 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()` 1055 @*/ 1056 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[]) 1057 { 1058 PetscFunctionBegin; 1059 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1060 if (obj) PetscValidHeader(obj, 2); 1061 PetscAssertPointer(name, 3); 1062 PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name)); 1063 PetscFunctionReturn(PETSC_SUCCESS); 1064 } 1065 1066 /*@C 1067 PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context 1068 1069 Input Parameter: 1070 . dctx - The `PetscDeviceContext` to get the stream from 1071 1072 Output Parameter: 1073 . handle - A pointer to the handle to the stream 1074 1075 Level: developer 1076 1077 Note: 1078 This routine is dangerous. It exists only for the most experienced users and 1079 internal PETSc development. 1080 1081 There is no way for PETSc's auto-dependency system to track what the caller does with the 1082 stream. 1083 1084 If the user uses the stream to copy memory that was previously modified by PETSc, or launches 1085 kernels that modify memory with the stream, it is the users responsibility to inform PETSc of 1086 their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a 1087 race condition. This race condition may manifest in nondeterministic ways. 1088 1089 Alternatively, the user may synchronize the stream immediately before and after use. This is 1090 the safest option. 1091 1092 Example Usage: 1093 .vb 1094 PetscDeviceContext dctx; 1095 PetscDeviceType type; 1096 void *handle; 1097 1098 PetscDeviceContextGetCurrentContext(&dctx); 1099 PetscDeviceContextGetStreamHandle(dctx, &handle); 1100 PetscDeviceContextGetDeviceType(dctx, &type); 1101 1102 if (type == PETSC_DEVICE_CUDA) { 1103 cudaStream_t stream = *(cudaStream_t *)handle; 1104 1105 my_cuda_kernel<<<1, 2, 3, stream>>>(); 1106 } 1107 .ve 1108 Alternatively, if type of `PetscDeviceContext` is known (for example `PETSC_DEVICE_HIP`), the 1109 user may pass in a pointer to stream handle directly\: 1110 .vb 1111 hipStream_t *stream; 1112 1113 // note the cast to void ** 1114 PetscDeviceContextGetStreamHandle(dctx, (void **)&stream); 1115 // note the dereference 1116 my_hip_kernel<<<1, 2, 3, *stream>>>(); 1117 .ve 1118 1119 .N ASYNC_API 1120 1121 .seealso: `PetscDeviceContext` 1122 @*/ 1123 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void **handle) 1124 { 1125 PetscFunctionBegin; 1126 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1127 PetscAssertPointer(handle, 2); 1128 PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, handle)); 1129 PetscFunctionReturn(PETSC_SUCCESS); 1130 } 1131