1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/ 2 #include <petsc/private/viewerimpl.h> // _p_PetscViewer for PetscObjectCast() 3 4 #include <petsc/private/cpp/object_pool.hpp> 5 #include <petsc/private/cpp/utility.hpp> 6 #include <petsc/private/cpp/array.hpp> 7 8 #include <vector> 9 #include <string> // std::to_string among other things 10 11 /* Define the allocator */ 12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> { 13 public: 14 PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept 15 { 16 PetscFunctionBegin; 17 PetscCall(PetscArrayzero(dctx, 1)); 18 PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView)); 19 PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx}); 20 PetscCall(underlying().reset(dctx, false)); 21 PetscFunctionReturn(PETSC_SUCCESS); 22 } 23 24 static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept 25 { 26 PetscFunctionBegin; 27 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); 28 PetscTryTypeMethod(dctx, destroy); 29 PetscCall(PetscDeviceDestroy(&dctx->device)); 30 PetscCall(PetscFree(dctx->childIDs)); 31 delete CxxDataCast(dctx); 32 PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE)); 33 PetscFunctionReturn(PETSC_SUCCESS); 34 } 35 36 static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept 37 { 38 PetscFunctionBegin; 39 if (zero) { 40 // reset the device if the user set it 41 if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) { 42 PetscTryTypeMethod(dctx, destroy); 43 PetscCall(PetscDeviceDestroy(&dctx->device)); 44 PetscCall(PetscArrayzero(dctx->ops, 1)); 45 dctx->data = nullptr; 46 } 47 PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx))); 48 dctx->numChildren = 0; 49 dctx->setup = PETSC_FALSE; 50 // don't deallocate the child array, rather just zero it out 51 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); 52 PetscCall(CxxDataCast(dctx)->clear()); 53 PetscCall(CxxDataCast(dctx)->reset_self(dctx)); 54 } 55 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 56 PetscFunctionReturn(PETSC_SUCCESS); 57 } 58 59 static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept 60 { 61 PetscFunctionBegin; 62 PetscCall(CxxDataCast(dctx)->reset_self(dctx)); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 }; 66 67 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool; 68 69 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 70 /*@C 71 PetscDeviceContextCreate - Creates a `PetscDeviceContext` 72 73 Not Collective 74 75 Output Parameter: 76 . dctx - The `PetscDeviceContext` 77 78 Level: beginner 79 80 Note: 81 Unlike almost every other PETSc class it is advised that most users use 82 `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of 83 different types are incompatible with one another; using `PetscDeviceContextDuplicate()` 84 ensures compatible types. 85 86 DAG representation: 87 .vb 88 time -> 89 90 |= CALL =| - dctx -> 91 .ve 92 93 .N ASYNC_API 94 95 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, 96 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, 97 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()` 98 @*/ 99 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 100 { 101 PetscFunctionBegin; 102 PetscAssertPointer(dctx, 1); 103 PetscCall(PetscDeviceInitializePackage()); 104 PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 105 PetscCall(contextPool.allocate(dctx)); 106 PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 111 /*@C 112 PetscDeviceContextDestroy - Frees a `PetscDeviceContext` 113 114 Not Collective 115 116 Input Parameter: 117 . dctx - The `PetscDeviceContext` 118 119 Level: beginner 120 121 Notes: 122 No implicit synchronization occurs due to this routine, all resources are released completely 123 asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on 124 `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before 125 calling this routine. 126 127 DAG representation: 128 .vb 129 time -> 130 131 -> dctx - |= CALL =| 132 .ve 133 134 Developer Notes: 135 `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever 136 growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool, 137 this should perhaps be implemented. 138 139 .N ASYNC_API 140 141 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 142 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` 143 @*/ 144 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 145 { 146 PetscFunctionBegin; 147 PetscAssertPointer(dctx, 1); 148 if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS); 149 PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 150 if (--(PetscObjectCast(*dctx)->refct) <= 0) { 151 PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx)); 152 PetscCall(contextPool.deallocate(dctx)); 153 } 154 PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 155 *dctx = nullptr; 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 /*@C 160 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a 161 `PetscDeviceContext` 162 163 Not Collective 164 165 Input Parameters: 166 + dctx - The `PetscDeviceContext` 167 - type - The `PetscStreamType` 168 169 Level: beginner 170 171 Note: 172 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 173 types and their interactions. If the `PetscDeviceContext` was previously set up and stream 174 type was changed, you must call `PetscDeviceContextSetUp()` again after this routine. 175 176 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, 177 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` 178 @*/ 179 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 180 { 181 PetscFunctionBegin; 182 // do not use getoptionalnullcontext here since we do not want the user to change the stream 183 // type 184 PetscValidDeviceContext(dctx, 1); 185 PetscValidStreamType(type, 2); 186 // only need to do complex swapping if the object has already been setup 187 if (dctx->setup && (dctx->streamType != type)) { 188 dctx->setup = PETSC_FALSE; 189 PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 190 PetscUseTypeMethod(dctx, changestreamtype, type); 191 PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 192 } 193 dctx->streamType = type; 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 /*@C 198 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a 199 `PetscDeviceContext` 200 201 Not Collective 202 203 Input Parameter: 204 . dctx - The `PetscDeviceContext` 205 206 Output Parameter: 207 . type - The `PetscStreamType` 208 209 Level: beginner 210 211 Note: 212 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 213 types and their interactions 214 215 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, 216 `PetscDeviceContextSetFromOptions()` 217 @*/ 218 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 219 { 220 PetscFunctionBegin; 221 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 222 PetscAssertPointer(type, 2); 223 *type = dctx->streamType; 224 PetscFunctionReturn(PETSC_SUCCESS); 225 } 226 227 /* 228 Actual function to set the device. 229 230 1. Repeatedly destroying and recreating internal data structures (like streams and events) 231 for recycled PetscDeviceContexts is not free. If done often, it does add up. 232 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or 233 default contexts. The default contexts *never* change type, and the children are extremely 234 unlikely to (chances are if you fork once, you will fork again very soon). 235 3. The only time this calculus changes is if the user themselves sets the device type. In 236 this case we do not know what the user has changed, so must always wipe the slate clean. 237 238 Thus we need to keep track whether the user explicitly sets the device contexts device. 239 */ 240 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set) 241 { 242 PetscFunctionBegin; 243 // do not use getoptionalnullcontext here since we do not want the user to change its device 244 PetscValidDeviceContext(dctx, 1); 245 PetscValidDevice(device, 2); 246 if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS); 247 PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 248 PetscTryTypeMethod(dctx, destroy); 249 PetscCall(PetscDeviceDestroy(&dctx->device)); 250 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); 251 PetscCall(PetscDeviceReference_Internal(device)); 252 // set it before calling the method 253 dctx->device = device; 254 PetscCall((*device->ops->createcontext)(dctx)); 255 PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 256 dctx->setup = PETSC_FALSE; 257 dctx->usersetdevice = user_set; 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type) 262 { 263 PetscDevice device; 264 265 PetscFunctionBegin; 266 PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device)); 267 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE)); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 /*@C 272 PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext` 273 274 Not Collective 275 276 Input Parameters: 277 + dctx - The `PetscDeviceContext` 278 - device - The `PetscDevice` 279 280 Level: intermediate 281 282 Notes: 283 This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must 284 also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly 285 necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will 286 always come equipped with the "default" device. 287 288 This routine is a no-op if `device` is already attached to `dctx`. 289 290 This routine may (but is very unlikely to) initialize the backend device and may incur 291 synchronization. 292 293 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`, 294 `PetscDeviceContextGetDeviceType()` 295 @*/ 296 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 297 { 298 PetscFunctionBegin; 299 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE)); 300 PetscFunctionReturn(PETSC_SUCCESS); 301 } 302 303 /*@C 304 PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext` 305 306 Not Collective 307 308 Input Parameter: 309 . dctx - the `PetscDeviceContext` 310 311 Output Parameter: 312 . device - The `PetscDevice` 313 314 Level: intermediate 315 316 Note: 317 This is a borrowed reference, the user should not destroy `device`. 318 319 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()` 320 @*/ 321 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 322 { 323 PetscFunctionBegin; 324 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 325 PetscAssertPointer(device, 2); 326 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id); 327 *device = dctx->device; 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 /*@C 332 PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext` 333 334 Not Collective 335 336 Input Parameter: 337 . dctx - The `PetscDeviceContext` 338 339 Output Parameter: 340 . type - The `PetscDeviceType` 341 342 Level: beginner 343 344 Note: 345 This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` -> 346 `PetscDeviceGetType()`. 347 348 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice` 349 @*/ 350 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type) 351 { 352 PetscDevice device = nullptr; 353 354 PetscFunctionBegin; 355 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 356 PetscAssertPointer(type, 2); 357 PetscCall(PetscDeviceContextGetDevice(dctx, &device)); 358 PetscCall(PetscDeviceGetType(device, type)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /*@C 363 PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use 364 365 Not Collective 366 367 Input Parameter: 368 . dctx - The `PetscDeviceContext` 369 370 Level: beginner 371 372 Developer Notes: 373 This routine is usually the stage where a `PetscDeviceContext` acquires device-side data 374 structures such as streams, events, and (possibly) handles. 375 376 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 377 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` 378 @*/ 379 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 380 { 381 PetscFunctionBegin; 382 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 383 if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS); 384 if (!dctx->device) { 385 const auto default_dtype = PETSC_DEVICE_DEFAULT(); 386 387 PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype])); 388 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype)); 389 } 390 PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 391 PetscUseTypeMethod(dctx, setup); 392 PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 393 dctx->setup = PETSC_TRUE; 394 PetscFunctionReturn(PETSC_SUCCESS); 395 } 396 397 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup) 398 { 399 PetscFunctionBegin; 400 PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 401 PetscCall(PetscDeviceContextCreate(dctxdup)); 402 PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype)); 403 if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice)); 404 PetscCall(PetscDeviceContextSetUp(*dctxdup)); 405 PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 406 PetscFunctionReturn(PETSC_SUCCESS); 407 } 408 409 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 410 /*@C 411 PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object 412 413 Not Collective 414 415 Input Parameter: 416 . dctx - The `PetscDeviceContext` to duplicate 417 418 Output Parameter: 419 . dctxdup - The duplicated `PetscDeviceContext` 420 421 Level: beginner 422 423 Notes: 424 This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as 425 another. Note however that `dctxdup` does not share any of the underlying data with `dctx`, 426 (including its current stream-state) they are completely separate objects. 427 428 There is no implied ordering between `dctx` or `dctxdup`. 429 430 DAG representation: 431 .vb 432 time -> 433 434 -> dctx - |= CALL =| - dctx ----> 435 - dctxdup -> 436 .ve 437 438 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 439 `PetscDeviceContextSetStreamType()` 440 @*/ 441 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) 442 { 443 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 444 445 PetscFunctionBegin; 446 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 447 PetscAssertPointer(dctxdup, 2); 448 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 449 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup)); 450 PetscFunctionReturn(PETSC_SUCCESS); 451 } 452 453 /*@C 454 PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle 455 456 Not Collective 457 458 Input Parameter: 459 . dctx - The `PetscDeviceContext` 460 461 Output Parameter: 462 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work 463 464 Level: intermediate 465 466 Note: 467 This routine only refers a singular context and does NOT take any of its children into 468 account. That is, if `dctx` is idle but has dependents who do have work this routine still 469 returns `PETSC_TRUE`. 470 471 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` 472 @*/ 473 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 474 { 475 PetscFunctionBegin; 476 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 477 PetscAssertPointer(idle, 2); 478 PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 479 PetscUseTypeMethod(dctx, query, idle); 480 PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 481 PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not")); 482 PetscFunctionReturn(PETSC_SUCCESS); 483 } 484 485 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 486 /*@C 487 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 488 489 Not Collective 490 491 Input Parameters: 492 + dctxa - The `PetscDeviceContext` object that is waiting 493 - dctxb - The `PetscDeviceContext` object that is being waited on 494 495 Level: beginner 496 497 Notes: 498 Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host 499 does not wait for the serialization to actually occur. 500 501 This routine uses only the state of `dctxb` at the moment this routine was called, so any 502 future work queued will not affect `dctxa`. It is safe to pass the same context to both 503 arguments (in which case this routine does nothing). 504 505 DAG representation: 506 .vb 507 time -> 508 509 -> dctxa ---/- |= CALL =| - dctxa -> 510 / 511 -> dctxb -/------------------------> 512 .ve 513 514 .N ASYNC_API 515 516 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` 517 @*/ 518 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 519 { 520 PetscObjectId bid; 521 522 PetscFunctionBegin; 523 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa)); 524 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb)); 525 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); 526 if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS); 527 bid = PetscObjectCast(dctxb)->id; 528 PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 529 PetscUseTypeMethod(dctxa, waitforcontext, dctxb); 530 PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot()); 531 PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 532 PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid)); 533 PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa))); 534 PetscFunctionReturn(PETSC_SUCCESS); 535 } 536 537 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 538 /*@C 539 PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent 540 context with a prescribed `PetscStreamType` 541 542 Not Collective, Asynchronous 543 544 Input Parameters: 545 + dctx - The parent `PetscDeviceContext` 546 . stype - The prescribed `PetscStreamType` 547 - n - The number of children to create 548 549 Output Parameter: 550 . dsub - The created child context(s) 551 552 Level: intermediate 553 554 Notes: 555 This routine creates `n` edges of a DAG from a source node which are causally dependent on the 556 source node. This causal dependency is established as-if by calling 557 `PetscDeviceContextWaitForContext()` on every child. 558 559 `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx` 560 expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed. 561 562 This routine only accounts for work queued on `dctx` up until calling this routine, any 563 subsequent work enqueued on `dctx` has no effect on `dsub`. 564 565 The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case 566 that they are different. This is useful in cases where a routine can locally exploit stream 567 parallelism without needing to worry about what stream type the incoming `PetscDeviceContext` 568 carries. 569 570 DAG representation: 571 .vb 572 time -> 573 574 -> dctx - |= CALL =| -\----> dctx ------> 575 \---> dsub[0] ---> 576 \--> ... -------> 577 \-> dsub[n-1] -> 578 .ve 579 580 .N ASYNC_API 581 582 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, 583 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()` 584 @*/ 585 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub) 586 { 587 // debugging only 588 std::string idList; 589 auto ninput = n; 590 591 PetscFunctionBegin; 592 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 593 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); 594 PetscAssertPointer(dsub, 4); 595 *dsub = nullptr; 596 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 597 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 598 PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 599 /* update child totals */ 600 dctx->numChildren += n; 601 /* now to find out if we have room */ 602 if (dctx->numChildren > dctx->maxNumChildren) { 603 const auto numChildren = dctx->numChildren; 604 auto &maxNumChildren = dctx->maxNumChildren; 605 auto numAllocated = numChildren; 606 607 /* no room, either from having too many kids or not having any */ 608 if (auto &childIDs = dctx->childIDs) { 609 // the difference is backwards because we have not updated maxNumChildren yet 610 numAllocated -= maxNumChildren; 611 /* have existing children, must reallocate them */ 612 PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs)); 613 /* clear the extra memory since realloc doesn't do it for us */ 614 PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated)); 615 } else { 616 /* have no children */ 617 PetscCall(PetscCalloc1(numChildren, &childIDs)); 618 } 619 /* update total number of children */ 620 maxNumChildren = numChildren; 621 } 622 PetscCall(PetscMalloc1(n, dsub)); 623 for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) { 624 auto &childID = dctx->childIDs[i]; 625 /* empty child slot */ 626 if (!childID) { 627 auto &childctx = (*dsub)[i]; 628 629 /* create the child context in the image of its parent */ 630 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx)); 631 PetscCall(PetscDeviceContextWaitForContext(childctx, dctx)); 632 /* register the child with its parent */ 633 PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID)); 634 if (PetscDefined(USE_DEBUG_AND_INFO)) { 635 PetscCallCXX(idList += std::to_string(childID)); 636 if (ninput != 1) PetscCallCXX(idList += ", "); 637 } 638 --ninput; 639 } 640 } 641 PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 642 PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str())); 643 PetscFunctionReturn(PETSC_SUCCESS); 644 } 645 646 /*@C 647 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 648 649 Not Collective, Asynchronous 650 651 Input Parameters: 652 + dctx - The parent `PetscDeviceContext` 653 - n - The number of children to create 654 655 Output Parameter: 656 . dsub - The created child context(s) 657 658 Level: beginner 659 660 Notes: 661 Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed 662 `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\: 663 664 .vb 665 PetscStreamType stype; 666 667 PetscDeviceContextGetStreamType(dctx, &stype); 668 PetscDeviceContextForkWithStreamType(dctx, stype, ...); 669 .ve 670 671 .N ASYNC_API 672 673 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`, 674 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` 675 @*/ 676 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 677 { 678 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 679 680 PetscFunctionBegin; 681 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 682 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 683 PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub)); 684 PetscFunctionReturn(PETSC_SUCCESS); 685 } 686 687 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 688 /*@C 689 PetscDeviceContextJoin - Converge a set of child contexts 690 691 Not Collective, Asynchronous 692 693 Input Parameters: 694 + dctx - A `PetscDeviceContext` to converge on 695 . n - The number of sub contexts to converge 696 . joinMode - The type of join to perform 697 - dsub - The sub contexts to converge 698 699 Level: beginner 700 701 Notes: 702 If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source 703 node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`) 704 which receives `n` edges (and optionally destroys them) which is dependent on the completion 705 of all incoming edges. 706 707 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be 708 destroyed by this routine. Thus all sub contexts must have been created with the `dctx` 709 passed to this routine. 710 711 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on 712 `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the 713 sync suffix does NOT refer to the host, i.e. this routine does NOT call 714 `PetscDeviceSynchronize()`. 715 716 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but 717 the sub contexts do not wait for one another or `dctx` afterwards. 718 719 DAG representations: 720 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` 721 .vb 722 time -> 723 724 -> dctx ---------/- |= CALL =| - dctx -> 725 -> dsub[0] -----/ 726 -> ... -------/ 727 -> dsub[n-1] -/ 728 .ve 729 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` 730 .vb 731 time -> 732 733 -> dctx ---------/- |= CALL =| -\----> dctx ------> 734 -> dsub[0] -----/ \---> dsub[0] ---> 735 -> ... -------/ \--> ... -------> 736 -> dsub[n-1] -/ \-> dsub[n-1] -> 737 .ve 738 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` 739 .vb 740 time -> 741 742 -> dctx ----------/- |= CALL =| - dctx -> 743 -> dsub[0] ------/-----------------------> 744 -> ... --------/------------------------> 745 -> dsub[n-1] --/-------------------------> 746 .ve 747 748 .N ASYNC_API 749 750 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`, 751 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` 752 @*/ 753 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 754 { 755 // debugging only 756 std::string idList; 757 758 PetscFunctionBegin; 759 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 760 /* validity of dctx is checked in the wait-for loop */ 761 PetscAssertPointer(dsub, 4); 762 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); 763 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 764 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 765 /* first dctx waits on all the incoming edges */ 766 PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 767 for (PetscInt i = 0; i < n; ++i) { 768 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); 769 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); 770 if (PetscDefined(USE_DEBUG_AND_INFO)) { 771 PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id)); 772 if (i + 1 < n) PetscCallCXX(idList += ", "); 773 } 774 } 775 776 /* now we handle the aftermath */ 777 switch (joinMode) { 778 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { 779 const auto children = dctx->childIDs; 780 const auto maxchild = dctx->maxNumChildren; 781 auto &nchild = dctx->numChildren; 782 PetscInt j = 0; 783 784 PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild); 785 /* update child count while it's still fresh in memory */ 786 nchild -= n; 787 for (PetscInt i = 0; i < maxchild; ++i) { 788 if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) { 789 /* child is one of ours, can destroy it */ 790 PetscCall(PetscDeviceContextDestroy((*dsub) + j)); 791 /* reset the child slot */ 792 children[i] = 0; 793 if (++j == n) break; 794 } 795 } 796 /* gone through the loop but did not find every child */ 797 PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j); 798 PetscCall(PetscFree(*dsub)); 799 } break; 800 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 801 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); 802 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 803 break; 804 default: 805 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); 806 } 807 PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 808 809 PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 814 /*@C 815 PetscDeviceContextSynchronize - Block the host until all work queued on a 816 `PetscDeviceContext` has finished 817 818 Not Collective 819 820 Input Parameter: 821 . dctx - The `PetscDeviceContext` to synchronize 822 823 Level: beginner 824 825 Notes: 826 The host will not return from this routine until `dctx` is idle. Any and all memory 827 operations queued on or otherwise associated with (either explicitly or implicitly via 828 dependencies) are guaranteed to have finished and be globally visible on return. 829 830 In effect, this routine serves as memory and execution barrier. 831 832 DAG representation: 833 .vb 834 time -> 835 836 -> dctx - |= CALL =| - dctx -> 837 .ve 838 839 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` 840 @*/ 841 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 842 { 843 PetscFunctionBegin; 844 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 845 PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 846 /* if it isn't setup there is nothing to sync on */ 847 if (dctx->setup) { 848 PetscUseTypeMethod(dctx, synchronize); 849 PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx)); 850 } 851 PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 852 PetscFunctionReturn(PETSC_SUCCESS); 853 } 854 855 /* every device type has a vector of null PetscDeviceContexts -- one for each device */ 856 static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{}; 857 static auto nullContextsFinalizer = false; 858 859 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx) 860 { 861 PetscInt devid; 862 PetscDeviceType dtype; 863 864 PetscFunctionBegin; 865 PetscValidDevice(device, 2); 866 PetscAssertPointer(dctx, 3); 867 if (PetscUnlikely(!nullContextsFinalizer)) { 868 nullContextsFinalizer = true; 869 PetscCall(PetscRegisterFinalize([] { 870 PetscFunctionBegin; 871 for (auto &&dvec : nullContexts) { 872 for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx)); 873 PetscCallCXX(dvec.clear()); 874 } 875 nullContextsFinalizer = false; 876 PetscFunctionReturn(PETSC_SUCCESS); 877 })); 878 } 879 PetscCall(PetscDeviceGetDeviceId(device, &devid)); 880 PetscCall(PetscDeviceGetType(device, &dtype)); 881 { 882 auto &ctxlist = nullContexts[dtype]; 883 884 PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid); 885 // need to resize the container if not big enough because incrementing the iterator in 886 // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the 887 // current size of the container. 888 if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1)); 889 if (PetscUnlikely(!ctxlist[devid])) { 890 // we have not seen this device before 891 PetscCall(PetscDeviceContextCreate(dctx)); 892 PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid)); 893 { 894 const auto pobj = PetscObjectCast(*dctx); 895 const auto name = "null context " + std::to_string(devid); 896 const auto prefix = "null_context_" + std::to_string(devid) + '_'; 897 898 PetscCall(PetscObjectSetName(pobj, name.c_str())); 899 PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str())); 900 } 901 PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING)); 902 PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device)); 903 PetscCall(PetscDeviceContextSetUp(*dctx)); 904 // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert! 905 PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx)); 906 } else *dctx = ctxlist[devid]; 907 } 908 PetscFunctionReturn(PETSC_SUCCESS); 909 } 910 911 /* 912 Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are 913 guaranteed to always be globally blocking. 914 */ 915 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx) 916 { 917 PetscDeviceContext gctx; 918 PetscDevice gdev = nullptr; 919 920 PetscFunctionBegin; 921 PetscAssertPointer(dctx, 1); 922 PetscCall(PetscDeviceContextGetCurrentContext(&gctx)); 923 PetscCall(PetscDeviceContextGetDevice(gctx, &gdev)); 924 PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx)); 925 PetscFunctionReturn(PETSC_SUCCESS); 926 } 927 928 /*@C 929 PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database 930 931 Collective on `comm` or `dctx` 932 933 Input Parameters: 934 + comm - MPI communicator on which to query the options database (optional) 935 - dctx - The `PetscDeviceContext` to configure 936 937 Output Parameter: 938 . dctx - The `PetscDeviceContext` 939 940 Options Database Keys: 941 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - 942 `PetscDeviceContextSetStreamType()` 943 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType` 944 945 Level: beginner 946 947 Note: 948 The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is 949 used (which is always `PETSC_COMM_SELF`). 950 951 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`, 952 `PetscDeviceContextView()` 953 @*/ 954 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx) 955 { 956 const auto pobj = PetscObjectCast(dctx); 957 auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE); 958 auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE); 959 MPI_Comm old_comm = PETSC_COMM_SELF; 960 961 PetscFunctionBegin; 962 // do not user getoptionalnullcontext here, the user is not allowed to set it from options! 963 PetscValidDeviceContext(dctx, 2); 964 /* set the device type first */ 965 if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first)); 966 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first)); 967 968 if (comm == MPI_COMM_NULL) { 969 PetscCall(PetscObjectGetComm(pobj, &comm)); 970 } else { 971 // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so 972 // PetscObjectOptionsBegin() behaves as if dctx had comm 973 old_comm = Petsc::util::exchange(pobj->comm, comm); 974 } 975 976 PetscObjectOptionsBegin(pobj); 977 PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype)); 978 PetscOptionsEnd(); 979 // reset the comm (should be PETSC_COMM_SELF) 980 if (comm != MPI_COMM_NULL) pobj->comm = old_comm; 981 if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first)); 982 if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first)); 983 PetscCall(PetscDeviceContextSetUp(dctx)); 984 PetscFunctionReturn(PETSC_SUCCESS); 985 } 986 987 /*@C 988 PetscDeviceContextView - View a `PetscDeviceContext` 989 990 Collective on `viewer` 991 992 Input Parameters: 993 + dctx - The `PetscDeviceContext` 994 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`) 995 996 Level: beginner 997 998 Note: 999 If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this 1000 routine is collective on `PETSC_COMM_WORLD`. 1001 1002 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()` 1003 @*/ 1004 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer) 1005 { 1006 PetscBool iascii; 1007 1008 PetscFunctionBegin; 1009 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1010 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer)); 1011 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1012 PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii)); 1013 if (iascii) { 1014 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 1015 PetscViewer sub; 1016 1017 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1018 PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub)); 1019 PetscCall(PetscViewerASCIIPushTab(sub)); 1020 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 1021 PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype])); 1022 PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren)); 1023 if (const auto nchild = dctx->numChildren) { 1024 PetscCall(PetscViewerASCIIPushTab(sub)); 1025 for (PetscInt i = 0; i < nchild; ++i) { 1026 if (i == nchild - 1) { 1027 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i])); 1028 } else { 1029 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i])); 1030 } 1031 } 1032 } 1033 PetscCall(PetscViewerASCIIPopTab(sub)); 1034 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1035 PetscCall(PetscViewerFlush(viewer)); 1036 PetscCall(PetscViewerASCIIPushTab(viewer)); 1037 } 1038 if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer)); 1039 if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 /*@C 1044 PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options 1045 1046 Input Parameters: 1047 + dctx - The `PetscDeviceContext` to view 1048 . obj - Optional `PetscObject` to associate (may be `NULL`) 1049 - name - The command line option 1050 1051 Level: beginner 1052 1053 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()` 1054 @*/ 1055 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[]) 1056 { 1057 PetscFunctionBegin; 1058 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1059 if (obj) PetscValidHeader(obj, 2); 1060 PetscAssertPointer(name, 3); 1061 PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name)); 1062 PetscFunctionReturn(PETSC_SUCCESS); 1063 } 1064 1065 /*@C 1066 PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context 1067 1068 Input Parameters: 1069 + dctx - The `PetscDeviceContext` to get the stream from 1070 - handle - A handle to the stream 1071 1072 Level: developer 1073 1074 Note: 1075 This routine is dangerous. It exists only for the most experienced users and 1076 internal PETSc developement. 1077 1078 There is no way for PETSc's auto-dependency system to track what the caller does with the 1079 stream. 1080 1081 If the user uses the stream to copy memory that was previously modified by PETSc, or launches 1082 kernels that modify memory with the stream, it is the users responsibility to inform PETSc of 1083 their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a 1084 race condition. This race condition may manifest in nondeterministic ways. 1085 1086 Alternatively, the user may synchronize the stream immediately before and after use. This is 1087 the safest option. 1088 1089 Example Usage: 1090 .vb 1091 PetscDeviceContext dctx; 1092 PetscDeviceType type; 1093 void *handle; 1094 1095 PetscDeviceContextGetCurrentContext(&dctx); 1096 PetscDeviceContextGetStreamHandle(dctx, &handle); 1097 PetscDeviceContextGetDeviceType(dctx, &type); 1098 1099 if (type == PETSC_DEVICE_CUDA) { 1100 cudsStream_t stream = *(cudaStream_t*)handle; 1101 1102 my_cuda_kernel<<<1, 2, 3, stream>>>(); 1103 } 1104 .ve 1105 1106 .N ASYNC_API 1107 1108 .seealso: `PetscDeviceContext` 1109 @*/ 1110 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle) 1111 { 1112 PetscFunctionBegin; 1113 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1114 PetscAssertPointer(handle, 2); 1115 PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle)); 1116 PetscFunctionReturn(PETSC_SUCCESS); 1117 } 1118