1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/ 2 #include <petsc/private/viewerimpl.h> // _p_PetscViewer for PetscObjectCast() 3 4 #include <petsc/private/cpp/object_pool.hpp> 5 #include <petsc/private/cpp/utility.hpp> 6 #include <petsc/private/cpp/array.hpp> 7 8 #include <vector> 9 #include <string> // std::to_string among other things 10 11 /* Define the allocator */ 12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> { 13 public: 14 PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept 15 { 16 PetscFunctionBegin; 17 PetscCall(PetscArrayzero(dctx, 1)); 18 PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView)); 19 PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData()); 20 PetscCall(underlying().reset(dctx, false)); 21 PetscFunctionReturn(PETSC_SUCCESS); 22 } 23 24 static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept 25 { 26 PetscFunctionBegin; 27 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); 28 PetscTryTypeMethod(dctx, destroy); 29 PetscCall(PetscDeviceDestroy(&dctx->device)); 30 PetscCall(PetscFree(dctx->childIDs)); 31 delete CxxDataCast(dctx); 32 PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE)); 33 PetscFunctionReturn(PETSC_SUCCESS); 34 } 35 36 static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept 37 { 38 PetscFunctionBegin; 39 if (zero) { 40 // reset the device if the user set it 41 if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) { 42 PetscTryTypeMethod(dctx, destroy); 43 PetscCall(PetscDeviceDestroy(&dctx->device)); 44 PetscCall(PetscArrayzero(dctx->ops, 1)); 45 dctx->data = nullptr; 46 } 47 PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx))); 48 dctx->numChildren = 0; 49 dctx->setup = PETSC_FALSE; 50 // don't deallocate the child array, rather just zero it out 51 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); 52 PetscCall(CxxDataCast(dctx)->clear()); 53 } 54 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode invalidate_(PetscDeviceContext) noexcept { return PETSC_SUCCESS; } 59 }; 60 61 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool; 62 63 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 64 /*@C 65 PetscDeviceContextCreate - Creates a `PetscDeviceContext` 66 67 Not Collective 68 69 Output Parameter: 70 . dctx - The `PetscDeviceContext` 71 72 Level: beginner 73 74 Note: 75 Unlike almost every other PETSc class it is advised that most users use 76 `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of 77 different types are incompatible with one another; using `PetscDeviceContextDuplicate()` 78 ensures compatible types. 79 80 DAG representation: 81 .vb 82 time -> 83 84 |= CALL =| - dctx -> 85 .ve 86 87 .N ASYNC_API 88 89 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, 90 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, 91 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()` 92 @*/ 93 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 94 { 95 PetscFunctionBegin; 96 PetscValidPointer(dctx, 1); 97 PetscCall(PetscDeviceInitializePackage()); 98 PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 99 PetscCall(contextPool.allocate(dctx)); 100 PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 101 PetscFunctionReturn(PETSC_SUCCESS); 102 } 103 104 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 105 /*@C 106 PetscDeviceContextDestroy - Frees a `PetscDeviceContext` 107 108 Not Collective 109 110 Input Parameter: 111 . dctx - The `PetscDeviceContext` 112 113 Level: beginner 114 115 Notes: 116 No implicit synchronization occurs due to this routine, all resources are released completely 117 asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on 118 `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before 119 calling this routine. 120 121 DAG representation: 122 .vb 123 time -> 124 125 -> dctx - |= CALL =| 126 .ve 127 128 Developer Notes: 129 `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever 130 growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool, 131 this should perhaps be implemented. 132 133 .N ASYNC_API 134 135 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 136 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` 137 @*/ 138 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 139 { 140 PetscFunctionBegin; 141 PetscValidPointer(dctx, 1); 142 if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS); 143 PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 144 if (--(PetscObjectCast(*dctx)->refct) <= 0) { 145 PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx)); 146 PetscCall(contextPool.deallocate(dctx)); 147 } 148 PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 149 *dctx = nullptr; 150 PetscFunctionReturn(PETSC_SUCCESS); 151 } 152 153 /*@C 154 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a 155 `PetscDeviceContext` 156 157 Not Collective 158 159 Input Parameters: 160 + dctx - The `PetscDeviceContext` 161 - type - The `PetscStreamType` 162 163 Level: beginner 164 165 Note: 166 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 167 types and their interactions. If the `PetscDeviceContext` was previously set up and stream 168 type was changed, you must call `PetscDeviceContextSetUp()` again after this routine. 169 170 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, 171 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` 172 @*/ 173 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 174 { 175 PetscFunctionBegin; 176 // do not use getoptionalnullcontext here since we do not want the user to change the stream 177 // type 178 PetscValidDeviceContext(dctx, 1); 179 PetscValidStreamType(type, 2); 180 // only need to do complex swapping if the object has already been setup 181 if (dctx->setup && (dctx->streamType != type)) { 182 dctx->setup = PETSC_FALSE; 183 PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 184 PetscUseTypeMethod(dctx, changestreamtype, type); 185 PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 186 } 187 dctx->streamType = type; 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 /*@C 192 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a 193 `PetscDeviceContext` 194 195 Not Collective 196 197 Input Parameter: 198 . dctx - The `PetscDeviceContext` 199 200 Output Parameter: 201 . type - The `PetscStreamType` 202 203 Level: beginner 204 205 Note: 206 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 207 types and their interactions 208 209 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, 210 `PetscDeviceContextSetFromOptions()` 211 @*/ 212 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 213 { 214 PetscFunctionBegin; 215 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 216 PetscValidPointer(type, 2); 217 *type = dctx->streamType; 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 /* 222 Actual function to set the device. 223 224 1. Repeatedly destroying and recreating internal data structures (like streams and events) 225 for recycled PetscDeviceContexts is not free. If done often, it does add up. 226 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or 227 default contexts. The default contexts *never* change type, and the children are extremely 228 unlikely to (chances are if you fork once, you will fork again very soon). 229 3. The only time this calculus changes is if the user themselves sets the device type. In 230 this case we do not know what the user has changed, so must always wipe the slate clean. 231 232 Thus we need to keep track whether the user explicitly sets the device contexts device. 233 */ 234 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set) 235 { 236 PetscFunctionBegin; 237 // do not use getoptionalnullcontext here since we do not want the user to change its device 238 PetscValidDeviceContext(dctx, 1); 239 PetscValidDevice(device, 2); 240 if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS); 241 PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 242 PetscTryTypeMethod(dctx, destroy); 243 PetscCall(PetscDeviceDestroy(&dctx->device)); 244 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); 245 PetscCall(PetscDeviceReference_Internal(device)); 246 // set it before calling the method 247 dctx->device = device; 248 PetscCall((*device->ops->createcontext)(dctx)); 249 PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 250 dctx->setup = PETSC_FALSE; 251 dctx->usersetdevice = user_set; 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type) 256 { 257 PetscDevice device; 258 259 PetscFunctionBegin; 260 PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device)); 261 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE)); 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 /*@C 266 PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext` 267 268 Not Collective 269 270 Input Parameters: 271 + dctx - The `PetscDeviceContext` 272 - device - The `PetscDevice` 273 274 Level: intermediate 275 276 Notes: 277 This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must 278 also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly 279 necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will 280 always come equipped with the "default" device. 281 282 This routine is a no-op if `device` is already attached to `dctx`. 283 284 This routine may (but is very unlikely to) initialize the backend device and may incur 285 synchronization. 286 287 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`, 288 `PetscDeviceContextGetDeviceType()` 289 @*/ 290 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 291 { 292 PetscFunctionBegin; 293 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE)); 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 /*@C 298 PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext` 299 300 Not Collective 301 302 Input Parameter: 303 . dctx - the `PetscDeviceContext` 304 305 Output Parameter: 306 . device - The `PetscDevice` 307 308 Level: intermediate 309 310 Note: 311 This is a borrowed reference, the user should not destroy `device`. 312 313 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()` 314 @*/ 315 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 316 { 317 PetscFunctionBegin; 318 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 319 PetscValidPointer(device, 2); 320 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id); 321 *device = dctx->device; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext` 327 328 Not Collective 329 330 Input Parameter: 331 . dctx - The `PetscDeviceContext` 332 333 Output Parameter: 334 . type - The `PetscDeviceType` 335 336 Level: beginner 337 338 Note: 339 This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` -> 340 `PetscDeviceGetType()`. 341 342 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice` 343 @*/ 344 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type) 345 { 346 PetscDevice device = nullptr; 347 348 PetscFunctionBegin; 349 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 350 PetscValidPointer(type, 2); 351 PetscCall(PetscDeviceContextGetDevice(dctx, &device)); 352 PetscCall(PetscDeviceGetType(device, type)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /*@C 357 PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use 358 359 Not Collective 360 361 Input Parameter: 362 . dctx - The `PetscDeviceContext` 363 364 Level: beginner 365 366 Developer Notes: 367 This routine is usually the stage where a `PetscDeviceContext` acquires device-side data 368 structures such as streams, events, and (possibly) handles. 369 370 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 371 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` 372 @*/ 373 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 374 { 375 PetscFunctionBegin; 376 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 377 if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS); 378 if (!dctx->device) { 379 const auto default_dtype = PETSC_DEVICE_DEFAULT(); 380 381 PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype])); 382 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype)); 383 } 384 PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 385 PetscUseTypeMethod(dctx, setup); 386 PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 387 dctx->setup = PETSC_TRUE; 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup) 392 { 393 PetscFunctionBegin; 394 PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 395 PetscCall(PetscDeviceContextCreate(dctxdup)); 396 PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype)); 397 if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice)); 398 PetscCall(PetscDeviceContextSetUp(*dctxdup)); 399 PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 404 /*@C 405 PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object 406 407 Not Collective 408 409 Input Parameter: 410 . dctx - The `PetscDeviceContext` to duplicate 411 412 Output Parameter: 413 . dctxdup - The duplicated `PetscDeviceContext` 414 415 Level: beginner 416 417 Notes: 418 This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as 419 another. Note however that `dctxdup` does not share any of the underlying data with `dctx`, 420 (including its current stream-state) they are completely separate objects. 421 422 There is no implied ordering between `dctx` or `dctxdup`. 423 424 DAG representation: 425 .vb 426 time -> 427 428 -> dctx - |= CALL =| - dctx ----> 429 - dctxdup -> 430 .ve 431 432 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 433 `PetscDeviceContextSetStreamType()` 434 @*/ 435 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) 436 { 437 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 438 439 PetscFunctionBegin; 440 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 441 PetscValidPointer(dctxdup, 2); 442 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 443 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup)); 444 PetscFunctionReturn(PETSC_SUCCESS); 445 } 446 447 /*@C 448 PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle 449 450 Not Collective 451 452 Input Parameter: 453 . dctx - The `PetscDeviceContext` 454 455 Output Parameter: 456 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work 457 458 Level: intermediate 459 460 Note: 461 This routine only refers a singular context and does NOT take any of its children into 462 account. That is, if `dctx` is idle but has dependents who do have work this routine still 463 returns `PETSC_TRUE`. 464 465 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` 466 @*/ 467 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 468 { 469 PetscFunctionBegin; 470 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 471 PetscValidPointer(idle, 2); 472 PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 473 PetscUseTypeMethod(dctx, query, idle); 474 PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 475 PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not")); 476 PetscFunctionReturn(PETSC_SUCCESS); 477 } 478 479 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 480 /*@C 481 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 482 483 Not Collective 484 485 Input Parameters: 486 + dctxa - The `PetscDeviceContext` object that is waiting 487 - dctxb - The `PetscDeviceContext` object that is being waited on 488 489 Level: beginner 490 491 Notes: 492 Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host 493 does not wait for the serialization to actually occur. 494 495 This routine uses only the state of `dctxb` at the moment this routine was called, so any 496 future work queued will not affect `dctxa`. It is safe to pass the same context to both 497 arguments (in which case this routine does nothing). 498 499 DAG representation: 500 .vb 501 time -> 502 503 -> dctxa ---/- |= CALL =| - dctxa -> 504 / 505 -> dctxb -/------------------------> 506 .ve 507 508 .N ASYNC_API 509 510 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` 511 @*/ 512 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 513 { 514 PetscObject aobj; 515 516 PetscFunctionBegin; 517 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa)); 518 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb)); 519 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); 520 if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS); 521 aobj = PetscObjectCast(dctxa); 522 PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 523 PetscUseTypeMethod(dctxa, waitforcontext, dctxb); 524 PetscCallCXX(CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb)); 525 PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 526 PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id)); 527 PetscCall(PetscObjectStateIncrease(aobj)); 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 532 /*@C 533 PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent 534 context with a prescribed `PetscStreamType` 535 536 Not Collective, Asynchronous 537 538 Input Parameters: 539 + dctx - The parent `PetscDeviceContext` 540 . stype - The prescribed `PetscStreamType` 541 - n - The number of children to create 542 543 Output Parameter: 544 . dsub - The created child context(s) 545 546 Level: intermediate 547 548 Notes: 549 This routine creates `n` edges of a DAG from a source node which are causally dependent on the 550 source node. This causal dependency is established as-if by calling 551 `PetscDeviceContextWaitForContext()` on every child. 552 553 `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx` 554 expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed. 555 556 This routine only accounts for work queued on `dctx` up until calling this routine, any 557 subsequent work enqueued on `dctx` has no effect on `dsub`. 558 559 The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case 560 that they are different. This is useful in cases where a routine can locally exploit stream 561 parallelism without needing to worry about what stream type the incoming `PetscDeviceContext` 562 carries. 563 564 DAG representation: 565 .vb 566 time -> 567 568 -> dctx - |= CALL =| -\----> dctx ------> 569 \---> dsub[0] ---> 570 \--> ... -------> 571 \-> dsub[n-1] -> 572 .ve 573 574 .N ASYNC_API 575 576 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, 577 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()` 578 @*/ 579 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub) 580 { 581 // debugging only 582 std::string idList; 583 auto ninput = n; 584 585 PetscFunctionBegin; 586 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 587 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); 588 PetscValidPointer(dsub, 4); 589 *dsub = nullptr; 590 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 591 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 592 PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 593 /* update child totals */ 594 dctx->numChildren += n; 595 /* now to find out if we have room */ 596 if (dctx->numChildren > dctx->maxNumChildren) { 597 const auto numChildren = dctx->numChildren; 598 auto &maxNumChildren = dctx->maxNumChildren; 599 auto numAllocated = numChildren; 600 601 /* no room, either from having too many kids or not having any */ 602 if (auto &childIDs = dctx->childIDs) { 603 // the difference is backwards because we have not updated maxNumChildren yet 604 numAllocated -= maxNumChildren; 605 /* have existing children, must reallocate them */ 606 PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs)); 607 /* clear the extra memory since realloc doesn't do it for us */ 608 PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated)); 609 } else { 610 /* have no children */ 611 PetscCall(PetscCalloc1(numChildren, &childIDs)); 612 } 613 /* update total number of children */ 614 maxNumChildren = numChildren; 615 } 616 PetscCall(PetscMalloc1(n, dsub)); 617 for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) { 618 auto &childID = dctx->childIDs[i]; 619 /* empty child slot */ 620 if (!childID) { 621 auto &childctx = (*dsub)[i]; 622 623 /* create the child context in the image of its parent */ 624 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx)); 625 PetscCall(PetscDeviceContextWaitForContext(childctx, dctx)); 626 /* register the child with its parent */ 627 PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID)); 628 if (PetscDefined(USE_DEBUG_AND_INFO)) { 629 PetscCallCXX(idList += std::to_string(childID)); 630 if (ninput != 1) PetscCallCXX(idList += ", "); 631 } 632 --ninput; 633 } 634 } 635 PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 636 PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str())); 637 PetscFunctionReturn(PETSC_SUCCESS); 638 } 639 640 /*@C 641 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 642 643 Not Collective, Asynchronous 644 645 Input Parameters: 646 + dctx - The parent `PetscDeviceContext` 647 - n - The number of children to create 648 649 Output Parameter: 650 . dsub - The created child context(s) 651 652 Level: beginner 653 654 Notes: 655 Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed 656 `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\: 657 658 .vb 659 PetscStreamType stype; 660 661 PetscDeviceContextGetStreamType(dctx, &stype); 662 PetscDeviceContextForkWithStreamType(dctx, stype, ...); 663 .ve 664 665 .N ASYNC_API 666 667 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`, 668 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` 669 @*/ 670 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 671 { 672 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 673 674 PetscFunctionBegin; 675 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 676 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 677 PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub)); 678 PetscFunctionReturn(PETSC_SUCCESS); 679 } 680 681 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 682 /*@C 683 PetscDeviceContextJoin - Converge a set of child contexts 684 685 Not Collective, Asynchronous 686 687 Input Parameters: 688 + dctx - A `PetscDeviceContext` to converge on 689 . n - The number of sub contexts to converge 690 . joinMode - The type of join to perform 691 - dsub - The sub contexts to converge 692 693 Level: beginner 694 695 Notes: 696 If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source 697 node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`) 698 which receives `n` edges (and optionally destroys them) which is dependent on the completion 699 of all incoming edges. 700 701 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be 702 destroyed by this routine. Thus all sub contexts must have been created with the `dctx` 703 passed to this routine. 704 705 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on 706 `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the 707 sync suffix does NOT refer to the host, i.e. this routine does NOT call 708 `PetscDeviceSynchronize()`. 709 710 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but 711 the sub contexts do not wait for one another or `dctx` afterwards. 712 713 DAG representations: 714 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` 715 .vb 716 time -> 717 718 -> dctx ---------/- |= CALL =| - dctx -> 719 -> dsub[0] -----/ 720 -> ... -------/ 721 -> dsub[n-1] -/ 722 .ve 723 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` 724 .vb 725 time -> 726 727 -> dctx ---------/- |= CALL =| -\----> dctx ------> 728 -> dsub[0] -----/ \---> dsub[0] ---> 729 -> ... -------/ \--> ... -------> 730 -> dsub[n-1] -/ \-> dsub[n-1] -> 731 .ve 732 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` 733 .vb 734 time -> 735 736 -> dctx ----------/- |= CALL =| - dctx -> 737 -> dsub[0] ------/-----------------------> 738 -> ... --------/------------------------> 739 -> dsub[n-1] --/-------------------------> 740 .ve 741 742 .N ASYNC_API 743 744 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`, 745 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` 746 @*/ 747 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 748 { 749 // debugging only 750 std::string idList; 751 752 PetscFunctionBegin; 753 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 754 /* validity of dctx is checked in the wait-for loop */ 755 PetscValidPointer(dsub, 4); 756 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); 757 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 758 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 759 /* first dctx waits on all the incoming edges */ 760 PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 761 for (PetscInt i = 0; i < n; ++i) { 762 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); 763 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); 764 if (PetscDefined(USE_DEBUG_AND_INFO)) { 765 PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id)); 766 if (i + 1 < n) PetscCallCXX(idList += ", "); 767 } 768 } 769 770 /* now we handle the aftermath */ 771 switch (joinMode) { 772 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { 773 const auto children = dctx->childIDs; 774 const auto maxchild = dctx->maxNumChildren; 775 auto &nchild = dctx->numChildren; 776 PetscInt j = 0; 777 778 PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild); 779 /* update child count while it's still fresh in memory */ 780 nchild -= n; 781 for (PetscInt i = 0; i < maxchild; ++i) { 782 if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) { 783 /* child is one of ours, can destroy it */ 784 PetscCall(PetscDeviceContextDestroy((*dsub) + j)); 785 /* reset the child slot */ 786 children[i] = 0; 787 if (++j == n) break; 788 } 789 } 790 /* gone through the loop but did not find every child */ 791 PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j); 792 PetscCall(PetscFree(*dsub)); 793 } break; 794 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 795 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); 796 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 797 break; 798 default: 799 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); 800 } 801 PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 802 803 PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); 804 PetscFunctionReturn(PETSC_SUCCESS); 805 } 806 807 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 808 /*@C 809 PetscDeviceContextSynchronize - Block the host until all work queued on a 810 `PetscDeviceContext` has finished 811 812 Not Collective 813 814 Input Parameter: 815 . dctx - The `PetscDeviceContext` to synchronize 816 817 Level: beginner 818 819 Notes: 820 The host will not return from this routine until `dctx` is idle. Any and all memory 821 operations queued on or otherwise associated with (either explicitly or implicitly via 822 dependencies) are guaranteed to have finished and be globally visible on return. 823 824 In effect, this routine serves as memory and execution barrier. 825 826 DAG representation: 827 .vb 828 time -> 829 830 -> dctx - |= CALL =| - dctx -> 831 .ve 832 833 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` 834 @*/ 835 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 836 { 837 PetscFunctionBegin; 838 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 839 PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 840 /* if it isn't setup there is nothing to sync on */ 841 if (dctx->setup) { 842 PetscUseTypeMethod(dctx, synchronize); 843 PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx)); 844 } 845 PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 /* every device type has a vector of null PetscDeviceContexts -- one for each device */ 850 static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{}; 851 static auto nullContextsFinalizer = false; 852 853 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx) 854 { 855 PetscInt devid; 856 PetscDeviceType dtype; 857 858 PetscFunctionBegin; 859 PetscValidDevice(device, 2); 860 PetscValidPointer(dctx, 3); 861 if (PetscUnlikely(!nullContextsFinalizer)) { 862 const auto finalizer = [] { 863 PetscFunctionBegin; 864 for (auto &&dvec : nullContexts) { 865 for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx)); 866 PetscCallCXX(dvec.clear()); 867 } 868 nullContextsFinalizer = false; 869 PetscFunctionReturn(PETSC_SUCCESS); 870 }; 871 872 nullContextsFinalizer = true; 873 PetscCall(PetscRegisterFinalize(std::move(finalizer))); 874 } 875 PetscCall(PetscDeviceGetDeviceId(device, &devid)); 876 PetscCall(PetscDeviceGetType(device, &dtype)); 877 { 878 auto &ctxlist = nullContexts[dtype]; 879 880 PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid); 881 // need to resize the container if not big enough because incrementing the iterator in 882 // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the 883 // current size of the container. 884 if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1)); 885 if (PetscUnlikely(!ctxlist[devid])) { 886 // we have not seen this device before 887 PetscCall(PetscDeviceContextCreate(dctx)); 888 PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid)); 889 { 890 const auto pobj = PetscObjectCast(*dctx); 891 const auto name = "null context " + std::to_string(devid); 892 const auto prefix = "null_context_" + std::to_string(devid) + '_'; 893 894 PetscCall(PetscObjectSetName(pobj, name.c_str())); 895 PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str())); 896 } 897 PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING)); 898 PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device)); 899 PetscCall(PetscDeviceContextSetUp(*dctx)); 900 // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert! 901 PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx)); 902 } else *dctx = ctxlist[devid]; 903 } 904 PetscFunctionReturn(PETSC_SUCCESS); 905 } 906 907 /* 908 Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are 909 guaranteed to always be globally blocking. 910 */ 911 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx) 912 { 913 PetscDeviceContext gctx; 914 PetscDevice gdev = nullptr; 915 916 PetscFunctionBegin; 917 PetscValidPointer(dctx, 1); 918 PetscCall(PetscDeviceContextGetCurrentContext(&gctx)); 919 PetscCall(PetscDeviceContextGetDevice(gctx, &gdev)); 920 PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx)); 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 /*@C 925 PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database 926 927 Collective on `comm` or `dctx` 928 929 Input Parameters: 930 + comm - MPI communicator on which to query the options database (optional) 931 - dctx - The `PetscDeviceContext` to configure 932 933 Output Parameter: 934 . dctx - The `PetscDeviceContext` 935 936 Options Database Keys: 937 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - 938 `PetscDeviceContextSetStreamType()` 939 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType` 940 941 Level: beginner 942 943 Note: 944 The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is 945 used (which is always `PETSC_COMM_SELF`). 946 947 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`, 948 `PetscDeviceContextView()` 949 @*/ 950 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx) 951 { 952 const auto pobj = PetscObjectCast(dctx); 953 auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE); 954 auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE); 955 MPI_Comm old_comm = PETSC_COMM_SELF; 956 957 PetscFunctionBegin; 958 // do not user getoptionalnullcontext here, the user is not allowed to set it from options! 959 PetscValidDeviceContext(dctx, 2); 960 /* set the device type first */ 961 if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first)); 962 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first)); 963 964 if (comm == MPI_COMM_NULL) { 965 PetscCall(PetscObjectGetComm(pobj, &comm)); 966 } else { 967 // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so 968 // PetscObjectOptionsBegin() behaves as if dctx had comm 969 old_comm = Petsc::util::exchange(pobj->comm, comm); 970 } 971 972 PetscObjectOptionsBegin(pobj); 973 PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype)); 974 PetscOptionsEnd(); 975 // reset the comm (should be PETSC_COMM_SELF) 976 if (comm != MPI_COMM_NULL) pobj->comm = old_comm; 977 if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first)); 978 if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first)); 979 PetscCall(PetscDeviceContextSetUp(dctx)); 980 PetscFunctionReturn(PETSC_SUCCESS); 981 } 982 983 /*@C 984 PetscDeviceContextView - View a `PetscDeviceContext` 985 986 Collective on `viewer` 987 988 Input Parameters: 989 + dctx - The `PetscDeviceContext` 990 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`) 991 992 Level: beginner 993 994 Note: 995 If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this 996 routine is collective on `PETSC_COMM_WORLD`. 997 998 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()` 999 @*/ 1000 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer) 1001 { 1002 PetscBool iascii; 1003 1004 PetscFunctionBegin; 1005 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1006 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer)); 1007 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1008 PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii)); 1009 if (iascii) { 1010 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 1011 PetscViewer sub; 1012 1013 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1014 PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub)); 1015 PetscCall(PetscViewerASCIIPushTab(sub)); 1016 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 1017 PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype])); 1018 PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren)); 1019 if (const auto nchild = dctx->numChildren) { 1020 PetscCall(PetscViewerASCIIPushTab(sub)); 1021 for (PetscInt i = 0; i < nchild; ++i) { 1022 if (i == nchild - 1) { 1023 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i])); 1024 } else { 1025 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i])); 1026 } 1027 } 1028 } 1029 PetscCall(PetscViewerASCIIPopTab(sub)); 1030 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1031 PetscCall(PetscViewerFlush(viewer)); 1032 PetscCall(PetscViewerASCIIPushTab(viewer)); 1033 } 1034 if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer)); 1035 if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } 1038 1039 /*@C 1040 PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options 1041 1042 Input Parameters: 1043 + dctx - The `PetscDeviceContext` to view 1044 . obj - Optional `PetscObject` to associate (may be `NULL`) 1045 - name - The command line option 1046 1047 Level: beginner 1048 1049 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()` 1050 @*/ 1051 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[]) 1052 { 1053 PetscFunctionBegin; 1054 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1055 if (obj) PetscValidHeader(obj, 2); 1056 PetscValidPointer(name, 3); 1057 PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name)); 1058 PetscFunctionReturn(PETSC_SUCCESS); 1059 } 1060 1061 /*@C 1062 PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context 1063 1064 Input Parameters: 1065 + dctx - The `PetscDeviceContext` to get the stream from 1066 - handle - A handle to the stream 1067 1068 Level: developer 1069 1070 Note: 1071 This routine is dangerous. It exists only for the most experienced users and 1072 internal PETSc developement. 1073 1074 There is no way for PETSc's auto-dependency system to track what the caller does with the 1075 stream. 1076 1077 If the user uses the stream to copy memory that was previously modified by PETSc, or launches 1078 kernels that modify memory with the stream, it is the users responsibility to inform PETSc of 1079 their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a 1080 race condition. This race condition may manifest in nondeterministic ways. 1081 1082 Alternatively, the user may synchronize the stream immediately before and after use. This is 1083 the safest option. 1084 1085 Example Usage: 1086 .vb 1087 PetscDeviceContext dctx; 1088 PetscDeviceType type; 1089 void *handle; 1090 1091 PetscDeviceContextGetCurrentContext(&dctx); 1092 PetscDeviceContextGetStreamHandle(dctx, &handle); 1093 PetscDeviceContextGetDeviceType(dctx, &type); 1094 1095 if (type == PETSC_DEVICE_CUDA) { 1096 cudsStream_t stream = *(cudaStream_t*)handle; 1097 1098 my_cuda_kernel<<<1, 2, 3, stream>>>(); 1099 } 1100 .ve 1101 1102 .N ASYNC_API 1103 1104 .seealso: `PetscDeviceContext` 1105 @*/ 1106 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle) 1107 { 1108 PetscFunctionBegin; 1109 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1110 PetscValidPointer(handle, 2); 1111 PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle)); 1112 PetscFunctionReturn(PETSC_SUCCESS); 1113 } 1114