1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/ 2 #include <petsc/private/viewerimpl.h> // _p_PetscViewer for PetscObjectCast() 3 4 #include <petsc/private/cpp/object_pool.hpp> 5 #include <petsc/private/cpp/utility.hpp> 6 #include <petsc/private/cpp/array.hpp> 7 8 #include <vector> 9 #include <string> // std::to_string among other things 10 11 /* Define the allocator */ 12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> { 13 public: 14 PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept 15 { 16 PetscFunctionBegin; 17 PetscCall(PetscArrayzero(dctx, 1)); 18 PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView)); 19 PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData()); 20 PetscCall(underlying().reset(dctx, false)); 21 PetscFunctionReturn(PETSC_SUCCESS); 22 } 23 24 static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept 25 { 26 PetscFunctionBegin; 27 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); 28 PetscTryTypeMethod(dctx, destroy); 29 PetscCall(PetscDeviceDestroy(&dctx->device)); 30 PetscCall(PetscFree(dctx->childIDs)); 31 delete CxxDataCast(dctx); 32 PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE)); 33 PetscFunctionReturn(PETSC_SUCCESS); 34 } 35 36 static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept 37 { 38 PetscFunctionBegin; 39 if (zero) { 40 // reset the device if the user set it 41 if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) { 42 PetscTryTypeMethod(dctx, destroy); 43 PetscCall(PetscDeviceDestroy(&dctx->device)); 44 PetscCall(PetscArrayzero(dctx->ops, 1)); 45 dctx->data = nullptr; 46 } 47 PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx))); 48 dctx->numChildren = 0; 49 dctx->setup = PETSC_FALSE; 50 // don't deallocate the child array, rather just zero it out 51 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); 52 PetscCall(CxxDataCast(dctx)->clear()); 53 } 54 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode invalidate_(PetscDeviceContext) noexcept { return PETSC_SUCCESS; } 59 }; 60 61 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool; 62 63 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 64 /*@C 65 PetscDeviceContextCreate - Creates a `PetscDeviceContext` 66 67 Not Collective 68 69 Output Parameter: 70 . dctx - The `PetscDeviceContext` 71 72 Level: beginner 73 74 Note: 75 Unlike almost every other PETSc class it is advised that most users use 76 `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of 77 different types are incompatible with one another; using `PetscDeviceContextDuplicate()` 78 ensures compatible types. 79 80 DAG representation: 81 .vb 82 time -> 83 84 |= CALL =| - dctx -> 85 .ve 86 87 .N ASYNC_API 88 89 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, 90 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, 91 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()` 92 @*/ 93 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 94 { 95 PetscFunctionBegin; 96 PetscAssertPointer(dctx, 1); 97 PetscCall(PetscDeviceInitializePackage()); 98 PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 99 PetscCall(contextPool.allocate(dctx)); 100 PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 101 PetscFunctionReturn(PETSC_SUCCESS); 102 } 103 104 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 105 /*@C 106 PetscDeviceContextDestroy - Frees a `PetscDeviceContext` 107 108 Not Collective 109 110 Input Parameter: 111 . dctx - The `PetscDeviceContext` 112 113 Level: beginner 114 115 Notes: 116 No implicit synchronization occurs due to this routine, all resources are released completely 117 asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on 118 `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before 119 calling this routine. 120 121 DAG representation: 122 .vb 123 time -> 124 125 -> dctx - |= CALL =| 126 .ve 127 128 Developer Notes: 129 `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever 130 growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool, 131 this should perhaps be implemented. 132 133 .N ASYNC_API 134 135 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 136 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` 137 @*/ 138 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 139 { 140 PetscFunctionBegin; 141 PetscAssertPointer(dctx, 1); 142 if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS); 143 PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 144 if (--(PetscObjectCast(*dctx)->refct) <= 0) { 145 PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx)); 146 PetscCall(contextPool.deallocate(dctx)); 147 } 148 PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 149 *dctx = nullptr; 150 PetscFunctionReturn(PETSC_SUCCESS); 151 } 152 153 /*@C 154 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a 155 `PetscDeviceContext` 156 157 Not Collective 158 159 Input Parameters: 160 + dctx - The `PetscDeviceContext` 161 - type - The `PetscStreamType` 162 163 Level: beginner 164 165 Note: 166 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 167 types and their interactions. If the `PetscDeviceContext` was previously set up and stream 168 type was changed, you must call `PetscDeviceContextSetUp()` again after this routine. 169 170 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, 171 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` 172 @*/ 173 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 174 { 175 PetscFunctionBegin; 176 // do not use getoptionalnullcontext here since we do not want the user to change the stream 177 // type 178 PetscValidDeviceContext(dctx, 1); 179 PetscValidStreamType(type, 2); 180 // only need to do complex swapping if the object has already been setup 181 if (dctx->setup && (dctx->streamType != type)) { 182 dctx->setup = PETSC_FALSE; 183 PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 184 PetscUseTypeMethod(dctx, changestreamtype, type); 185 PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 186 } 187 dctx->streamType = type; 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 /*@C 192 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a 193 `PetscDeviceContext` 194 195 Not Collective 196 197 Input Parameter: 198 . dctx - The `PetscDeviceContext` 199 200 Output Parameter: 201 . type - The `PetscStreamType` 202 203 Level: beginner 204 205 Note: 206 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 207 types and their interactions 208 209 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, 210 `PetscDeviceContextSetFromOptions()` 211 @*/ 212 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 213 { 214 PetscFunctionBegin; 215 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 216 PetscAssertPointer(type, 2); 217 *type = dctx->streamType; 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 /* 222 Actual function to set the device. 223 224 1. Repeatedly destroying and recreating internal data structures (like streams and events) 225 for recycled PetscDeviceContexts is not free. If done often, it does add up. 226 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or 227 default contexts. The default contexts *never* change type, and the children are extremely 228 unlikely to (chances are if you fork once, you will fork again very soon). 229 3. The only time this calculus changes is if the user themselves sets the device type. In 230 this case we do not know what the user has changed, so must always wipe the slate clean. 231 232 Thus we need to keep track whether the user explicitly sets the device contexts device. 233 */ 234 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set) 235 { 236 PetscFunctionBegin; 237 // do not use getoptionalnullcontext here since we do not want the user to change its device 238 PetscValidDeviceContext(dctx, 1); 239 PetscValidDevice(device, 2); 240 if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS); 241 PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 242 PetscTryTypeMethod(dctx, destroy); 243 PetscCall(PetscDeviceDestroy(&dctx->device)); 244 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); 245 PetscCall(PetscDeviceReference_Internal(device)); 246 // set it before calling the method 247 dctx->device = device; 248 PetscCall((*device->ops->createcontext)(dctx)); 249 PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 250 dctx->setup = PETSC_FALSE; 251 dctx->usersetdevice = user_set; 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type) 256 { 257 PetscDevice device; 258 259 PetscFunctionBegin; 260 PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device)); 261 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE)); 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 /*@C 266 PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext` 267 268 Not Collective 269 270 Input Parameters: 271 + dctx - The `PetscDeviceContext` 272 - device - The `PetscDevice` 273 274 Level: intermediate 275 276 Notes: 277 This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must 278 also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly 279 necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will 280 always come equipped with the "default" device. 281 282 This routine is a no-op if `device` is already attached to `dctx`. 283 284 This routine may (but is very unlikely to) initialize the backend device and may incur 285 synchronization. 286 287 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`, 288 `PetscDeviceContextGetDeviceType()` 289 @*/ 290 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 291 { 292 PetscFunctionBegin; 293 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE)); 294 PetscFunctionReturn(PETSC_SUCCESS); 295 } 296 297 /*@C 298 PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext` 299 300 Not Collective 301 302 Input Parameter: 303 . dctx - the `PetscDeviceContext` 304 305 Output Parameter: 306 . device - The `PetscDevice` 307 308 Level: intermediate 309 310 Note: 311 This is a borrowed reference, the user should not destroy `device`. 312 313 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()` 314 @*/ 315 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 316 { 317 PetscFunctionBegin; 318 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 319 PetscAssertPointer(device, 2); 320 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id); 321 *device = dctx->device; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext` 327 328 Not Collective 329 330 Input Parameter: 331 . dctx - The `PetscDeviceContext` 332 333 Output Parameter: 334 . type - The `PetscDeviceType` 335 336 Level: beginner 337 338 Note: 339 This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` -> 340 `PetscDeviceGetType()`. 341 342 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice` 343 @*/ 344 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type) 345 { 346 PetscDevice device = nullptr; 347 348 PetscFunctionBegin; 349 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 350 PetscAssertPointer(type, 2); 351 PetscCall(PetscDeviceContextGetDevice(dctx, &device)); 352 PetscCall(PetscDeviceGetType(device, type)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /*@C 357 PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use 358 359 Not Collective 360 361 Input Parameter: 362 . dctx - The `PetscDeviceContext` 363 364 Level: beginner 365 366 Developer Notes: 367 This routine is usually the stage where a `PetscDeviceContext` acquires device-side data 368 structures such as streams, events, and (possibly) handles. 369 370 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 371 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` 372 @*/ 373 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 374 { 375 PetscFunctionBegin; 376 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 377 if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS); 378 if (!dctx->device) { 379 const auto default_dtype = PETSC_DEVICE_DEFAULT(); 380 381 PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype])); 382 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype)); 383 } 384 PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 385 PetscUseTypeMethod(dctx, setup); 386 PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 387 dctx->setup = PETSC_TRUE; 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup) 392 { 393 PetscFunctionBegin; 394 PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 395 PetscCall(PetscDeviceContextCreate(dctxdup)); 396 PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype)); 397 if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice)); 398 PetscCall(PetscDeviceContextSetUp(*dctxdup)); 399 PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 404 /*@C 405 PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object 406 407 Not Collective 408 409 Input Parameter: 410 . dctx - The `PetscDeviceContext` to duplicate 411 412 Output Parameter: 413 . dctxdup - The duplicated `PetscDeviceContext` 414 415 Level: beginner 416 417 Notes: 418 This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as 419 another. Note however that `dctxdup` does not share any of the underlying data with `dctx`, 420 (including its current stream-state) they are completely separate objects. 421 422 There is no implied ordering between `dctx` or `dctxdup`. 423 424 DAG representation: 425 .vb 426 time -> 427 428 -> dctx - |= CALL =| - dctx ----> 429 - dctxdup -> 430 .ve 431 432 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 433 `PetscDeviceContextSetStreamType()` 434 @*/ 435 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) 436 { 437 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 438 439 PetscFunctionBegin; 440 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 441 PetscAssertPointer(dctxdup, 2); 442 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 443 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup)); 444 PetscFunctionReturn(PETSC_SUCCESS); 445 } 446 447 /*@C 448 PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle 449 450 Not Collective 451 452 Input Parameter: 453 . dctx - The `PetscDeviceContext` 454 455 Output Parameter: 456 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work 457 458 Level: intermediate 459 460 Note: 461 This routine only refers a singular context and does NOT take any of its children into 462 account. That is, if `dctx` is idle but has dependents who do have work this routine still 463 returns `PETSC_TRUE`. 464 465 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` 466 @*/ 467 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 468 { 469 PetscFunctionBegin; 470 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 471 PetscAssertPointer(idle, 2); 472 PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 473 PetscUseTypeMethod(dctx, query, idle); 474 PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 475 PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not")); 476 PetscFunctionReturn(PETSC_SUCCESS); 477 } 478 479 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 480 /*@C 481 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 482 483 Not Collective 484 485 Input Parameters: 486 + dctxa - The `PetscDeviceContext` object that is waiting 487 - dctxb - The `PetscDeviceContext` object that is being waited on 488 489 Level: beginner 490 491 Notes: 492 Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host 493 does not wait for the serialization to actually occur. 494 495 This routine uses only the state of `dctxb` at the moment this routine was called, so any 496 future work queued will not affect `dctxa`. It is safe to pass the same context to both 497 arguments (in which case this routine does nothing). 498 499 DAG representation: 500 .vb 501 time -> 502 503 -> dctxa ---/- |= CALL =| - dctxa -> 504 / 505 -> dctxb -/------------------------> 506 .ve 507 508 .N ASYNC_API 509 510 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` 511 @*/ 512 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 513 { 514 PetscObject aobj; 515 516 PetscFunctionBegin; 517 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa)); 518 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb)); 519 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); 520 if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS); 521 aobj = PetscObjectCast(dctxa); 522 PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 523 PetscUseTypeMethod(dctxa, waitforcontext, dctxb); 524 PetscCallCXX(CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb)); 525 PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 526 PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id)); 527 PetscCall(PetscObjectStateIncrease(aobj)); 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 532 /*@C 533 PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent 534 context with a prescribed `PetscStreamType` 535 536 Not Collective, Asynchronous 537 538 Input Parameters: 539 + dctx - The parent `PetscDeviceContext` 540 . stype - The prescribed `PetscStreamType` 541 - n - The number of children to create 542 543 Output Parameter: 544 . dsub - The created child context(s) 545 546 Level: intermediate 547 548 Notes: 549 This routine creates `n` edges of a DAG from a source node which are causally dependent on the 550 source node. This causal dependency is established as-if by calling 551 `PetscDeviceContextWaitForContext()` on every child. 552 553 `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx` 554 expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed. 555 556 This routine only accounts for work queued on `dctx` up until calling this routine, any 557 subsequent work enqueued on `dctx` has no effect on `dsub`. 558 559 The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case 560 that they are different. This is useful in cases where a routine can locally exploit stream 561 parallelism without needing to worry about what stream type the incoming `PetscDeviceContext` 562 carries. 563 564 DAG representation: 565 .vb 566 time -> 567 568 -> dctx - |= CALL =| -\----> dctx ------> 569 \---> dsub[0] ---> 570 \--> ... -------> 571 \-> dsub[n-1] -> 572 .ve 573 574 .N ASYNC_API 575 576 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, 577 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()` 578 @*/ 579 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub) 580 { 581 // debugging only 582 std::string idList; 583 auto ninput = n; 584 585 PetscFunctionBegin; 586 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 587 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); 588 PetscAssertPointer(dsub, 4); 589 *dsub = nullptr; 590 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 591 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 592 PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 593 /* update child totals */ 594 dctx->numChildren += n; 595 /* now to find out if we have room */ 596 if (dctx->numChildren > dctx->maxNumChildren) { 597 const auto numChildren = dctx->numChildren; 598 auto &maxNumChildren = dctx->maxNumChildren; 599 auto numAllocated = numChildren; 600 601 /* no room, either from having too many kids or not having any */ 602 if (auto &childIDs = dctx->childIDs) { 603 // the difference is backwards because we have not updated maxNumChildren yet 604 numAllocated -= maxNumChildren; 605 /* have existing children, must reallocate them */ 606 PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs)); 607 /* clear the extra memory since realloc doesn't do it for us */ 608 PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated)); 609 } else { 610 /* have no children */ 611 PetscCall(PetscCalloc1(numChildren, &childIDs)); 612 } 613 /* update total number of children */ 614 maxNumChildren = numChildren; 615 } 616 PetscCall(PetscMalloc1(n, dsub)); 617 for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) { 618 auto &childID = dctx->childIDs[i]; 619 /* empty child slot */ 620 if (!childID) { 621 auto &childctx = (*dsub)[i]; 622 623 /* create the child context in the image of its parent */ 624 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx)); 625 PetscCall(PetscDeviceContextWaitForContext(childctx, dctx)); 626 /* register the child with its parent */ 627 PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID)); 628 if (PetscDefined(USE_DEBUG_AND_INFO)) { 629 PetscCallCXX(idList += std::to_string(childID)); 630 if (ninput != 1) PetscCallCXX(idList += ", "); 631 } 632 --ninput; 633 } 634 } 635 PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 636 PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str())); 637 PetscFunctionReturn(PETSC_SUCCESS); 638 } 639 640 /*@C 641 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 642 643 Not Collective, Asynchronous 644 645 Input Parameters: 646 + dctx - The parent `PetscDeviceContext` 647 - n - The number of children to create 648 649 Output Parameter: 650 . dsub - The created child context(s) 651 652 Level: beginner 653 654 Notes: 655 Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed 656 `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\: 657 658 .vb 659 PetscStreamType stype; 660 661 PetscDeviceContextGetStreamType(dctx, &stype); 662 PetscDeviceContextForkWithStreamType(dctx, stype, ...); 663 .ve 664 665 .N ASYNC_API 666 667 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`, 668 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` 669 @*/ 670 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 671 { 672 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 673 674 PetscFunctionBegin; 675 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 676 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 677 PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub)); 678 PetscFunctionReturn(PETSC_SUCCESS); 679 } 680 681 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 682 /*@C 683 PetscDeviceContextJoin - Converge a set of child contexts 684 685 Not Collective, Asynchronous 686 687 Input Parameters: 688 + dctx - A `PetscDeviceContext` to converge on 689 . n - The number of sub contexts to converge 690 . joinMode - The type of join to perform 691 - dsub - The sub contexts to converge 692 693 Level: beginner 694 695 Notes: 696 If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source 697 node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`) 698 which receives `n` edges (and optionally destroys them) which is dependent on the completion 699 of all incoming edges. 700 701 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be 702 destroyed by this routine. Thus all sub contexts must have been created with the `dctx` 703 passed to this routine. 704 705 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on 706 `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the 707 sync suffix does NOT refer to the host, i.e. this routine does NOT call 708 `PetscDeviceSynchronize()`. 709 710 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but 711 the sub contexts do not wait for one another or `dctx` afterwards. 712 713 DAG representations: 714 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` 715 .vb 716 time -> 717 718 -> dctx ---------/- |= CALL =| - dctx -> 719 -> dsub[0] -----/ 720 -> ... -------/ 721 -> dsub[n-1] -/ 722 .ve 723 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` 724 .vb 725 time -> 726 727 -> dctx ---------/- |= CALL =| -\----> dctx ------> 728 -> dsub[0] -----/ \---> dsub[0] ---> 729 -> ... -------/ \--> ... -------> 730 -> dsub[n-1] -/ \-> dsub[n-1] -> 731 .ve 732 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` 733 .vb 734 time -> 735 736 -> dctx ----------/- |= CALL =| - dctx -> 737 -> dsub[0] ------/-----------------------> 738 -> ... --------/------------------------> 739 -> dsub[n-1] --/-------------------------> 740 .ve 741 742 .N ASYNC_API 743 744 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`, 745 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` 746 @*/ 747 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 748 { 749 // debugging only 750 std::string idList; 751 752 PetscFunctionBegin; 753 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 754 /* validity of dctx is checked in the wait-for loop */ 755 PetscAssertPointer(dsub, 4); 756 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); 757 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 758 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 759 /* first dctx waits on all the incoming edges */ 760 PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 761 for (PetscInt i = 0; i < n; ++i) { 762 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); 763 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); 764 if (PetscDefined(USE_DEBUG_AND_INFO)) { 765 PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id)); 766 if (i + 1 < n) PetscCallCXX(idList += ", "); 767 } 768 } 769 770 /* now we handle the aftermath */ 771 switch (joinMode) { 772 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { 773 const auto children = dctx->childIDs; 774 const auto maxchild = dctx->maxNumChildren; 775 auto &nchild = dctx->numChildren; 776 PetscInt j = 0; 777 778 PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild); 779 /* update child count while it's still fresh in memory */ 780 nchild -= n; 781 for (PetscInt i = 0; i < maxchild; ++i) { 782 if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) { 783 /* child is one of ours, can destroy it */ 784 PetscCall(PetscDeviceContextDestroy((*dsub) + j)); 785 /* reset the child slot */ 786 children[i] = 0; 787 if (++j == n) break; 788 } 789 } 790 /* gone through the loop but did not find every child */ 791 PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j); 792 PetscCall(PetscFree(*dsub)); 793 } break; 794 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 795 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); 796 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 797 break; 798 default: 799 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); 800 } 801 PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 802 803 PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); 804 PetscFunctionReturn(PETSC_SUCCESS); 805 } 806 807 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 808 /*@C 809 PetscDeviceContextSynchronize - Block the host until all work queued on a 810 `PetscDeviceContext` has finished 811 812 Not Collective 813 814 Input Parameter: 815 . dctx - The `PetscDeviceContext` to synchronize 816 817 Level: beginner 818 819 Notes: 820 The host will not return from this routine until `dctx` is idle. Any and all memory 821 operations queued on or otherwise associated with (either explicitly or implicitly via 822 dependencies) are guaranteed to have finished and be globally visible on return. 823 824 In effect, this routine serves as memory and execution barrier. 825 826 DAG representation: 827 .vb 828 time -> 829 830 -> dctx - |= CALL =| - dctx -> 831 .ve 832 833 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` 834 @*/ 835 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 836 { 837 PetscFunctionBegin; 838 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 839 PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 840 /* if it isn't setup there is nothing to sync on */ 841 if (dctx->setup) { 842 PetscUseTypeMethod(dctx, synchronize); 843 PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx)); 844 } 845 PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 /* every device type has a vector of null PetscDeviceContexts -- one for each device */ 850 static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{}; 851 static auto nullContextsFinalizer = false; 852 853 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx) 854 { 855 PetscInt devid; 856 PetscDeviceType dtype; 857 858 PetscFunctionBegin; 859 PetscValidDevice(device, 2); 860 PetscAssertPointer(dctx, 3); 861 if (PetscUnlikely(!nullContextsFinalizer)) { 862 nullContextsFinalizer = true; 863 PetscCall(PetscRegisterFinalize([] { 864 PetscFunctionBegin; 865 for (auto &&dvec : nullContexts) { 866 for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx)); 867 PetscCallCXX(dvec.clear()); 868 } 869 nullContextsFinalizer = false; 870 PetscFunctionReturn(PETSC_SUCCESS); 871 })); 872 } 873 PetscCall(PetscDeviceGetDeviceId(device, &devid)); 874 PetscCall(PetscDeviceGetType(device, &dtype)); 875 { 876 auto &ctxlist = nullContexts[dtype]; 877 878 PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid); 879 // need to resize the container if not big enough because incrementing the iterator in 880 // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the 881 // current size of the container. 882 if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1)); 883 if (PetscUnlikely(!ctxlist[devid])) { 884 // we have not seen this device before 885 PetscCall(PetscDeviceContextCreate(dctx)); 886 PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid)); 887 { 888 const auto pobj = PetscObjectCast(*dctx); 889 const auto name = "null context " + std::to_string(devid); 890 const auto prefix = "null_context_" + std::to_string(devid) + '_'; 891 892 PetscCall(PetscObjectSetName(pobj, name.c_str())); 893 PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str())); 894 } 895 PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING)); 896 PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device)); 897 PetscCall(PetscDeviceContextSetUp(*dctx)); 898 // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert! 899 PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx)); 900 } else *dctx = ctxlist[devid]; 901 } 902 PetscFunctionReturn(PETSC_SUCCESS); 903 } 904 905 /* 906 Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are 907 guaranteed to always be globally blocking. 908 */ 909 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx) 910 { 911 PetscDeviceContext gctx; 912 PetscDevice gdev = nullptr; 913 914 PetscFunctionBegin; 915 PetscAssertPointer(dctx, 1); 916 PetscCall(PetscDeviceContextGetCurrentContext(&gctx)); 917 PetscCall(PetscDeviceContextGetDevice(gctx, &gdev)); 918 PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx)); 919 PetscFunctionReturn(PETSC_SUCCESS); 920 } 921 922 /*@C 923 PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database 924 925 Collective on `comm` or `dctx` 926 927 Input Parameters: 928 + comm - MPI communicator on which to query the options database (optional) 929 - dctx - The `PetscDeviceContext` to configure 930 931 Output Parameter: 932 . dctx - The `PetscDeviceContext` 933 934 Options Database Keys: 935 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - 936 `PetscDeviceContextSetStreamType()` 937 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType` 938 939 Level: beginner 940 941 Note: 942 The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is 943 used (which is always `PETSC_COMM_SELF`). 944 945 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`, 946 `PetscDeviceContextView()` 947 @*/ 948 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx) 949 { 950 const auto pobj = PetscObjectCast(dctx); 951 auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE); 952 auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE); 953 MPI_Comm old_comm = PETSC_COMM_SELF; 954 955 PetscFunctionBegin; 956 // do not user getoptionalnullcontext here, the user is not allowed to set it from options! 957 PetscValidDeviceContext(dctx, 2); 958 /* set the device type first */ 959 if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first)); 960 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first)); 961 962 if (comm == MPI_COMM_NULL) { 963 PetscCall(PetscObjectGetComm(pobj, &comm)); 964 } else { 965 // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so 966 // PetscObjectOptionsBegin() behaves as if dctx had comm 967 old_comm = Petsc::util::exchange(pobj->comm, comm); 968 } 969 970 PetscObjectOptionsBegin(pobj); 971 PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype)); 972 PetscOptionsEnd(); 973 // reset the comm (should be PETSC_COMM_SELF) 974 if (comm != MPI_COMM_NULL) pobj->comm = old_comm; 975 if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first)); 976 if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first)); 977 PetscCall(PetscDeviceContextSetUp(dctx)); 978 PetscFunctionReturn(PETSC_SUCCESS); 979 } 980 981 /*@C 982 PetscDeviceContextView - View a `PetscDeviceContext` 983 984 Collective on `viewer` 985 986 Input Parameters: 987 + dctx - The `PetscDeviceContext` 988 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`) 989 990 Level: beginner 991 992 Note: 993 If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this 994 routine is collective on `PETSC_COMM_WORLD`. 995 996 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()` 997 @*/ 998 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer) 999 { 1000 PetscBool iascii; 1001 1002 PetscFunctionBegin; 1003 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1004 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer)); 1005 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1006 PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii)); 1007 if (iascii) { 1008 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 1009 PetscViewer sub; 1010 1011 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1012 PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub)); 1013 PetscCall(PetscViewerASCIIPushTab(sub)); 1014 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 1015 PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype])); 1016 PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren)); 1017 if (const auto nchild = dctx->numChildren) { 1018 PetscCall(PetscViewerASCIIPushTab(sub)); 1019 for (PetscInt i = 0; i < nchild; ++i) { 1020 if (i == nchild - 1) { 1021 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i])); 1022 } else { 1023 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i])); 1024 } 1025 } 1026 } 1027 PetscCall(PetscViewerASCIIPopTab(sub)); 1028 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1029 PetscCall(PetscViewerFlush(viewer)); 1030 PetscCall(PetscViewerASCIIPushTab(viewer)); 1031 } 1032 if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer)); 1033 if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer)); 1034 PetscFunctionReturn(PETSC_SUCCESS); 1035 } 1036 1037 /*@C 1038 PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options 1039 1040 Input Parameters: 1041 + dctx - The `PetscDeviceContext` to view 1042 . obj - Optional `PetscObject` to associate (may be `NULL`) 1043 - name - The command line option 1044 1045 Level: beginner 1046 1047 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()` 1048 @*/ 1049 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[]) 1050 { 1051 PetscFunctionBegin; 1052 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1053 if (obj) PetscValidHeader(obj, 2); 1054 PetscAssertPointer(name, 3); 1055 PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name)); 1056 PetscFunctionReturn(PETSC_SUCCESS); 1057 } 1058 1059 /*@C 1060 PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context 1061 1062 Input Parameters: 1063 + dctx - The `PetscDeviceContext` to get the stream from 1064 - handle - A handle to the stream 1065 1066 Level: developer 1067 1068 Note: 1069 This routine is dangerous. It exists only for the most experienced users and 1070 internal PETSc developement. 1071 1072 There is no way for PETSc's auto-dependency system to track what the caller does with the 1073 stream. 1074 1075 If the user uses the stream to copy memory that was previously modified by PETSc, or launches 1076 kernels that modify memory with the stream, it is the users responsibility to inform PETSc of 1077 their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a 1078 race condition. This race condition may manifest in nondeterministic ways. 1079 1080 Alternatively, the user may synchronize the stream immediately before and after use. This is 1081 the safest option. 1082 1083 Example Usage: 1084 .vb 1085 PetscDeviceContext dctx; 1086 PetscDeviceType type; 1087 void *handle; 1088 1089 PetscDeviceContextGetCurrentContext(&dctx); 1090 PetscDeviceContextGetStreamHandle(dctx, &handle); 1091 PetscDeviceContextGetDeviceType(dctx, &type); 1092 1093 if (type == PETSC_DEVICE_CUDA) { 1094 cudsStream_t stream = *(cudaStream_t*)handle; 1095 1096 my_cuda_kernel<<<1, 2, 3, stream>>>(); 1097 } 1098 .ve 1099 1100 .N ASYNC_API 1101 1102 .seealso: `PetscDeviceContext` 1103 @*/ 1104 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle) 1105 { 1106 PetscFunctionBegin; 1107 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1108 PetscAssertPointer(handle, 2); 1109 PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle)); 1110 PetscFunctionReturn(PETSC_SUCCESS); 1111 } 1112