1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/ 2 #include <petsc/private/viewerimpl.h> // _p_PetscViewer for PetscObjectCast() 3 4 #include <petsc/private/cpp/object_pool.hpp> 5 #include <petsc/private/cpp/utility.hpp> 6 #include <petsc/private/cpp/array.hpp> 7 8 #include <vector> 9 #include <string> // std::to_string among other things 10 11 /* Define the allocator */ 12 class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> { 13 public: 14 PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept 15 { 16 PetscFunctionBegin; 17 PetscCall(PetscArrayzero(dctx, 1)); 18 PetscCall(PetscHeaderInitialize_Private(dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, PetscDeviceContextDestroy, PetscDeviceContextView)); 19 PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData()); 20 PetscCall(underlying().reset(dctx, false)); 21 PetscFunctionReturn(PETSC_SUCCESS); 22 } 23 24 static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept 25 { 26 PetscFunctionBegin; 27 PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); 28 PetscTryTypeMethod(dctx, destroy); 29 PetscCall(PetscDeviceDestroy(&dctx->device)); 30 PetscCall(PetscFree(dctx->childIDs)); 31 delete CxxDataCast(dctx); 32 PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE)); 33 PetscFunctionReturn(PETSC_SUCCESS); 34 } 35 36 static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept 37 { 38 PetscFunctionBegin; 39 if (zero) { 40 // reset the device if the user set it 41 if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) { 42 PetscTryTypeMethod(dctx, destroy); 43 PetscCall(PetscDeviceDestroy(&dctx->device)); 44 PetscCall(PetscArrayzero(dctx->ops, 1)); 45 dctx->data = nullptr; 46 } 47 PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx))); 48 dctx->numChildren = 0; 49 dctx->setup = PETSC_FALSE; 50 // don't deallocate the child array, rather just zero it out 51 PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); 52 PetscCall(CxxDataCast(dctx)->clear()); 53 } 54 dctx->streamType = PETSC_STREAM_DEFAULT_BLOCKING; 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 static PetscErrorCode invalidate_(PetscDeviceContext) noexcept { return PETSC_SUCCESS; } 59 }; 60 61 static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool; 62 63 /*@C 64 PetscDeviceContextCreate - Creates a `PetscDeviceContext` 65 66 Not Collective 67 68 Output Parameter: 69 . dctx - The `PetscDeviceContext` 70 71 Level: beginner 72 73 Note: 74 Unlike almost every other PETSc class it is advised that most users use 75 `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of 76 different types are incompatible with one another; using `PetscDeviceContextDuplicate()` 77 ensures compatible types. 78 79 DAG representation: 80 .vb 81 time -> 82 83 |= CALL =| - dctx -> 84 .ve 85 86 .N ASYNC_API 87 88 .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, 89 `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, 90 `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()` 91 @*/ 92 PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) 93 { 94 PetscFunctionBegin; 95 PetscValidPointer(dctx, 1); 96 PetscCall(PetscDeviceInitializePackage()); 97 PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 98 PetscCall(contextPool.allocate(dctx)); 99 PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); 100 PetscFunctionReturn(PETSC_SUCCESS); 101 } 102 103 /*@C 104 PetscDeviceContextDestroy - Frees a `PetscDeviceContext` 105 106 Not Collective 107 108 Input Parameter: 109 . dctx - The `PetscDeviceContext` 110 111 Level: beginner 112 113 Notes: 114 No implicit synchronization occurs due to this routine, all resources are released completely 115 asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on 116 `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before 117 calling this routine. 118 119 DAG representation: 120 .vb 121 time -> 122 123 -> dctx - |= CALL =| 124 .ve 125 126 Developer Notes: 127 `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever 128 growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool, 129 this should perhaps be implemented. 130 131 .N ASYNC_API 132 133 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 134 `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` 135 @*/ 136 PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) 137 { 138 PetscFunctionBegin; 139 PetscValidPointer(dctx, 1); 140 if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS); 141 PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 142 if (--(PetscObjectCast(*dctx)->refct) <= 0) { 143 PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx)); 144 PetscCall(contextPool.deallocate(dctx)); 145 } 146 PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); 147 *dctx = nullptr; 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 /*@C 152 PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a 153 `PetscDeviceContext` 154 155 Not Collective 156 157 Input Parameters: 158 + dctx - The `PetscDeviceContext` 159 - type - The `PetscStreamType` 160 161 Level: beginner 162 163 Note: 164 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 165 types and their interactions. If the `PetscDeviceContext` was previously set up and stream 166 type was changed, you must call `PetscDeviceContextSetUp()` again after this routine. 167 168 .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, 169 `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` 170 @*/ 171 PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) 172 { 173 PetscFunctionBegin; 174 // do not use getoptionalnullcontext here since we do not want the user to change the stream 175 // type 176 PetscValidDeviceContext(dctx, 1); 177 PetscValidStreamType(type, 2); 178 // only need to do complex swapping if the object has already been setup 179 if (dctx->setup && (dctx->streamType != type)) { 180 dctx->setup = PETSC_FALSE; 181 PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 182 PetscUseTypeMethod(dctx, changestreamtype, type); 183 PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); 184 } 185 dctx->streamType = type; 186 PetscFunctionReturn(PETSC_SUCCESS); 187 } 188 189 /*@C 190 PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a 191 `PetscDeviceContext` 192 193 Not Collective 194 195 Input Parameter: 196 . dctx - The `PetscDeviceContext` 197 198 Output Parameter: 199 . type - The `PetscStreamType` 200 201 Level: beginner 202 203 Note: 204 See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available 205 types and their interactions 206 207 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, 208 `PetscDeviceContextSetFromOptions()` 209 @*/ 210 PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) 211 { 212 PetscFunctionBegin; 213 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 214 PetscValidIntPointer(type, 2); 215 *type = dctx->streamType; 216 PetscFunctionReturn(PETSC_SUCCESS); 217 } 218 219 /* 220 Actual function to set the device. 221 222 1. Repeatedly destroying and recreating internal data structures (like streams and events) 223 for recycled PetscDeviceContexts is not free. If done often, it does add up. 224 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or 225 default contexts. The default contexts *never* change type, and the children are extremely 226 unlikely to (chances are if you fork once, you will fork again very soon). 227 3. The only time this calculus changes is if the user themselves sets the device type. In 228 this case we do not know what the user has changed, so must always wipe the slate clean. 229 230 Thus we need to keep track whether the user explicitly sets the device contexts device. 231 */ 232 static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set) 233 { 234 PetscFunctionBegin; 235 // do not use getoptionalnullcontext here since we do not want the user to change its device 236 PetscValidDeviceContext(dctx, 1); 237 PetscValidDevice(device, 2); 238 if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS); 239 PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 240 PetscTryTypeMethod(dctx, destroy); 241 PetscCall(PetscDeviceDestroy(&dctx->device)); 242 PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); 243 PetscCall(PetscDeviceReference_Internal(device)); 244 // set it before calling the method 245 dctx->device = device; 246 PetscCall((*device->ops->createcontext)(dctx)); 247 PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); 248 dctx->setup = PETSC_FALSE; 249 dctx->usersetdevice = user_set; 250 PetscFunctionReturn(PETSC_SUCCESS); 251 } 252 253 PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type) 254 { 255 PetscDevice device; 256 257 PetscFunctionBegin; 258 PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device)); 259 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE)); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 /*@C 264 PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext` 265 266 Not Collective 267 268 Input Parameters: 269 + dctx - The `PetscDeviceContext` 270 - device - The `PetscDevice` 271 272 Level: intermediate 273 274 Notes: 275 This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must 276 also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly 277 necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will 278 always come equipped with the "default" device. 279 280 This routine is a no-op if `device` is already attached to `dctx`. 281 282 This routine may (but is very unlikely to) initialize the backend device and may incur 283 synchronization. 284 285 .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`, 286 `PetscDeviceContextGetDeviceType()` 287 @*/ 288 PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) 289 { 290 PetscFunctionBegin; 291 PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE)); 292 PetscFunctionReturn(PETSC_SUCCESS); 293 } 294 295 /*@C 296 PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext` 297 298 Not Collective 299 300 Input Parameter: 301 . dctx - the `PetscDeviceContext` 302 303 Output Parameter: 304 . device - The `PetscDevice` 305 306 Level: intermediate 307 308 Note: 309 This is a borrowed reference, the user should not destroy `device`. 310 311 .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()` 312 @*/ 313 PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) 314 { 315 PetscFunctionBegin; 316 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 317 PetscValidPointer(device, 2); 318 PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id); 319 *device = dctx->device; 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 /*@C 324 PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext` 325 326 Not Collective 327 328 Input Parameter: 329 . dctx - The `PetscDeviceContext` 330 331 Output Parameter: 332 . type - The `PetscDeviceType` 333 334 Level: beginner 335 336 Note: 337 This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` -> 338 `PetscDeviceGetType()`. 339 340 .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice` 341 @*/ 342 PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type) 343 { 344 PetscDevice device = nullptr; 345 346 PetscFunctionBegin; 347 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 348 PetscValidPointer(type, 2); 349 PetscCall(PetscDeviceContextGetDevice(dctx, &device)); 350 PetscCall(PetscDeviceGetType(device, type)); 351 PetscFunctionReturn(PETSC_SUCCESS); 352 } 353 354 /*@C 355 PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use 356 357 Not Collective 358 359 Input Parameter: 360 . dctx - The `PetscDeviceContext` 361 362 Level: beginner 363 364 Developer Note: 365 This routine is usually the stage where a `PetscDeviceContext` acquires device-side data 366 structures such as streams, events, and (possibly) handles. 367 368 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 369 `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` 370 @*/ 371 PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) 372 { 373 PetscFunctionBegin; 374 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 375 if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS); 376 if (!dctx->device) { 377 const auto default_dtype = PETSC_DEVICE_DEFAULT(); 378 379 PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype])); 380 PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype)); 381 } 382 PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 383 PetscUseTypeMethod(dctx, setup); 384 PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); 385 dctx->setup = PETSC_TRUE; 386 PetscFunctionReturn(PETSC_SUCCESS); 387 } 388 389 static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup) 390 { 391 PetscFunctionBegin; 392 PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 393 PetscCall(PetscDeviceContextCreate(dctxdup)); 394 PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype)); 395 if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice)); 396 PetscCall(PetscDeviceContextSetUp(*dctxdup)); 397 PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); 398 PetscFunctionReturn(PETSC_SUCCESS); 399 } 400 401 /*@C 402 PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object 403 404 Not Collective 405 406 Input Parameter: 407 . dctx - The `PetscDeviceContext` to duplicate 408 409 Output Parameter: 410 . dctxdup - The duplicated `PetscDeviceContext` 411 412 Level: beginner 413 414 Notes: 415 This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as 416 another. Note however that `dctxdup` does not share any of the underlying data with `dctx`, 417 (including its current stream-state) they are completely separate objects. 418 419 There is no implied ordering between `dctx` or `dctxdup`. 420 421 DAG representation: 422 .vb 423 time -> 424 425 -> dctx - |= CALL =| - dctx ----> 426 - dctxdup -> 427 .ve 428 429 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, 430 `PetscDeviceContextSetStreamType()` 431 @*/ 432 PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) 433 { 434 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 435 436 PetscFunctionBegin; 437 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 438 PetscValidPointer(dctxdup, 2); 439 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 440 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup)); 441 PetscFunctionReturn(PETSC_SUCCESS); 442 } 443 444 /*@C 445 PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle 446 447 Not Collective 448 449 Input Parameter: 450 . dctx - The `PetscDeviceContext` 451 452 Output Parameter: 453 . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work 454 455 Level: intermediate 456 457 Note: 458 This routine only refers a singular context and does NOT take any of its children into 459 account. That is, if `dctx` is idle but has dependents who do have work this routine still 460 returns `PETSC_TRUE`. 461 462 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` 463 @*/ 464 PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) 465 { 466 PetscFunctionBegin; 467 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 468 PetscValidBoolPointer(idle, 2); 469 PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 470 PetscUseTypeMethod(dctx, query, idle); 471 PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); 472 PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not")); 473 PetscFunctionReturn(PETSC_SUCCESS); 474 } 475 476 /*@C 477 PetscDeviceContextWaitForContext - Make one context wait for another context to finish 478 479 Not Collective 480 481 Input Parameters: 482 + dctxa - The `PetscDeviceContext` object that is waiting 483 - dctxb - The `PetscDeviceContext` object that is being waited on 484 485 Level: beginner 486 487 Notes: 488 Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host 489 does not wait for the serialization to actually occur. 490 491 This routine uses only the state of `dctxb` at the moment this routine was called, so any 492 future work queued will not affect `dctxa`. It is safe to pass the same context to both 493 arguments (in which case this routine does nothing). 494 495 DAG representation: 496 .vb 497 time -> 498 499 -> dctxa ---/- |= CALL =| - dctxa -> 500 / 501 -> dctxb -/------------------------> 502 .ve 503 504 .N ASYNC_API 505 506 .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` 507 @*/ 508 PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) 509 { 510 PetscObject aobj; 511 512 PetscFunctionBegin; 513 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa)); 514 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb)); 515 PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); 516 if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS); 517 aobj = PetscObjectCast(dctxa); 518 PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 519 PetscUseTypeMethod(dctxa, waitforcontext, dctxb); 520 PetscCallCXX(CxxDataCast(dctxa)->upstream[dctxb] = CxxDataParent(dctxb)); 521 PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); 522 PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", aobj->id, PetscObjectCast(dctxb)->id)); 523 PetscCall(PetscObjectStateIncrease(aobj)); 524 PetscFunctionReturn(PETSC_SUCCESS); 525 } 526 527 /*@C 528 PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent 529 context with a prescribed `PetscStreamType` 530 531 Not Collective, Asynchronous 532 533 Input Parameters: 534 + dctx - The parent `PetscDeviceContext` 535 . stype - The prescribed `PetscStreamType` 536 - n - The number of children to create 537 538 Output Parameter: 539 . dsub - The created child context(s) 540 541 Level: intermediate 542 543 Notes: 544 This routine creates `n` edges of a DAG from a source node which are causally dependent on the 545 source node. This causal dependency is established as-if by calling 546 `PetscDeviceContextWaitForContext()` on every child. 547 548 `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx` 549 expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed. 550 551 This routine only accounts for work queued on `dctx` up until calling this routine, any 552 subsequent work enqueued on `dctx` has no effect on `dsub`. 553 554 The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case 555 that they are different. This is useful in cases where a routine can locally exploit stream 556 parallelism without needing to worry about what stream type the incoming `PetscDeviceContext` 557 carries. 558 559 DAG representation: 560 .vb 561 time -> 562 563 -> dctx - |= CALL =| -\----> dctx ------> 564 \---> dsub[0] ---> 565 \--> ... -------> 566 \-> dsub[n-1] -> 567 .ve 568 569 .N ASYNC_API 570 571 .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, 572 `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()` 573 @*/ 574 PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub) 575 { 576 // debugging only 577 std::string idList; 578 auto ninput = n; 579 580 PetscFunctionBegin; 581 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 582 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); 583 PetscValidPointer(dsub, 4); 584 *dsub = nullptr; 585 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 586 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 587 PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 588 /* update child totals */ 589 dctx->numChildren += n; 590 /* now to find out if we have room */ 591 if (dctx->numChildren > dctx->maxNumChildren) { 592 const auto numChildren = dctx->numChildren; 593 auto &maxNumChildren = dctx->maxNumChildren; 594 auto numAllocated = numChildren; 595 596 /* no room, either from having too many kids or not having any */ 597 if (auto &childIDs = dctx->childIDs) { 598 // the difference is backwards because we have not updated maxNumChildren yet 599 numAllocated -= maxNumChildren; 600 /* have existing children, must reallocate them */ 601 PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs)); 602 /* clear the extra memory since realloc doesn't do it for us */ 603 PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated)); 604 } else { 605 /* have no children */ 606 PetscCall(PetscCalloc1(numChildren, &childIDs)); 607 } 608 /* update total number of children */ 609 maxNumChildren = numChildren; 610 } 611 PetscCall(PetscMalloc1(n, dsub)); 612 for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) { 613 auto &childID = dctx->childIDs[i]; 614 /* empty child slot */ 615 if (!childID) { 616 auto &childctx = (*dsub)[i]; 617 618 /* create the child context in the image of its parent */ 619 PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx)); 620 PetscCall(PetscDeviceContextWaitForContext(childctx, dctx)); 621 /* register the child with its parent */ 622 PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID)); 623 if (PetscDefined(USE_DEBUG_AND_INFO)) { 624 PetscCallCXX(idList += std::to_string(childID)); 625 if (ninput != 1) PetscCallCXX(idList += ", "); 626 } 627 --ninput; 628 } 629 } 630 PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); 631 PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str())); 632 PetscFunctionReturn(PETSC_SUCCESS); 633 } 634 635 /*@C 636 PetscDeviceContextFork - Create a set of dependent child contexts from a parent context 637 638 Not Collective, Asynchronous 639 640 Input Parameters: 641 + dctx - The parent `PetscDeviceContext` 642 - n - The number of children to create 643 644 Output Parameter: 645 . dsub - The created child context(s) 646 647 Level: beginner 648 649 Notes: 650 Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed 651 `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\: 652 653 .vb 654 PetscStreamType stype; 655 656 PetscDeviceContextGetStreamType(dctx, &stype); 657 PetscDeviceContextForkWithStreamType(dctx, stype, ...); 658 .ve 659 660 .N ASYNC_API 661 662 .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`, 663 `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` 664 @*/ 665 PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) 666 { 667 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 668 669 PetscFunctionBegin; 670 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 671 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 672 PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub)); 673 PetscFunctionReturn(PETSC_SUCCESS); 674 } 675 676 /*@C 677 PetscDeviceContextJoin - Converge a set of child contexts 678 679 Not Collective, Asynchronous 680 681 Input Parameters: 682 + dctx - A `PetscDeviceContext` to converge on 683 . n - The number of sub contexts to converge 684 . joinMode - The type of join to perform 685 - dsub - The sub contexts to converge 686 687 Level: beginner 688 689 Notes: 690 If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source 691 node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`) 692 which receives `n` edges (and optionally destroys them) which is dependent on the completion 693 of all incoming edges. 694 695 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be 696 destroyed by this routine. Thus all sub contexts must have been created with the `dctx` 697 passed to this routine. 698 699 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on 700 `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the 701 sync suffix does NOT refer to the host, i.e. this routine does NOT call 702 `PetscDeviceSynchronize()`. 703 704 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but 705 the sub contexts do not wait for one another or `dctx` afterwards. 706 707 DAG representations: 708 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` 709 .vb 710 time -> 711 712 -> dctx ---------/- |= CALL =| - dctx -> 713 -> dsub[0] -----/ 714 -> ... -------/ 715 -> dsub[n-1] -/ 716 .ve 717 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` 718 .vb 719 time -> 720 721 -> dctx ---------/- |= CALL =| -\----> dctx ------> 722 -> dsub[0] -----/ \---> dsub[0] ---> 723 -> ... -------/ \--> ... -------> 724 -> dsub[n-1] -/ \-> dsub[n-1] -> 725 .ve 726 If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` 727 .vb 728 time -> 729 730 -> dctx ----------/- |= CALL =| - dctx -> 731 -> dsub[0] ------/-----------------------> 732 -> ... --------/------------------------> 733 -> dsub[n-1] --/-------------------------> 734 .ve 735 736 .N ASYNC_API 737 738 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`, 739 `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` 740 @*/ 741 PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) 742 { 743 // debugging only 744 std::string idList; 745 746 PetscFunctionBegin; 747 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 748 /* validity of dctx is checked in the wait-for loop */ 749 PetscValidPointer(dsub, 4); 750 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); 751 /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ 752 if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); 753 /* first dctx waits on all the incoming edges */ 754 PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 755 for (PetscInt i = 0; i < n; ++i) { 756 PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); 757 PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); 758 if (PetscDefined(USE_DEBUG_AND_INFO)) { 759 PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id)); 760 if (i + 1 < n) PetscCallCXX(idList += ", "); 761 } 762 } 763 764 /* now we handle the aftermath */ 765 switch (joinMode) { 766 case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { 767 const auto children = dctx->childIDs; 768 const auto maxchild = dctx->maxNumChildren; 769 auto &nchild = dctx->numChildren; 770 PetscInt j = 0; 771 772 PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild); 773 /* update child count while it's still fresh in memory */ 774 nchild -= n; 775 for (PetscInt i = 0; i < maxchild; ++i) { 776 if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) { 777 /* child is one of ours, can destroy it */ 778 PetscCall(PetscDeviceContextDestroy((*dsub) + j)); 779 /* reset the child slot */ 780 children[i] = 0; 781 if (++j == n) break; 782 } 783 } 784 /* gone through the loop but did not find every child */ 785 PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out out in", n - j); 786 PetscCall(PetscFree(*dsub)); 787 } break; 788 case PETSC_DEVICE_CONTEXT_JOIN_SYNC: 789 for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); 790 case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: 791 break; 792 default: 793 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); 794 } 795 PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); 796 797 PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); 798 PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 /*@C 802 PetscDeviceContextSynchronize - Block the host until all work queued on a 803 `PetscDeviceContext` has finished 804 805 Not Collective 806 807 Input Parameter: 808 . dctx - The `PetscDeviceContext` to synchronize 809 810 Level: beginner 811 812 Notes: 813 The host will not return from this routine until `dctx` is idle. Any and all memory 814 operations queued on or otherwise associated with (either explicitly or implicitly via 815 dependencies) are guaranteed to have finished and be globally visible on return. 816 817 In effect, this routine serves as memory and execution barrier. 818 819 DAG representation: 820 .vb 821 time -> 822 823 -> dctx - |= CALL =| - dctx -> 824 .ve 825 826 .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` 827 @*/ 828 PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) 829 { 830 PetscFunctionBegin; 831 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 832 PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 833 /* if it isn't setup there is nothing to sync on */ 834 if (dctx->setup) { 835 PetscUseTypeMethod(dctx, synchronize); 836 PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx)); 837 } 838 PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); 839 PetscFunctionReturn(PETSC_SUCCESS); 840 } 841 842 /* every device type has a vector of null PetscDeviceContexts -- one for each device */ 843 static auto nullContexts = std::array<std::vector<PetscDeviceContext>, PETSC_DEVICE_MAX>{}; 844 static auto nullContextsFinalizer = false; 845 846 static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx) 847 { 848 PetscInt devid; 849 PetscDeviceType dtype; 850 851 PetscFunctionBegin; 852 PetscValidDevice(device, 2); 853 PetscValidPointer(dctx, 3); 854 if (PetscUnlikely(!nullContextsFinalizer)) { 855 const auto finalizer = [] { 856 PetscFunctionBegin; 857 for (auto &&dvec : nullContexts) { 858 for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx)); 859 PetscCallCXX(dvec.clear()); 860 } 861 nullContextsFinalizer = false; 862 PetscFunctionReturn(PETSC_SUCCESS); 863 }; 864 865 nullContextsFinalizer = true; 866 PetscCall(PetscRegisterFinalize(std::move(finalizer))); 867 } 868 PetscCall(PetscDeviceGetDeviceId(device, &devid)); 869 PetscCall(PetscDeviceGetType(device, &dtype)); 870 { 871 auto &ctxlist = nullContexts[dtype]; 872 873 PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid); 874 // need to resize the container if not big enough because incrementing the iterator in 875 // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the 876 // current size of the container. 877 if (static_cast<std::size_t>(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1)); 878 if (PetscUnlikely(!ctxlist[devid])) { 879 // we have not seen this device before 880 PetscCall(PetscDeviceContextCreate(dctx)); 881 PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid)); 882 { 883 const auto pobj = PetscObjectCast(*dctx); 884 const auto name = "null context " + std::to_string(devid); 885 const auto prefix = "null_context_" + std::to_string(devid) + '_'; 886 887 PetscCall(PetscObjectSetName(pobj, name.c_str())); 888 PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str())); 889 } 890 PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_GLOBAL_BLOCKING)); 891 PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device)); 892 PetscCall(PetscDeviceContextSetUp(*dctx)); 893 // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert! 894 PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx)); 895 } else *dctx = ctxlist[devid]; 896 } 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 /* 901 Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are 902 guaranteed to always be globally blocking. 903 */ 904 PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx) 905 { 906 PetscDeviceContext gctx; 907 PetscDevice gdev = nullptr; 908 909 PetscFunctionBegin; 910 PetscValidPointer(dctx, 1); 911 PetscCall(PetscDeviceContextGetCurrentContext(&gctx)); 912 PetscCall(PetscDeviceContextGetDevice(gctx, &gdev)); 913 PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx)); 914 PetscFunctionReturn(PETSC_SUCCESS); 915 } 916 917 /*@C 918 PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database 919 920 Collective on `comm` or `dctx` 921 922 Input Parameters: 923 + comm - MPI communicator on which to query the options database (optional) 924 - dctx - The `PetscDeviceContext` to configure 925 926 Output Parameter: 927 . dctx - The `PetscDeviceContext` 928 929 Options Database Keys: 930 + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - 931 `PetscDeviceContextSetStreamType()` 932 - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType` 933 934 Level: beginner 935 936 Note: 937 The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is 938 used (which is always `PETSC_COMM_SELF`). 939 940 .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`, 941 `PetscDeviceContextView()` 942 @*/ 943 PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx) 944 { 945 const auto pobj = PetscObjectCast(dctx); 946 auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE); 947 auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE); 948 MPI_Comm old_comm = PETSC_COMM_SELF; 949 950 PetscFunctionBegin; 951 // do not user getoptionalnullcontext here, the user is not allowed to set it from options! 952 PetscValidDeviceContext(dctx, 2); 953 /* set the device type first */ 954 if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first)); 955 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first)); 956 957 if (comm == MPI_COMM_NULL) { 958 PetscCall(PetscObjectGetComm(pobj, &comm)); 959 } else { 960 // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so 961 // PetscObjectOptionsBegin() behaves as if dctx had comm 962 old_comm = Petsc::util::exchange(pobj->comm, comm); 963 } 964 965 PetscObjectOptionsBegin(pobj); 966 PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype)); 967 PetscOptionsEnd(); 968 // reset the comm (should be PETSC_COMM_SELF) 969 if (comm != MPI_COMM_NULL) pobj->comm = old_comm; 970 if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first)); 971 if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first)); 972 PetscCall(PetscDeviceContextSetUp(dctx)); 973 PetscFunctionReturn(PETSC_SUCCESS); 974 } 975 976 /*@C 977 PetscDeviceContextView - View a `PetscDeviceContext` 978 979 Collective on `viewer` 980 981 Input Parameters: 982 + dctx - The `PetscDeviceContext` 983 - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`) 984 985 Level: beginner 986 987 Note: 988 If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this 989 routine is collective on `PETSC_COMM_WORLD`. 990 991 .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()` 992 @*/ 993 PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer) 994 { 995 PetscBool iascii; 996 997 PetscFunctionBegin; 998 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 999 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer)); 1000 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 1001 PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii)); 1002 if (iascii) { 1003 auto stype = PETSC_STREAM_DEFAULT_BLOCKING; 1004 PetscViewer sub; 1005 1006 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1007 PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub)); 1008 PetscCall(PetscViewerASCIIPushTab(sub)); 1009 PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); 1010 PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype])); 1011 PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren)); 1012 if (const auto nchild = dctx->numChildren) { 1013 PetscCall(PetscViewerASCIIPushTab(sub)); 1014 for (PetscInt i = 0; i < nchild; ++i) { 1015 if (i == nchild - 1) { 1016 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i])); 1017 } else { 1018 PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i])); 1019 } 1020 } 1021 } 1022 PetscCall(PetscViewerASCIIPopTab(sub)); 1023 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub)); 1024 PetscCall(PetscViewerFlush(viewer)); 1025 PetscCall(PetscViewerASCIIPushTab(viewer)); 1026 } 1027 if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer)); 1028 if (iascii) PetscCall(PetscViewerASCIIPopTab(viewer)); 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 /*@C 1033 PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options 1034 1035 Input Parameters: 1036 + dctx - The `PetscDeviceContext` to view 1037 . obj - Optional `PetscObject` to associate (may be `NULL`) 1038 - name - The command line option 1039 1040 Level: beginner 1041 1042 .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()` 1043 @*/ 1044 PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[]) 1045 { 1046 PetscFunctionBegin; 1047 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1048 if (obj) PetscValidHeader(obj, 2); 1049 PetscValidCharPointer(name, 3); 1050 PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name)); 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 /*@C 1055 PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context 1056 1057 Input Parameters: 1058 + dctx - The `PetscDeviceContext` to get the stream from 1059 - handle - A handle to the stream 1060 1061 Level: developer 1062 1063 Note: 1064 This routine is dangerous. It exists only for the most experienced users and 1065 internal PETSc developement. 1066 1067 There is no way for PETSc's auto-dependency system to track what the caller does with the 1068 stream. 1069 1070 If the user uses the stream to copy memory that was previously modified by PETSc, or launches 1071 kernels that modify memory with the stream, it is the users responsibility to inform PETSc of 1072 their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a 1073 race condition. This race condition may manifest in nondeterministic ways. 1074 1075 Alternatively, the user may synchronize the stream immediately before and after use. This is 1076 the safest option. 1077 1078 Example Usage: 1079 .vb 1080 PetscDeviceContext dctx; 1081 PetscDeviceType type; 1082 void *handle; 1083 1084 PetscDeviceContextGetCurrentContext(&dctx); 1085 PetscDeviceContextGetStreamHandle(dctx, &handle); 1086 PetscDeviceContextGetDeviceType(dctx, &type); 1087 1088 if (type == PETSC_DEVICE_CUDA) { 1089 cudsStream_t stream = *(cudaStream_t*)handle; 1090 1091 my_cuda_kernel<<<1, 2, 3, stream>>>(); 1092 } 1093 .ve 1094 1095 .N ASYNC_API 1096 1097 .seealso: `PetscDeviceContext` 1098 @*/ 1099 PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void *handle) 1100 { 1101 PetscFunctionBegin; 1102 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 1103 PetscValidPointer(handle, 2); 1104 PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, (void **)handle)); 1105 PetscFunctionReturn(PETSC_SUCCESS); 1106 } 1107