#include "petscdevice_interface_internal.hpp" /*I I*/ #include // _p_PetscViewer for PetscObjectCast() #include #include #include #include #include // std::to_string among other things /* Define the allocator */ class PetscDeviceContextConstructor : public Petsc::ConstructorInterface<_p_PetscDeviceContext, PetscDeviceContextConstructor> { public: PetscErrorCode construct_(PetscDeviceContext dctx) const noexcept { PetscFunctionBegin; PetscCall(PetscArrayzero(dctx, 1)); PetscCall(PetscHeaderCreate_Private((PetscObject)dctx, PETSC_DEVICE_CONTEXT_CLASSID, "PetscDeviceContext", "PetscDeviceContext", "Sys", PETSC_COMM_SELF, (PetscObjectDestroyFn *)PetscDeviceContextDestroy, (PetscObjectViewFn *)PetscDeviceContextView)); PetscCall(PetscLogObjectCreate((PetscObject)dctx)); PetscCallCXX(PetscObjectCast(dctx)->cpp = new CxxData{dctx}); PetscCall(underlying().reset(dctx, false)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode destroy_(PetscDeviceContext dctx) noexcept { PetscFunctionBegin; PetscAssert(!dctx->numChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Device context still has %" PetscInt_FMT " un-joined children, must call PetscDeviceContextJoin() with all children before destroying", dctx->numChildren); PetscTryTypeMethod(dctx, destroy); PetscCall(PetscDeviceDestroy(&dctx->device)); PetscCall(PetscFree(dctx->childIDs)); delete CxxDataCast(dctx); PetscCall(PetscHeaderDestroy_Private(PetscObjectCast(dctx), PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode reset_(PetscDeviceContext dctx, bool zero = true) noexcept { PetscFunctionBegin; if (zero) { // reset the device if the user set it if (Petsc::util::exchange(dctx->usersetdevice, PETSC_FALSE)) { PetscTryTypeMethod(dctx, destroy); PetscCall(PetscDeviceDestroy(&dctx->device)); PetscCall(PetscArrayzero(dctx->ops, 1)); dctx->data = nullptr; } PetscCall(PetscHeaderReset_Internal(PetscObjectCast(dctx))); dctx->numChildren = 0; dctx->setup = PETSC_FALSE; // don't deallocate the child array, rather just zero it out PetscCall(PetscArrayzero(dctx->childIDs, dctx->maxNumChildren)); PetscCall(CxxDataCast(dctx)->clear()); PetscCall(CxxDataCast(dctx)->reset_self(dctx)); } dctx->streamType = PETSC_STREAM_DEFAULT; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode invalidate_(PetscDeviceContext dctx) noexcept { PetscFunctionBegin; PetscCall(CxxDataCast(dctx)->reset_self(dctx)); PetscFunctionReturn(PETSC_SUCCESS); } }; static Petsc::ObjectPool<_p_PetscDeviceContext, PetscDeviceContextConstructor> contextPool; // PetscClangLinter pragma disable: -fdoc-section-header-unknown /*@C PetscDeviceContextCreate - Creates a `PetscDeviceContext` Not Collective Output Parameter: . dctx - The `PetscDeviceContext` Level: beginner Note: Unlike almost every other PETSc class it is advised that most users use `PetscDeviceContextDuplicate()` rather than this routine to create new contexts. Contexts of different types are incompatible with one another; using `PetscDeviceContextDuplicate()` ensures compatible types. DAG representation: .vb time -> |= CALL =| - dctx -> .ve .N ASYNC_API .seealso: `PetscDeviceContextDuplicate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()`, `PetscDeviceContextView()`, `PetscDeviceContextDestroy()` @*/ PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *dctx) { PetscFunctionBegin; PetscAssertPointer(dctx, 1); PetscCall(PetscDeviceInitializePackage()); PetscCall(PetscLogEventBegin(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); PetscCall(contextPool.allocate(dctx)); PetscCall(PetscLogEventEnd(DCONTEXT_Create, nullptr, nullptr, nullptr, nullptr)); PetscFunctionReturn(PETSC_SUCCESS); } // PetscClangLinter pragma disable: -fdoc-section-header-unknown /*@C PetscDeviceContextDestroy - Frees a `PetscDeviceContext` Not Collective Input Parameter: . dctx - The `PetscDeviceContext` Level: beginner Notes: No implicit synchronization occurs due to this routine, all resources are released completely asynchronously w.r.t. the host. If one needs to guarantee access to the data produced on `dctx`'s stream the user is responsible for calling `PetscDeviceContextSynchronize()` before calling this routine. DAG representation: .vb time -> -> dctx - |= CALL =| .ve Developer Notes: `dctx` is never actually "destroyed" in the classical sense. It is returned to an ever growing pool of `PetscDeviceContext`s. There are currently no limits on the size of the pool, this should perhaps be implemented. .N ASYNC_API .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSynchronize()` @*/ PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *dctx) { PetscFunctionBegin; PetscAssertPointer(dctx, 1); if (!*dctx) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(PetscLogEventBegin(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); if (--(PetscObjectCast(*dctx)->refct) <= 0) { PetscCall(PetscDeviceContextCheckNotOrphaned_Internal(*dctx)); PetscCall(contextPool.deallocate(dctx)); } PetscCall(PetscLogEventEnd(DCONTEXT_Destroy, nullptr, nullptr, nullptr, nullptr)); *dctx = nullptr; PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextSetStreamType - Set the implementation type of the underlying stream for a `PetscDeviceContext` Not Collective Input Parameters: + dctx - The `PetscDeviceContext` - type - The `PetscStreamType` Level: beginner Note: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available types and their interactions. If the `PetscDeviceContext` was previously set up and stream type was changed, you must call `PetscDeviceContextSetUp()` again after this routine. .seealso: `PetscStreamType`, `PetscDeviceContextGetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetUp()`, `PetscDeviceContextSetFromOptions()` @*/ PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext dctx, PetscStreamType type) { PetscFunctionBegin; // do not use getoptionalnullcontext here since we do not want the user to change the stream // type PetscValidDeviceContext(dctx, 1); PetscValidStreamType(type, 2); // only need to do complex swapping if the object has already been setup if (dctx->setup && (dctx->streamType != type)) { dctx->setup = PETSC_FALSE; PetscCall(PetscLogEventBegin(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); PetscUseTypeMethod(dctx, changestreamtype, type); PetscCall(PetscLogEventEnd(DCONTEXT_ChangeStream, dctx, nullptr, nullptr, nullptr)); } dctx->streamType = type; PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextGetStreamType - Get the implementation type of the underlying stream for a `PetscDeviceContext` Not Collective Input Parameter: . dctx - The `PetscDeviceContext` Output Parameter: . type - The `PetscStreamType` Level: beginner Note: See `PetscStreamType` in `include/petscdevicetypes.h` for more information on the available types and their interactions .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextCreate()`, `PetscDeviceContextSetFromOptions()` @*/ PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext dctx, PetscStreamType *type) { PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscAssertPointer(type, 2); *type = dctx->streamType; PetscFunctionReturn(PETSC_SUCCESS); } /* Actual function to set the device. 1. Repeatedly destroying and recreating internal data structures (like streams and events) for recycled PetscDeviceContexts is not free. If done often, it does add up. 2. The vast majority of PetscDeviceContexts are created by PETSc either as children or default contexts. The default contexts *never* change type, and the children are extremely unlikely to (chances are if you fork once, you will fork again very soon). 3. The only time this calculus changes is if the user themselves sets the device type. In this case we do not know what the user has changed, so must always wipe the slate clean. Thus we need to keep track whether the user explicitly sets the device contexts device. */ static PetscErrorCode PetscDeviceContextSetDevice_Private(PetscDeviceContext dctx, PetscDevice device, PetscBool user_set) { PetscFunctionBegin; // do not use getoptionalnullcontext here since we do not want the user to change its device PetscValidDeviceContext(dctx, 1); PetscValidDevice(device, 2); if (dctx->device && (dctx->device->id == device->id)) PetscFunctionReturn(PETSC_SUCCESS); PetscCall(PetscLogEventBegin(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); PetscTryTypeMethod(dctx, destroy); PetscCall(PetscDeviceDestroy(&dctx->device)); PetscCall(PetscMemzero(dctx->ops, sizeof(*dctx->ops))); PetscCall(PetscDeviceReference_Internal(device)); // set it before calling the method dctx->device = device; PetscCall((*device->ops->createcontext)(dctx)); PetscCall(PetscLogEventEnd(DCONTEXT_SetDevice, dctx, nullptr, nullptr, nullptr)); dctx->setup = PETSC_FALSE; dctx->usersetdevice = user_set; PetscFunctionReturn(PETSC_SUCCESS); } PetscErrorCode PetscDeviceContextSetDefaultDeviceForType_Internal(PetscDeviceContext dctx, PetscDeviceType type) { PetscDevice device; PetscFunctionBegin; PetscCall(PetscDeviceGetDefaultForType_Internal(type, &device)); PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_FALSE)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextSetDevice - Set the underlying `PetscDevice` for a `PetscDeviceContext` Not Collective Input Parameters: + dctx - The `PetscDeviceContext` - device - The `PetscDevice` Level: intermediate Notes: This routine is effectively `PetscDeviceContext`'s "set-type" (so every `PetscDeviceContext` must also have an attached `PetscDevice`). Unlike the usual set-type semantics, it is not strictly necessary to set a contexts device to enable usage, any created `PetscDeviceContext`s will always come equipped with the "default" device. This routine is a no-op if `device` is already attached to `dctx`. This routine may (but is very unlikely to) initialize the backend device and may incur synchronization. .seealso: `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceContextGetDevice()`, `PetscDeviceContextGetDeviceType()` @*/ PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext dctx, PetscDevice device) { PetscFunctionBegin; PetscCall(PetscDeviceContextSetDevice_Private(dctx, device, PETSC_TRUE)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextGetDevice - Get the underlying `PetscDevice` for a `PetscDeviceContext` Not Collective Input Parameter: . dctx - the `PetscDeviceContext` Output Parameter: . device - The `PetscDevice` Level: intermediate Note: This is a borrowed reference, the user should not destroy `device`. .seealso: `PetscDeviceContextSetDevice()`, `PetscDevice`, `PetscDeviceContextGetDeviceType()` @*/ PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext dctx, PetscDevice *device) { PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscAssertPointer(device, 2); PetscAssert(dctx->device, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PetscDeviceContext %" PetscInt64_FMT " has no attached PetscDevice to get", PetscObjectCast(dctx)->id); *device = dctx->device; PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextGetDeviceType - Get the `PetscDeviceType` for a `PetscDeviceContext` Not Collective Input Parameter: . dctx - The `PetscDeviceContext` Output Parameter: . type - The `PetscDeviceType` Level: beginner Note: This routine is a convenience shorthand for `PetscDeviceContextGetDevice()` -> `PetscDeviceGetType()`. .seealso: `PetscDeviceType`, `PetscDeviceContextGetDevice()`, `PetscDeviceGetType()`, `PetscDevice` @*/ PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext dctx, PetscDeviceType *type) { PetscDevice device = nullptr; PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscAssertPointer(type, 2); PetscCall(PetscDeviceContextGetDevice(dctx, &device)); PetscCall(PetscDeviceGetType(device, type)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextSetUp - Prepares a `PetscDeviceContext` for use Not Collective Input Parameter: . dctx - The `PetscDeviceContext` Level: beginner Developer Notes: This routine is usually the stage where a `PetscDeviceContext` acquires device-side data structures such as streams, events, and (possibly) handles. .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextDestroy()`, `PetscDeviceContextSetFromOptions()` @*/ PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext dctx) { PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); if (dctx->setup) PetscFunctionReturn(PETSC_SUCCESS); if (!dctx->device) { const auto default_dtype = PETSC_DEVICE_DEFAULT(); PetscCall(PetscInfo(dctx, "PetscDeviceContext %" PetscInt64_FMT " did not have an explicitly attached PetscDevice, using default with type %s\n", PetscObjectCast(dctx)->id, PetscDeviceTypes[default_dtype])); PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, default_dtype)); } PetscCall(PetscLogEventBegin(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); PetscUseTypeMethod(dctx, setup); PetscCall(PetscLogEventEnd(DCONTEXT_SetUp, dctx, nullptr, nullptr, nullptr)); dctx->setup = PETSC_TRUE; PetscFunctionReturn(PETSC_SUCCESS); } static PetscErrorCode PetscDeviceContextDuplicate_Private(PetscDeviceContext dctx, PetscStreamType stype, PetscDeviceContext *dctxdup) { PetscFunctionBegin; PetscCall(PetscLogEventBegin(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); PetscCall(PetscDeviceContextCreate(dctxdup)); PetscCall(PetscDeviceContextSetStreamType(*dctxdup, stype)); if (const auto device = dctx->device) PetscCall(PetscDeviceContextSetDevice_Private(*dctxdup, device, dctx->usersetdevice)); PetscCall(PetscDeviceContextSetUp(*dctxdup)); PetscCall(PetscLogEventEnd(DCONTEXT_Duplicate, dctx, nullptr, nullptr, nullptr)); PetscFunctionReturn(PETSC_SUCCESS); } // PetscClangLinter pragma disable: -fdoc-section-header-unknown /*@C PetscDeviceContextDuplicate - Duplicates a `PetscDeviceContext` object Not Collective Input Parameter: . dctx - The `PetscDeviceContext` to duplicate Output Parameter: . dctxdup - The duplicated `PetscDeviceContext` Level: beginner Notes: This is a shorthand method for creating a `PetscDeviceContext` with the exact same settings as another. Note however that `dctxdup` does not share any of the underlying data with `dctx`, (including its current stream-state) they are completely separate objects. There is no implied ordering between `dctx` or `dctxdup`. DAG representation: .vb time -> -> dctx - |= CALL =| - dctx ----> - dctxdup -> .ve .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextSetStreamType()` @*/ PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext dctx, PetscDeviceContext *dctxdup) { auto stype = PETSC_STREAM_DEFAULT; PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscAssertPointer(dctxdup, 2); PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, dctxdup)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextQueryIdle - Returns whether or not a `PetscDeviceContext` is idle Not Collective Input Parameter: . dctx - The `PetscDeviceContext` Output Parameter: . idle - `PETSC_TRUE` if `dctx` has NO work, `PETSC_FALSE` if it has work Level: intermediate Note: This routine only refers a singular context and does NOT take any of its children into account. That is, if `dctx` is idle but has dependents who do have work this routine still returns `PETSC_TRUE`. .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextWaitForContext()`, `PetscDeviceContextFork()` @*/ PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext dctx, PetscBool *idle) { PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscAssertPointer(idle, 2); PetscCall(PetscLogEventBegin(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); PetscUseTypeMethod(dctx, query, idle); PetscCall(PetscLogEventEnd(DCONTEXT_QueryIdle, dctx, nullptr, nullptr, nullptr)); PetscCall(PetscInfo(dctx, "PetscDeviceContext ('%s', id %" PetscInt64_FMT ") %s idle\n", PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, *idle ? "was" : "was not")); PetscFunctionReturn(PETSC_SUCCESS); } // PetscClangLinter pragma disable: -fdoc-section-header-unknown /*@C PetscDeviceContextWaitForContext - Make one context wait for another context to finish Not Collective Input Parameters: + dctxa - The `PetscDeviceContext` object that is waiting - dctxb - The `PetscDeviceContext` object that is being waited on Level: beginner Notes: Serializes two `PetscDeviceContext`s. Serialization is performed asynchronously; the host does not wait for the serialization to actually occur. This routine uses only the state of `dctxb` at the moment this routine was called, so any future work queued will not affect `dctxa`. It is safe to pass the same context to both arguments (in which case this routine does nothing). DAG representation: .vb time -> -> dctxa ---/- |= CALL =| - dctxa -> / -> dctxb -/------------------------> .ve .N ASYNC_API .seealso: `PetscDeviceContextCreate()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextJoin()` @*/ PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext dctxa, PetscDeviceContext dctxb) { PetscObjectId bid; PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxa)); PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctxb)); PetscCheckCompatibleDeviceContexts(dctxa, 1, dctxb, 2); if (dctxa == dctxb) PetscFunctionReturn(PETSC_SUCCESS); bid = PetscObjectCast(dctxb)->id; PetscCall(PetscLogEventBegin(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); PetscUseTypeMethod(dctxa, waitforcontext, dctxb); PetscCallCXX(CxxDataCast(dctxa)->upstream()[bid] = CxxDataCast(dctxb)->weak_snapshot()); PetscCall(PetscLogEventEnd(DCONTEXT_WaitForCtx, dctxa, dctxb, nullptr, nullptr)); PetscCall(PetscInfo(dctxa, "dctx %" PetscInt64_FMT " waiting on dctx %" PetscInt64_FMT "\n", PetscObjectCast(dctxa)->id, bid)); PetscCall(PetscObjectStateIncrease(PetscObjectCast(dctxa))); PetscFunctionReturn(PETSC_SUCCESS); } // PetscClangLinter pragma disable: -fdoc-section-header-unknown /*@C PetscDeviceContextForkWithStreamType - Create a set of dependent child contexts from a parent context with a prescribed `PetscStreamType` Not Collective, Asynchronous Input Parameters: + dctx - The parent `PetscDeviceContext` . stype - The prescribed `PetscStreamType` - n - The number of children to create Output Parameter: . dsub - The created child context(s) Level: intermediate Notes: This routine creates `n` edges of a DAG from a source node which are causally dependent on the source node. This causal dependency is established as-if by calling `PetscDeviceContextWaitForContext()` on every child. `dsub` is allocated by this routine and has its lifetime bounded by `dctx`. That is, `dctx` expects to free `dsub` (via `PetscDeviceContextJoin()`) before it itself is destroyed. This routine only accounts for work queued on `dctx` up until calling this routine, any subsequent work enqueued on `dctx` has no effect on `dsub`. The `PetscStreamType` of `dctx` does not have to equal `stype`. In fact, it is often the case that they are different. This is useful in cases where a routine can locally exploit stream parallelism without needing to worry about what stream type the incoming `PetscDeviceContext` carries. DAG representation: .vb time -> -> dctx - |= CALL =| -\----> dctx ------> \---> dsub[0] ---> \--> ... -------> \-> dsub[n-1] -> .ve .N ASYNC_API .seealso: `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()`, `PetscDeviceContextWaitForContext()` @*/ PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext dctx, PetscStreamType stype, PetscInt n, PetscDeviceContext **dsub) { // debugging only std::string idList; auto ninput = n; PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts requested %" PetscInt_FMT " < 0", n); PetscAssertPointer(dsub, 4); *dsub = nullptr; /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); PetscCall(PetscLogEventBegin(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); /* update child totals */ dctx->numChildren += n; /* now to find out if we have room */ if (dctx->numChildren > dctx->maxNumChildren) { const auto numChildren = dctx->numChildren; auto &maxNumChildren = dctx->maxNumChildren; auto numAllocated = numChildren; /* no room, either from having too many kids or not having any */ if (auto &childIDs = dctx->childIDs) { // the difference is backwards because we have not updated maxNumChildren yet numAllocated -= maxNumChildren; /* have existing children, must reallocate them */ PetscCall(PetscRealloc(numChildren * sizeof(*childIDs), &childIDs)); /* clear the extra memory since realloc doesn't do it for us */ PetscCall(PetscArrayzero(std::next(childIDs, maxNumChildren), numAllocated)); } else { /* have no children */ PetscCall(PetscCalloc1(numChildren, &childIDs)); } /* update total number of children */ maxNumChildren = numChildren; } PetscCall(PetscMalloc1(n, dsub)); for (PetscInt i = 0; ninput && (i < dctx->numChildren); ++i) { auto &childID = dctx->childIDs[i]; /* empty child slot */ if (!childID) { auto &childctx = (*dsub)[i]; /* create the child context in the image of its parent */ PetscCall(PetscDeviceContextDuplicate_Private(dctx, stype, &childctx)); PetscCall(PetscDeviceContextWaitForContext(childctx, dctx)); /* register the child with its parent */ PetscCall(PetscObjectGetId(PetscObjectCast(childctx), &childID)); if (PetscDefined(USE_DEBUG_AND_INFO)) { PetscCallCXX(idList += std::to_string(childID)); if (ninput != 1) PetscCallCXX(idList += ", "); } --ninput; } } PetscCall(PetscLogEventEnd(DCONTEXT_Fork, dctx, nullptr, nullptr, nullptr)); PetscCall(PetscDebugInfo(dctx, "Forked %" PetscInt_FMT " children from parent %" PetscInt64_FMT " with IDs: %s\n", n, PetscObjectCast(dctx)->id, idList.c_str())); PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextFork - Create a set of dependent child contexts from a parent context Not Collective, Asynchronous Input Parameters: + dctx - The parent `PetscDeviceContext` - n - The number of children to create Output Parameter: . dsub - The created child context(s) Level: beginner Notes: Behaves identically to `PetscDeviceContextForkWithStreamType()` except that the prescribed `PetscStreamType` is taken from `dctx`. In effect this routine is shorthand for\: .vb PetscStreamType stype; PetscDeviceContextGetStreamType(dctx, &stype); PetscDeviceContextForkWithStreamType(dctx, stype, ...); .ve .N ASYNC_API .seealso: `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextJoin()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextQueryIdle()` @*/ PetscErrorCode PetscDeviceContextFork(PetscDeviceContext dctx, PetscInt n, PetscDeviceContext **dsub) { auto stype = PETSC_STREAM_DEFAULT; PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); PetscCall(PetscDeviceContextForkWithStreamType(dctx, stype, n, dsub)); PetscFunctionReturn(PETSC_SUCCESS); } // PetscClangLinter pragma disable: -fdoc-section-header-unknown /*@C PetscDeviceContextJoin - Converge a set of child contexts Not Collective, Asynchronous Input Parameters: + dctx - A `PetscDeviceContext` to converge on . n - The number of sub contexts to converge . joinMode - The type of join to perform - dsub - The sub contexts to converge Level: beginner Notes: If `PetscDeviceContextFork()` creates `n` edges from a source node which all depend on the source node, then this routine is the exact mirror. That is, it creates a node (represented in `dctx`) which receives `n` edges (and optionally destroys them) which is dependent on the completion of all incoming edges. If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY`. All contexts in `dsub` will be destroyed by this routine. Thus all sub contexts must have been created with the `dctx` passed to this routine. If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC`. All sub contexts will additionally wait on `dctx` after converging. This has the effect of "synchronizing" the outgoing edges. Note the sync suffix does NOT refer to the host, i.e. this routine does NOT call `PetscDeviceSynchronize()`. If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC`. `dctx` waits for all sub contexts but the sub contexts do not wait for one another or `dctx` afterwards. DAG representations: If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_DESTROY` .vb time -> -> dctx ---------/- |= CALL =| - dctx -> -> dsub[0] -----/ -> ... -------/ -> dsub[n-1] -/ .ve If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_SYNC` .vb time -> -> dctx ---------/- |= CALL =| -\----> dctx ------> -> dsub[0] -----/ \---> dsub[0] ---> -> ... -------/ \--> ... -------> -> dsub[n-1] -/ \-> dsub[n-1] -> .ve If `joinMode` is `PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC` .vb time -> -> dctx ----------/- |= CALL =| - dctx -> -> dsub[0] ------/-----------------------> -> ... --------/------------------------> -> dsub[n-1] --/-------------------------> .ve .N ASYNC_API .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextForkWithStreamType()`, `PetscDeviceContextSynchronize()`, `PetscDeviceContextJoinMode` @*/ PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext dctx, PetscInt n, PetscDeviceContextJoinMode joinMode, PetscDeviceContext **dsub) { // debugging only std::string idList; PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); /* validity of dctx is checked in the wait-for loop */ PetscAssertPointer(dsub, 4); PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of contexts merged %" PetscInt_FMT " < 0", n); /* reserve 4 chars per id, 2 for number and 2 for ', ' separator */ if (PetscDefined(USE_DEBUG_AND_INFO)) PetscCallCXX(idList.reserve(4 * n)); /* first dctx waits on all the incoming edges */ PetscCall(PetscLogEventBegin(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); for (PetscInt i = 0; i < n; ++i) { PetscCheckCompatibleDeviceContexts(dctx, 1, (*dsub)[i], 4); PetscCall(PetscDeviceContextWaitForContext(dctx, (*dsub)[i])); if (PetscDefined(USE_DEBUG_AND_INFO)) { PetscCallCXX(idList += std::to_string(PetscObjectCast((*dsub)[i])->id)); if (i + 1 < n) PetscCallCXX(idList += ", "); } } /* now we handle the aftermath */ switch (joinMode) { case PETSC_DEVICE_CONTEXT_JOIN_DESTROY: { const auto children = dctx->childIDs; const auto maxchild = dctx->maxNumChildren; auto &nchild = dctx->numChildren; PetscInt j = 0; PetscCheck(n <= nchild, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Trying to destroy %" PetscInt_FMT " children of a parent context that only has %" PetscInt_FMT " children, likely trying to restore to wrong parent", n, nchild); /* update child count while it's still fresh in memory */ nchild -= n; for (PetscInt i = 0; i < maxchild; ++i) { if (children[i] && (children[i] == PetscObjectCast((*dsub)[j])->id)) { /* child is one of ours, can destroy it */ PetscCall(PetscDeviceContextDestroy((*dsub) + j)); /* reset the child slot */ children[i] = 0; if (++j == n) break; } } /* gone through the loop but did not find every child */ PetscCheck(j == n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " contexts still remain after destroy, this may be because you are trying to restore to the wrong parent context, or the device contexts are not in the same order as they were checked out in", n - j); PetscCall(PetscFree(*dsub)); } break; case PETSC_DEVICE_CONTEXT_JOIN_SYNC: for (PetscInt i = 0; i < n; ++i) PetscCall(PetscDeviceContextWaitForContext((*dsub)[i], dctx)); case PETSC_DEVICE_CONTEXT_JOIN_NO_SYNC: break; default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown PetscDeviceContextJoinMode given"); } PetscCall(PetscLogEventEnd(DCONTEXT_Join, dctx, nullptr, nullptr, nullptr)); PetscCall(PetscDebugInfo(dctx, "Joined %" PetscInt_FMT " ctxs to ctx %" PetscInt64_FMT ", mode %s with IDs: %s\n", n, PetscObjectCast(dctx)->id, PetscDeviceContextJoinModes[joinMode], idList.c_str())); PetscFunctionReturn(PETSC_SUCCESS); } // PetscClangLinter pragma disable: -fdoc-section-header-unknown /*@C PetscDeviceContextSynchronize - Block the host until all work queued on a `PetscDeviceContext` has finished Not Collective Input Parameter: . dctx - The `PetscDeviceContext` to synchronize Level: beginner Notes: The host will not return from this routine until `dctx` is idle. Any and all memory operations queued on or otherwise associated with (either explicitly or implicitly via dependencies) are guaranteed to have finished and be globally visible on return. In effect, this routine serves as memory and execution barrier. DAG representation: .vb time -> -> dctx - |= CALL =| - dctx -> .ve .seealso: `PetscDeviceContextFork()`, `PetscDeviceContextJoin()`, `PetscDeviceContextQueryIdle()` @*/ PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext dctx) { PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscCall(PetscLogEventBegin(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); /* if it isn't setup there is nothing to sync on */ if (dctx->setup) { PetscUseTypeMethod(dctx, synchronize); PetscCall(PetscDeviceContextSyncClearMap_Internal(dctx)); } PetscCall(PetscLogEventEnd(DCONTEXT_Sync, dctx, nullptr, nullptr, nullptr)); PetscFunctionReturn(PETSC_SUCCESS); } /* every device type has a vector of null PetscDeviceContexts -- one for each device */ static auto nullContexts = std::array, PETSC_DEVICE_MAX>{}; static auto nullContextsFinalizer = false; static PetscErrorCode PetscDeviceContextGetNullContextForDevice_Private(PetscBool user_set_device, PetscDevice device, PetscDeviceContext *dctx) { PetscInt devid; PetscDeviceType dtype; PetscFunctionBegin; PetscValidDevice(device, 2); PetscAssertPointer(dctx, 3); if (PetscUnlikely(!nullContextsFinalizer)) { nullContextsFinalizer = true; PetscCall(PetscRegisterFinalize([] { PetscFunctionBegin; for (auto &&dvec : nullContexts) { for (auto &&dctx : dvec) PetscCall(PetscDeviceContextDestroy(&dctx)); PetscCallCXX(dvec.clear()); } nullContextsFinalizer = false; PetscFunctionReturn(PETSC_SUCCESS); })); } PetscCall(PetscDeviceGetDeviceId(device, &devid)); PetscCall(PetscDeviceGetType(device, &dtype)); { auto &ctxlist = nullContexts[dtype]; PetscCheck(devid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Device ID (%" PetscInt_FMT ") must be positive", devid); // need to resize the container if not big enough because incrementing the iterator in // std::next() (if we haven't initialized that ctx yet) may cause it to fall outside the // current size of the container. if (static_cast(devid) >= ctxlist.size()) PetscCallCXX(ctxlist.resize(devid + 1)); if (PetscUnlikely(!ctxlist[devid])) { // we have not seen this device before PetscCall(PetscDeviceContextCreate(dctx)); PetscCall(PetscInfo(*dctx, "Initializing null PetscDeviceContext (of type %s) for device %" PetscInt_FMT "\n", PetscDeviceTypes[dtype], devid)); { const auto pobj = PetscObjectCast(*dctx); const auto name = "null context " + std::to_string(devid); const auto prefix = "null_context_" + std::to_string(devid) + '_'; PetscCall(PetscObjectSetName(pobj, name.c_str())); PetscCall(PetscObjectSetOptionsPrefix(pobj, prefix.c_str())); } PetscCall(PetscDeviceContextSetStreamType(*dctx, PETSC_STREAM_DEFAULT)); PetscCall(PetscDeviceContextSetDevice_Private(*dctx, device, user_set_device)); PetscCall(PetscDeviceContextSetUp(*dctx)); // would use ctxlist.cbegin() but GCC 4.8 can't handle const iterator insert! PetscCallCXX(ctxlist.insert(std::next(ctxlist.begin(), devid), *dctx)); } else *dctx = ctxlist[devid]; } PetscFunctionReturn(PETSC_SUCCESS); } /* Gets the "NULL" context for the current PetscDeviceType and PetscDevice. NULL contexts are guaranteed to always be globally blocking. */ PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *dctx) { PetscDeviceContext gctx; PetscDevice gdev = nullptr; PetscFunctionBegin; PetscAssertPointer(dctx, 1); PetscCall(PetscDeviceContextGetCurrentContext(&gctx)); PetscCall(PetscDeviceContextGetDevice(gctx, &gdev)); PetscCall(PetscDeviceContextGetNullContextForDevice_Private(gctx->usersetdevice, gdev, dctx)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextSetFromOptions - Configure a `PetscDeviceContext` from the options database Collective on `comm` or `dctx` Input Parameters: + comm - MPI communicator on which to query the options database (optional) - dctx - The `PetscDeviceContext` to configure Output Parameter: . dctx - The `PetscDeviceContext` Options Database Keys: + -device_context_stream_type - type of stream to create inside the `PetscDeviceContext` - `PetscDeviceContextSetStreamType()` - -device_context_device_type - the type of `PetscDevice` to attach by default - `PetscDeviceType` Level: beginner Note: The user may pass `MPI_COMM_NULL` for `comm` in which case the communicator of `dctx` is used (which is always `PETSC_COMM_SELF`). .seealso: `PetscDeviceContextSetStreamType()`, `PetscDeviceContextSetDevice()`, `PetscDeviceContextView()` @*/ PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm comm, PetscDeviceContext dctx) { const auto pobj = PetscObjectCast(dctx); auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE); auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE); MPI_Comm old_comm = PETSC_COMM_SELF; PetscFunctionBegin; // do not user getoptionalnullcontext here, the user is not allowed to set it from options! PetscValidDeviceContext(dctx, 2); /* set the device type first */ if (const auto device = dctx->device) PetscCall(PetscDeviceGetType(device, &dtype.first)); PetscCall(PetscDeviceContextGetStreamType(dctx, &stype.first)); if (comm == MPI_COMM_NULL) { PetscCall(PetscObjectGetComm(pobj, &comm)); } else { // briefly set the communicator for dctx (it is always PETSC_COMM_SELF) so // PetscObjectOptionsBegin() behaves as if dctx had comm old_comm = Petsc::util::exchange(pobj->comm, comm); } PetscObjectOptionsBegin(pobj); PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype)); PetscOptionsEnd(); // reset the comm (should be PETSC_COMM_SELF) if (comm != MPI_COMM_NULL) pobj->comm = old_comm; if (dtype.second) PetscCall(PetscDeviceContextSetDefaultDeviceForType_Internal(dctx, dtype.first)); if (stype.second) PetscCall(PetscDeviceContextSetStreamType(dctx, stype.first)); PetscCall(PetscDeviceContextSetUp(dctx)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ PetscDeviceContextView - View a `PetscDeviceContext` Collective on `viewer` Input Parameters: + dctx - The `PetscDeviceContext` - viewer - The `PetscViewer` to view `dctx` with (may be `NULL`) Level: beginner Note: If `viewer` is `NULL`, `PETSC_VIEWER_STDOUT_WORLD` is used instead, in which case this routine is collective on `PETSC_COMM_WORLD`. .seealso: `PetscDeviceContextViewFromOptions()`, `PetscDeviceView()`, `PETSC_VIEWER_STDOUT_WORLD`, `PetscDeviceContextCreate()` @*/ PetscErrorCode PetscDeviceContextView(PetscDeviceContext dctx, PetscViewer viewer) { PetscBool isascii; PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer)); PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &isascii)); if (isascii) { auto stype = PETSC_STREAM_DEFAULT; PetscViewer sub; PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub)); PetscCall(PetscObjectPrintClassNamePrefixType(PetscObjectCast(dctx), sub)); PetscCall(PetscViewerASCIIPushTab(sub)); PetscCall(PetscDeviceContextGetStreamType(dctx, &stype)); PetscCall(PetscViewerASCIIPrintf(sub, "stream type: %s\n", PetscStreamTypes[stype])); PetscCall(PetscViewerASCIIPrintf(sub, "children: %" PetscInt_FMT "\n", dctx->numChildren)); if (const auto nchild = dctx->numChildren) { PetscCall(PetscViewerASCIIPushTab(sub)); for (PetscInt i = 0; i < nchild; ++i) { if (i == nchild - 1) { PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT, dctx->childIDs[i])); } else { PetscCall(PetscViewerASCIIPrintf(sub, "%" PetscInt64_FMT ", ", dctx->childIDs[i])); } } } PetscCall(PetscViewerASCIIPopTab(sub)); PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub)); PetscCall(PetscViewerASCIIPushTab(viewer)); } if (const auto device = dctx->device) PetscCall(PetscDeviceView(device, viewer)); if (isascii) PetscCall(PetscViewerASCIIPopTab(viewer)); PetscFunctionReturn(PETSC_SUCCESS); } /*@ PetscDeviceContextViewFromOptions - View a `PetscDeviceContext` from options Input Parameters: + dctx - The `PetscDeviceContext` to view . obj - Optional `PetscObject` to associate (may be `NULL`) - name - The command line option Level: beginner .seealso: `PetscDeviceContextView()`, `PetscObjectViewFromOptions()`, `PetscDeviceContextCreate()` @*/ PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext dctx, PetscObject obj, const char name[]) { PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); if (obj) PetscValidHeader(obj, 2); PetscAssertPointer(name, 3); PetscCall(PetscObjectViewFromOptions(PetscObjectCast(dctx), obj, name)); PetscFunctionReturn(PETSC_SUCCESS); } /*@C PetscDeviceContextGetStreamHandle - Return a handle to the underlying stream of the current device context Input Parameter: . dctx - The `PetscDeviceContext` to get the stream from Output Parameter: . handle - A pointer to the handle to the stream Level: developer Note: This routine is dangerous. It exists only for the most experienced users and internal PETSc development. There is no way for PETSc's auto-dependency system to track what the caller does with the stream. If the user uses the stream to copy memory that was previously modified by PETSc, or launches kernels that modify memory with the stream, it is the users responsibility to inform PETSc of their actions via `PetscDeviceContextMarkIntentFromID()`. Failure to do so may introduce a race condition. This race condition may manifest in nondeterministic ways. Alternatively, the user may synchronize the stream immediately before and after use. This is the safest option. Example Usage: .vb PetscDeviceContext dctx; PetscDeviceType type; void *handle; PetscDeviceContextGetCurrentContext(&dctx); PetscDeviceContextGetStreamHandle(dctx, &handle); PetscDeviceContextGetDeviceType(dctx, &type); if (type == PETSC_DEVICE_CUDA) { cudaStream_t stream = *(cudaStream_t *)handle; my_cuda_kernel<<<1, 2, 3, stream>>>(); } .ve Alternatively, if type of `PetscDeviceContext` is known (for example `PETSC_DEVICE_HIP`), the user may pass in a pointer to stream handle directly\: .vb hipStream_t *stream; // note the cast to void ** PetscDeviceContextGetStreamHandle(dctx, (void **)&stream); // note the dereference my_hip_kernel<<<1, 2, 3, *stream>>>(); .ve .N ASYNC_API .seealso: `PetscDeviceContext` @*/ PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext dctx, void **handle) { PetscFunctionBegin; PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); PetscAssertPointer(handle, 2); PetscCall(PetscDeviceContextGetStreamHandle_Internal(dctx, handle)); PetscFunctionReturn(PETSC_SUCCESS); }