#pragma once #include #include "../segmentedmempool.hpp" #include "cupmthrustutility.hpp" #include #include #include // std::numeric_limits namespace Petsc { namespace device { namespace cupm { // ========================================================================================== // CUPM Host Allocator // ========================================================================================== template class HostAllocator; // Allocator class to allocate pinned host memory for use with device template class HostAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase, impl::Interface { public: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T); using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase; using real_value_type = typename base_type::real_value_type; using size_type = typename base_type::size_type; using value_type = typename base_type::value_type; template static PetscErrorCode allocate(value_type **, size_type, const StreamBase *) noexcept; template static PetscErrorCode deallocate(value_type *, const StreamBase *) noexcept; template static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase *) noexcept; }; template template inline PetscErrorCode HostAllocator::allocate(value_type **ptr, size_type n, const StreamBase *) noexcept { PetscFunctionBegin; PetscCall(PetscCUPMMallocHost(ptr, n)); PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode HostAllocator::deallocate(value_type *ptr, const StreamBase *) noexcept { PetscFunctionBegin; PetscCallCUPM(cupmFreeHost(ptr)); PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode HostAllocator::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase *stream) noexcept { PetscFunctionBegin; PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyHostToHost, stream->get_stream(), true)); PetscFunctionReturn(PETSC_SUCCESS); } // ========================================================================================== // CUPM Device Allocator // ========================================================================================== template class DeviceAllocator; template class DeviceAllocator : public memory::impl::SegmentedMemoryPoolAllocatorBase, impl::Interface { public: PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T); using base_type = memory::impl::SegmentedMemoryPoolAllocatorBase; using real_value_type = typename base_type::real_value_type; using size_type = typename base_type::size_type; using value_type = typename base_type::value_type; template static PetscErrorCode allocate(value_type **, size_type, const StreamBase *) noexcept; template static PetscErrorCode deallocate(value_type *, const StreamBase *) noexcept; template static PetscErrorCode zero(value_type *, size_type, const StreamBase *) noexcept; template static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const StreamBase *) noexcept; template static PetscErrorCode set_canary(value_type *, size_type, const StreamBase *) noexcept; }; template template inline PetscErrorCode DeviceAllocator::allocate(value_type **ptr, size_type n, const StreamBase *stream) noexcept { PetscFunctionBegin; PetscCall(PetscCUPMMallocAsync(ptr, n, stream->get_stream())); PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode DeviceAllocator::deallocate(value_type *ptr, const StreamBase *stream) noexcept { PetscFunctionBegin; PetscCallCUPM(cupmFreeAsync(ptr, stream->get_stream())); PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode DeviceAllocator::zero(value_type *ptr, size_type n, const StreamBase *stream) noexcept { PetscFunctionBegin; PetscCall(PetscCUPMMemsetAsync(ptr, 0, n, stream->get_stream(), true)); PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode DeviceAllocator::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const StreamBase *stream) noexcept { PetscFunctionBegin; PetscCall(PetscCUPMMemcpyAsync(dest, src, n, cupmMemcpyDeviceToDevice, stream->get_stream(), true)); PetscFunctionReturn(PETSC_SUCCESS); } template template inline PetscErrorCode DeviceAllocator::set_canary(value_type *ptr, size_type n, const StreamBase *stream) noexcept { using limit_t = std::numeric_limits; const value_type canary = limit_t::has_signaling_NaN ? limit_t::signaling_NaN() : limit_t::max(); const auto xptr = thrust::device_pointer_cast(ptr); PetscFunctionBegin; PetscCallThrust(THRUST_CALL(thrust::fill, stream->get_stream(), xptr, xptr + n, canary)); PetscFunctionReturn(PETSC_SUCCESS); } } // namespace cupm } // namespace device } // namespace Petsc