1 #pragma once 2 3 #include <petsc/private/cupminterface.hpp> 4 #include <petsc/private/cpp/memory.hpp> 5 #include <petsc/private/cpp/object_pool.hpp> 6 7 #include <stack> 8 9 namespace Petsc 10 { 11 12 namespace device 13 { 14 15 namespace cupm 16 { 17 18 // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and 19 // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20% 20 // speedup. 21 template <DeviceType T, unsigned long flags> 22 class CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> { 23 public: 24 PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T); 25 26 PetscErrorCode allocate(cupmEvent_t *) noexcept; 27 PetscErrorCode deallocate(cupmEvent_t *) noexcept; 28 29 PetscErrorCode finalize_() noexcept; 30 31 private: 32 std::stack<cupmEvent_t> pool_; 33 }; 34 35 template <DeviceType T, unsigned long flags> 36 inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept 37 { 38 PetscFunctionBegin; 39 while (!pool_.empty()) { 40 PetscCallCUPM(cupmEventDestroy(std::move(pool_.top()))); 41 PetscCallCXX(pool_.pop()); 42 } 43 PetscFunctionReturn(PETSC_SUCCESS); 44 } 45 46 template <DeviceType T, unsigned long flags> 47 inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept 48 { 49 PetscFunctionBegin; 50 PetscAssertPointer(event, 1); 51 if (pool_.empty()) { 52 PetscCall(this->register_finalize()); 53 PetscCallCUPM(cupmEventCreateWithFlags(event, flags)); 54 } else { 55 PetscCallCXX(*event = std::move(pool_.top())); 56 PetscCallCXX(pool_.pop()); 57 } 58 PetscFunctionReturn(PETSC_SUCCESS); 59 } 60 61 template <DeviceType T, unsigned long flags> 62 inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept 63 { 64 PetscFunctionBegin; 65 PetscAssertPointer(in_event, 1); 66 if (auto event = std::exchange(*in_event, cupmEvent_t{})) { 67 if (this->registered()) { 68 PetscCallCXX(pool_.push(std::move(event))); 69 } else { 70 PetscCallCUPM(cupmEventDestroy(event)); 71 } 72 } 73 PetscFunctionReturn(PETSC_SUCCESS); 74 } 75 76 template <DeviceType T, unsigned long flags> 77 CUPMEventPool<T, flags> &cupm_event_pool() noexcept 78 { 79 static CUPMEventPool<T, flags> pool; 80 return pool; 81 } 82 83 // pool of events with timing disabled 84 template <DeviceType T> 85 inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) & 86 { 87 return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>(); 88 } 89 90 // pool of events with timing enabled 91 template <DeviceType T> 92 inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) & 93 { 94 return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>(); 95 } 96 97 // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the 98 // event-stream pairing for the async allocator. It is also used as the data member of 99 // PetscEvent. 100 template <DeviceType T> 101 class CUPMEvent : impl::Interface<T>, public memory::PoolAllocated { 102 using pool_type = memory::PoolAllocated; 103 104 public: 105 PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T); 106 107 constexpr CUPMEvent() noexcept = default; 108 ~CUPMEvent() noexcept; 109 110 CUPMEvent(CUPMEvent &&) noexcept; 111 CUPMEvent &operator=(CUPMEvent &&) noexcept; 112 113 // event is not copyable 114 CUPMEvent(const CUPMEvent &) = delete; 115 CUPMEvent &operator=(const CUPMEvent &) = delete; 116 117 PETSC_NODISCARD cupmEvent_t get() noexcept; 118 PetscErrorCode record(cupmStream_t) noexcept; 119 120 explicit operator bool() const noexcept; 121 122 private: 123 cupmEvent_t event_{}; 124 }; 125 126 template <DeviceType T> 127 inline CUPMEvent<T>::~CUPMEvent() noexcept 128 { 129 PetscFunctionBegin; 130 PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_)); 131 PetscFunctionReturnVoid(); 132 } 133 134 template <DeviceType T> 135 inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{})) 136 { 137 static_assert(std::is_empty<impl::Interface<T>>::value, ""); 138 } 139 140 template <DeviceType T> 141 inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept 142 { 143 PetscFunctionBegin; 144 if (this != &other) { 145 pool_type::operator=(std::move(other)); 146 PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_)); 147 event_ = util::exchange(other.event_, cupmEvent_t{}); 148 } 149 PetscFunctionReturn(*this); 150 } 151 152 template <DeviceType T> 153 inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept 154 { 155 PetscFunctionBegin; 156 if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_)); 157 PetscFunctionReturn(event_); 158 } 159 160 template <DeviceType T> 161 inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept 162 { 163 PetscFunctionBegin; 164 PetscCallCUPM(cupmEventRecord(get(), stream)); 165 PetscFunctionReturn(PETSC_SUCCESS); 166 } 167 168 template <DeviceType T> 169 inline CUPMEvent<T>::operator bool() const noexcept 170 { 171 return event_ != cupmEvent_t{}; 172 } 173 174 } // namespace cupm 175 176 } // namespace device 177 178 } // namespace Petsc 179