1 #ifndef PETSC_CUPMEVENT_HPP 2 #define PETSC_CUPMEVENT_HPP 3 4 #include <petsc/private/cupminterface.hpp> 5 #include <petsc/private/cpp/memory.hpp> 6 #include <petsc/private/cpp/object_pool.hpp> 7 8 #include <stack> 9 10 namespace Petsc 11 { 12 13 namespace device 14 { 15 16 namespace cupm 17 { 18 19 // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and 20 // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20% 21 // speedup. 22 template <DeviceType T, unsigned long flags> 23 class CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> { 24 public: 25 PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T); 26 27 PetscErrorCode allocate(cupmEvent_t *) noexcept; 28 PetscErrorCode deallocate(cupmEvent_t *) noexcept; 29 30 PetscErrorCode finalize_() noexcept; 31 32 private: 33 std::stack<cupmEvent_t> pool_; 34 }; 35 36 template <DeviceType T, unsigned long flags> 37 inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept 38 { 39 PetscFunctionBegin; 40 while (!pool_.empty()) { 41 PetscCallCUPM(cupmEventDestroy(std::move(pool_.top()))); 42 PetscCallCXX(pool_.pop()); 43 } 44 PetscFunctionReturn(PETSC_SUCCESS); 45 } 46 47 template <DeviceType T, unsigned long flags> 48 inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept 49 { 50 PetscFunctionBegin; 51 PetscAssertPointer(event, 1); 52 if (pool_.empty()) { 53 PetscCall(this->register_finalize()); 54 PetscCallCUPM(cupmEventCreateWithFlags(event, flags)); 55 } else { 56 PetscCallCXX(*event = std::move(pool_.top())); 57 PetscCallCXX(pool_.pop()); 58 } 59 PetscFunctionReturn(PETSC_SUCCESS); 60 } 61 62 template <DeviceType T, unsigned long flags> 63 inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept 64 { 65 PetscFunctionBegin; 66 PetscAssertPointer(in_event, 1); 67 if (auto event = std::exchange(*in_event, cupmEvent_t{})) { 68 if (this->registered()) { 69 PetscCallCXX(pool_.push(std::move(event))); 70 } else { 71 PetscCallCUPM(cupmEventDestroy(event)); 72 } 73 } 74 PetscFunctionReturn(PETSC_SUCCESS); 75 } 76 77 template <DeviceType T, unsigned long flags> 78 CUPMEventPool<T, flags> &cupm_event_pool() noexcept 79 { 80 static CUPMEventPool<T, flags> pool; 81 return pool; 82 } 83 84 // pool of events with timing disabled 85 template <DeviceType T> 86 inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) & 87 { 88 return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>(); 89 } 90 91 // pool of events with timing enabled 92 template <DeviceType T> 93 inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) & 94 { 95 return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>(); 96 } 97 98 // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the 99 // event-stream pairing for the async allocator. It is also used as the data member of 100 // PetscEvent. 101 template <DeviceType T> 102 class CUPMEvent : impl::Interface<T>, public memory::PoolAllocated { 103 using pool_type = memory::PoolAllocated; 104 105 public: 106 PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T); 107 108 constexpr CUPMEvent() noexcept = default; 109 ~CUPMEvent() noexcept; 110 111 CUPMEvent(CUPMEvent &&) noexcept; 112 CUPMEvent &operator=(CUPMEvent &&) noexcept; 113 114 // event is not copyable 115 CUPMEvent(const CUPMEvent &) = delete; 116 CUPMEvent &operator=(const CUPMEvent &) = delete; 117 118 PETSC_NODISCARD cupmEvent_t get() noexcept; 119 PetscErrorCode record(cupmStream_t) noexcept; 120 121 explicit operator bool() const noexcept; 122 123 private: 124 cupmEvent_t event_{}; 125 }; 126 127 template <DeviceType T> 128 inline CUPMEvent<T>::~CUPMEvent() noexcept 129 { 130 PetscFunctionBegin; 131 PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_)); 132 PetscFunctionReturnVoid(); 133 } 134 135 template <DeviceType T> 136 inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{})) 137 { 138 static_assert(std::is_empty<impl::Interface<T>>::value, ""); 139 } 140 141 template <DeviceType T> 142 inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept 143 { 144 PetscFunctionBegin; 145 if (this != &other) { 146 pool_type::operator=(std::move(other)); 147 PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_)); 148 event_ = util::exchange(other.event_, cupmEvent_t{}); 149 } 150 PetscFunctionReturn(*this); 151 } 152 153 template <DeviceType T> 154 inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept 155 { 156 PetscFunctionBegin; 157 if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_)); 158 PetscFunctionReturn(event_); 159 } 160 161 template <DeviceType T> 162 inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept 163 { 164 PetscFunctionBegin; 165 PetscCallCUPM(cupmEventRecord(get(), stream)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 template <DeviceType T> 170 inline CUPMEvent<T>::operator bool() const noexcept 171 { 172 return event_ != cupmEvent_t{}; 173 } 174 175 } // namespace cupm 176 177 } // namespace device 178 179 } // namespace Petsc 180 181 #endif // PETSC_CUPMEVENT_HPP 182