xref: /petsc/src/sys/objects/device/impls/cupm/cupmevent.hpp (revision 66af8762ec03dbef0e079729eb2a1734a35ed7ff)
1 #pragma once
2 
3 #include <petsc/private/cupminterface.hpp>
4 #include <petsc/private/cpp/memory.hpp>
5 #include <petsc/private/cpp/object_pool.hpp>
6 
7 #include <stack>
8 
9 namespace Petsc
10 {
11 
12 namespace device
13 {
14 
15 namespace cupm
16 {
17 
18 // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and
19 // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20%
20 // speedup.
21 template <DeviceType T, unsigned long flags>
22 class CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> {
23 public:
24   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
25 
26   PetscErrorCode allocate(cupmEvent_t *) noexcept;
27   PetscErrorCode deallocate(cupmEvent_t *) noexcept;
28 
29   PetscErrorCode finalize_() noexcept;
30 
31 private:
32   std::stack<cupmEvent_t> pool_;
33 };
34 
35 template <DeviceType T, unsigned long flags>
36 inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept
37 {
38   PetscFunctionBegin;
39   while (!pool_.empty()) {
40     PetscCallCUPM(cupmEventDestroy(std::move(pool_.top())));
41     PetscCallCXX(pool_.pop());
42   }
43   PetscFunctionReturn(PETSC_SUCCESS);
44 }
45 
46 template <DeviceType T, unsigned long flags>
47 inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept
48 {
49   PetscFunctionBegin;
50   PetscAssertPointer(event, 1);
51   if (pool_.empty()) {
52     PetscCall(this->register_finalize());
53     PetscCallCUPM(cupmEventCreateWithFlags(event, flags));
54   } else {
55     PetscCallCXX(*event = std::move(pool_.top()));
56     PetscCallCXX(pool_.pop());
57   }
58   PetscFunctionReturn(PETSC_SUCCESS);
59 }
60 
61 template <DeviceType T, unsigned long flags>
62 inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept
63 {
64   PetscFunctionBegin;
65   PetscAssertPointer(in_event, 1);
66   if (auto event = std::exchange(*in_event, cupmEvent_t{})) {
67     if (this->registered()) {
68       PetscCallCXX(pool_.push(std::move(event)));
69     } else {
70       PetscCallCUPM(cupmEventDestroy(event));
71     }
72   }
73   PetscFunctionReturn(PETSC_SUCCESS);
74 }
75 
76 template <DeviceType T, unsigned long flags>
77 CUPMEventPool<T, flags> &cupm_event_pool() noexcept
78 {
79   static CUPMEventPool<T, flags> pool;
80   return pool;
81 }
82 
83 // pool of events with timing disabled
84 template <DeviceType T>
85 inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) &
86 {
87   return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>();
88 }
89 
90 // pool of events with timing enabled
91 template <DeviceType T>
92 inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) &
93 {
94   return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>();
95 }
96 
97 // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the
98 // event-stream pairing for the async allocator. It is also used as the data member of
99 // PetscEvent.
100 template <DeviceType T>
101 class CUPMEvent : impl::Interface<T>, public memory::PoolAllocated {
102   using pool_type = memory::PoolAllocated;
103 
104 public:
105   PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
106 
107   constexpr CUPMEvent() noexcept = default;
108   ~CUPMEvent() noexcept;
109 
110   CUPMEvent(CUPMEvent &&) noexcept;
111   CUPMEvent &operator=(CUPMEvent &&) noexcept;
112 
113   // event is not copyable
114   CUPMEvent(const CUPMEvent &)            = delete;
115   CUPMEvent &operator=(const CUPMEvent &) = delete;
116 
117   PETSC_NODISCARD cupmEvent_t get() noexcept;
118   PetscErrorCode              record(cupmStream_t) noexcept;
119 
120   explicit operator bool() const noexcept;
121 
122 private:
123   cupmEvent_t event_{};
124 };
125 
126 template <DeviceType T>
127 inline CUPMEvent<T>::~CUPMEvent() noexcept
128 {
129   PetscFunctionBegin;
130   PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
131   PetscFunctionReturnVoid();
132 }
133 
134 template <DeviceType T>
135 inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{}))
136 {
137   static_assert(std::is_empty<impl::Interface<T>>::value, "");
138 }
139 
140 template <DeviceType T>
141 inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept
142 {
143   PetscFunctionBegin;
144   if (this != &other) {
145     pool_type::operator=(std::move(other));
146     PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
147     event_ = util::exchange(other.event_, cupmEvent_t{});
148   }
149   PetscFunctionReturn(*this);
150 }
151 
152 template <DeviceType T>
153 inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept
154 {
155   PetscFunctionBegin;
156   if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_));
157   PetscFunctionReturn(event_);
158 }
159 
160 template <DeviceType T>
161 inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept
162 {
163   PetscFunctionBegin;
164   PetscCallCUPM(cupmEventRecord(get(), stream));
165   PetscFunctionReturn(PETSC_SUCCESS);
166 }
167 
168 template <DeviceType T>
169 inline CUPMEvent<T>::operator bool() const noexcept
170 {
171   return event_ != cupmEvent_t{};
172 }
173 
174 } // namespace cupm
175 
176 } // namespace device
177 
178 } // namespace Petsc
179