1 #pragma once
2
3 #include <petsc/private/cupminterface.hpp>
4 #include <petsc/private/cpp/memory.hpp>
5 #include <petsc/private/cpp/object_pool.hpp>
6
7 #include <stack>
8
9 namespace Petsc
10 {
11
12 namespace device
13 {
14
15 namespace cupm
16 {
17
18 // A pool for allocating cupmEvent_t's. While events are generally very cheap to create and
19 // destroy, they are not free. Using the pool vs on-demand creation and destruction yields a ~20%
20 // speedup.
21 template <DeviceType T, unsigned long flags>
22 class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL CUPMEventPool : impl::Interface<T>, public RegisterFinalizeable<CUPMEventPool<T, flags>> {
23 public:
24 PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
25
26 PetscErrorCode allocate(cupmEvent_t *) noexcept;
27 PetscErrorCode deallocate(cupmEvent_t *) noexcept;
28
29 PetscErrorCode finalize_() noexcept;
30
31 private:
32 std::stack<cupmEvent_t> pool_;
33 };
34
35 template <DeviceType T, unsigned long flags>
finalize_()36 inline PetscErrorCode CUPMEventPool<T, flags>::finalize_() noexcept
37 {
38 PetscFunctionBegin;
39 while (!pool_.empty()) {
40 PetscCallCUPM(cupmEventDestroy(std::move(pool_.top())));
41 PetscCallCXX(pool_.pop());
42 }
43 PetscFunctionReturn(PETSC_SUCCESS);
44 }
45
46 template <DeviceType T, unsigned long flags>
allocate(cupmEvent_t * event)47 inline PetscErrorCode CUPMEventPool<T, flags>::allocate(cupmEvent_t *event) noexcept
48 {
49 PetscFunctionBegin;
50 PetscAssertPointer(event, 1);
51 if (pool_.empty()) {
52 PetscCall(this->register_finalize());
53 PetscCallCUPM(cupmEventCreateWithFlags(event, (unsigned int)flags));
54 } else {
55 PetscCallCXX(*event = std::move(pool_.top()));
56 PetscCallCXX(pool_.pop());
57 }
58 PetscFunctionReturn(PETSC_SUCCESS);
59 }
60
61 template <DeviceType T, unsigned long flags>
deallocate(cupmEvent_t * in_event)62 inline PetscErrorCode CUPMEventPool<T, flags>::deallocate(cupmEvent_t *in_event) noexcept
63 {
64 PetscFunctionBegin;
65 PetscAssertPointer(in_event, 1);
66 if (auto event = std::exchange(*in_event, cupmEvent_t{})) {
67 if (this->registered()) {
68 PetscCallCXX(pool_.push(std::move(event)));
69 } else {
70 PetscCallCUPM(cupmEventDestroy(event));
71 }
72 }
73 PetscFunctionReturn(PETSC_SUCCESS);
74 }
75
76 template <DeviceType T, unsigned long flags>
cupm_event_pool()77 CUPMEventPool<T, flags> &cupm_event_pool() noexcept
78 {
79 static CUPMEventPool<T, flags> pool;
80 return pool;
81 }
82
83 // pool of events with timing disabled
84 template <DeviceType T>
cupm_fast_event_pool()85 inline auto cupm_fast_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>()) &
86 {
87 return cupm_event_pool<T, impl::Interface<T>::cupmEventDisableTiming>();
88 }
89
90 // pool of events with timing enabled
91 template <DeviceType T>
cupm_timer_event_pool()92 inline auto cupm_timer_event_pool() noexcept -> decltype(cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>()) &
93 {
94 return cupm_event_pool<T, impl::Interface<T>::cupmEventDefault>();
95 }
96
97 // A simple wrapper of cupmEvent_t. This is used in conjunction with CUPMStream to build the
98 // event-stream pairing for the async allocator. It is also used as the data member of
99 // PetscEvent.
100 template <DeviceType T>
101 class PETSC_SINGLE_LIBRARY_VISIBILITY_INTERNAL CUPMEvent : impl::Interface<T>, public memory::PoolAllocated {
102 using pool_type = memory::PoolAllocated;
103
104 public:
105 PETSC_CUPM_INHERIT_INTERFACE_TYPEDEFS_USING(T);
106
107 constexpr CUPMEvent() noexcept = default;
108 ~CUPMEvent() noexcept;
109
110 CUPMEvent(CUPMEvent &&) noexcept;
111 CUPMEvent &operator=(CUPMEvent &&) noexcept;
112
113 // event is not copyable
114 CUPMEvent(const CUPMEvent &) = delete;
115 CUPMEvent &operator=(const CUPMEvent &) = delete;
116
117 PETSC_NODISCARD cupmEvent_t get() noexcept;
118 PetscErrorCode record(cupmStream_t) noexcept;
119
120 explicit operator bool() const noexcept;
121
122 private:
123 cupmEvent_t event_{};
124 };
125
126 template <DeviceType T>
~CUPMEvent()127 inline CUPMEvent<T>::~CUPMEvent() noexcept
128 {
129 PetscFunctionBegin;
130 PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
131 PetscFunctionReturnVoid();
132 }
133
134 template <DeviceType T>
CUPMEvent(CUPMEvent && other)135 inline CUPMEvent<T>::CUPMEvent(CUPMEvent &&other) noexcept : pool_type(std::move(other)), event_(util::exchange(other.event_, cupmEvent_t{}))
136 {
137 static_assert(std::is_empty<impl::Interface<T>>::value, "");
138 }
139
140 template <DeviceType T>
operator =(CUPMEvent && other)141 inline CUPMEvent<T> &CUPMEvent<T>::operator=(CUPMEvent &&other) noexcept
142 {
143 PetscFunctionBegin;
144 if (this != &other) {
145 pool_type::operator=(std::move(other));
146 PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().deallocate(&event_));
147 event_ = util::exchange(other.event_, cupmEvent_t{});
148 }
149 PetscFunctionReturn(*this);
150 }
151
152 template <DeviceType T>
get()153 inline typename CUPMEvent<T>::cupmEvent_t CUPMEvent<T>::get() noexcept
154 {
155 PetscFunctionBegin;
156 if (PetscUnlikely(!event_)) PetscCallAbort(PETSC_COMM_SELF, cupm_fast_event_pool<T>().allocate(&event_));
157 PetscFunctionReturn(event_);
158 }
159
160 template <DeviceType T>
record(cupmStream_t stream)161 inline PetscErrorCode CUPMEvent<T>::record(cupmStream_t stream) noexcept
162 {
163 PetscFunctionBegin;
164 PetscCallCUPM(cupmEventRecord(get(), stream));
165 PetscFunctionReturn(PETSC_SUCCESS);
166 }
167
168 template <DeviceType T>
operator bool() const169 inline CUPMEvent<T>::operator bool() const noexcept
170 {
171 return event_ != cupmEvent_t{};
172 }
173
174 } // namespace cupm
175
176 } // namespace device
177
178 } // namespace Petsc
179