xref: /petsc/src/sys/objects/device/impls/segmentedmempool.hpp (revision 6016107252cfa03568230349a14202a384fbf0c0)
1 #ifndef PETSC_SEGMENTEDMEMPOOL_HPP
2 #define PETSC_SEGMENTEDMEMPOOL_HPP
3 
4 #include <petsc/private/deviceimpl.h>
5 #include <petsc/private/cpp/macros.hpp>
6 #include <petsc/private/cpp/type_traits.hpp>
7 #include <petsc/private/cpp/utility.hpp>
8 #include <petsc/private/cpp/register_finalize.hpp>
9 
10 #include <limits>
11 #include <deque>
12 #include <vector>
13 
14 namespace Petsc {
15 
16 namespace device {
17 
18 template <typename T>
19 class StreamBase {
20 public:
21   using id_type      = int;
22   using derived_type = T;
23 
24   static const id_type INVALID_ID;
25 
26   // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
27   template <typename U = T>
28   PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());
29 
30   PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }
31 
32   template <typename E>
33   PETSC_NODISCARD PetscErrorCode record_event(E &&event) const noexcept {
34     return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
35   }
36 
37   template <typename E>
38   PETSC_NODISCARD PetscErrorCode wait_for_event(E &&event) const noexcept {
39     return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
40   }
41 
42 protected:
43   constexpr StreamBase() noexcept = default;
44 
45   struct default_event_type { };
46   using default_stream_type = std::nullptr_t;
47 
48   PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }
49 
50   PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }
51 
52   template <typename U = T>
53   PETSC_NODISCARD static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept {
54     return 0;
55   }
56 
57   template <typename U = T>
58   PETSC_NODISCARD static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept {
59     return 0;
60   }
61 };
62 
// Out-of-class definition of the sentinel stream id (-1). MemoryChunk uses it as the initial
// value of its "last stream" id, i.e. for a chunk that has not been released on any stream.
template <typename T>
const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;
65 
66 struct DefaultStream : StreamBase<DefaultStream> {
67   using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
68   using id_type     = typename StreamBase<DefaultStream>::id_type;
69   using event_type  = typename StreamBase<DefaultStream>::default_event_type;
70 };
71 
72 } // namespace device
73 
74 namespace memory {
75 
76 namespace impl {
77 
78 // ==========================================================================================
79 // MemoryChunk
80 //
81 // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
82 // MemoryBlock and its size/capacity
83 // ==========================================================================================
84 
85 template <typename EventType>
86 class MemoryChunk {
87 public:
88   using event_type = EventType;
89   using size_type  = std::size_t;
90 
91   MemoryChunk(size_type, size_type) noexcept;
92   explicit MemoryChunk(size_type) noexcept;
93 
94   MemoryChunk(MemoryChunk &&) noexcept;
95   MemoryChunk &operator=(MemoryChunk &&) noexcept;
96 
97   MemoryChunk(const MemoryChunk &) noexcept            = delete;
98   MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;
99 
100   PETSC_NODISCARD size_type start() const noexcept { return start_; }
101   PETSC_NODISCARD size_type size() const noexcept { return size_; }
102   // REVIEW ME:
103   // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
104   // theory only the last chunk needs to do this
105   PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
106   PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }
107 
108   template <typename U>
109   PETSC_NODISCARD PetscErrorCode release(const device::StreamBase<U> *) noexcept;
110   template <typename U>
111   PETSC_NODISCARD PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
112   template <typename U>
113   PETSC_NODISCARD bool           can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
114   PETSC_NODISCARD PetscErrorCode resize(size_type) noexcept;
115 
116 private:
117   // clang-format off
118   event_type      event_{};          // event recorded when the chunk was released
119   bool            open_      = true; // is this chunk open?
120   // id of the last stream to use the chunk, populated on release
121   int             stream_id_ = device::DefaultStream::INVALID_ID;
122   size_type       size_      = 0;    // size of the chunk
123   const size_type start_     = 0;    // offset from the start of the owning block
124 
125   // clang-format on
126   template <typename U>
127   PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
128 };
129 
130 // ==========================================================================================
131 // MemoryChunk - Private API
132 // ==========================================================================================
133 
134 // asks and answers the question: can this stream claim this chunk without serializing?
135 template <typename E>
136 template <typename U>
137 inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept {
138   return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
139 }
140 
141 // ==========================================================================================
142 // MemoryChunk - Public API
143 // ==========================================================================================
144 
145 template <typename E>
146 inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start) { }
147 
148 template <typename E>
149 inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size) { }
150 
151 template <typename E>
152 inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
153   event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_)) { }
154 
155 template <typename E>
156 inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept {
157   PetscFunctionBegin;
158   if (this != &other) {
159     event_     = std::move(other.event_);
160     open_      = util::exchange(other.open_, false);
161     stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
162     size_      = util::exchange(other.size_, 0);
163     start_     = std::move(other.start_);
164   }
165   PetscFunctionReturn(*this);
166 }
167 
168 /*
169   MemoryChunk::release - release a chunk on a stream
170 
171   Input Parameter:
172 . stream - the stream to release the chunk with
173 
174   Notes:
175   Inserts a release operation on stream and records the state of stream at the time this
176   routine was called.
177 
178   Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
179   the chunk without serialization.
180 
181   If another stream attempts to claim the chunk they must wait for the recorded event before
182   claiming the chunk.
183 */
184 template <typename E>
185 template <typename U>
186 inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept {
187   PetscFunctionBegin;
188   open_      = true;
189   stream_id_ = stream->get_id();
190   PetscCall(stream->record_event(event_));
191   PetscFunctionReturn(0);
192 }
193 
194 /*
195   MemoryChunk::claim - attempt to claim a particular chunk
196 
197   Input Parameters:
198 + stream    - the stream on which to attempt to claim
199 . req_size  - the requested size (in elements) to attempt to claim
200 - serialize - (optional, false) whether the claimant allows serialization
201 
202   Output Parameter:
203 . success - true if the chunk was claimed, false otherwise
204 */
205 template <typename E>
206 template <typename U>
207 inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept {
208   PetscFunctionBegin;
209   if ((*success = can_claim(stream, req_size, serialize))) {
210     if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_));
211     PetscCall(resize(req_size));
212     open_ = false;
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 /*
218   MemoryChunk::can_claim - test whether a particular chunk can be claimed
219 
220   Input Parameters:
221 + stream    - the stream on which to attempt to claim
222 . req_size  - the requested size (in elements) to attempt to claim
223 - serialize - whether the claimant allows serialization
224 
225   Output:
226 . [return] - true if the chunk is claimable given the configuration, false otherwise
227 */
228 template <typename E>
229 template <typename U>
230 inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept {
231   if (open_ && (req_size <= capacity())) {
232     // fully compatible
233     if (stream_compat_(stream)) return true;
234     // stream wasn't compatible, but could claim if we serialized
235     if (serialize) return true;
236     // incompatible stream and did not want to serialize
237   }
238   return false;
239 }
240 
241 /*
  MemoryChunk::resize - resize a chunk to a new size

  Input Parameter:
. newsize - the new size requested

  Notes:
  newsize cannot be larger than capacity
249 */
// Set the size of the chunk to newsize. capacity() currently returns the chunk's size, so
// this can only keep or reduce the size; the check below is a PetscAssert.
template <typename E>
inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept {
  PetscFunctionBegin;
  PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
  size_ = newsize;
  PetscFunctionReturn(0);
}
257 
258 // ==========================================================================================
259 // MemoryBlock
260 //
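// A "memory block" manager, which owns the pointer to a particular memory range. Chunks of a
// block may be retrieved and restored on multiple device streams; ordering between streams is
// handled through the event recorded on each chunk. NOTE(review): no host-side locking is
// visible in this file, so concurrent use from several host threads is presumably not
// safe -- confirm.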
263 // ==========================================================================================
264 
265 template <typename T, typename AllocatorType, typename StreamType>
266 class MemoryBlock {
267 public:
268   using value_type      = T;
269   using allocator_type  = AllocatorType;
270   using stream_type     = StreamType;
271   using event_type      = typename stream_type::event_type;
272   using chunk_type      = MemoryChunk<event_type>;
273   using size_type       = typename chunk_type::size_type;
274   using chunk_list_type = std::vector<chunk_type>;
275 
276   template <typename U>
277   MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;
278 
279   ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);
280 
281   MemoryBlock(MemoryBlock &&) noexcept;
282   MemoryBlock &operator=(MemoryBlock &&) noexcept;
283 
284   // memory blocks are not copyable
285   MemoryBlock(const MemoryBlock &)            = delete;
286   MemoryBlock &operator=(const MemoryBlock &) = delete;
287 
288   /* --- actual functions --- */
289   PETSC_NODISCARD PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
290   PETSC_NODISCARD PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
291   PETSC_NODISCARD PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept;
292   PETSC_NODISCARD bool           owns_pointer(const T *) const noexcept;
293 
294   PETSC_NODISCARD size_type size() const noexcept { return size_; }
295   PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
296   PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }
297 
298 private:
299   value_type     *mem_{};
300   allocator_type *allocator_{};
301   size_type       size_{};
302   chunk_list_type chunks_{};
303 
304   PETSC_NODISCARD PetscErrorCode clear_(const stream_type *) noexcept;
305 };
306 
307 // ==========================================================================================
308 // MemoryBlock - Private API
309 // ==========================================================================================
310 
// clear the memory block, called from the destructor and from move assignment
// Return the owned memory (if any) to the allocator on the given stream, then reset the size
// to zero and drop all chunks. If deallocation fails the error is returned immediately and
// size_ and chunks_ are left as they were.
template <typename T, typename A, typename S>
PETSC_NODISCARD PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept {
  PetscFunctionBegin;
  // mem_ is null for a moved-from block, in which case there is nothing to give back
  if (PetscLikely(mem_)) {
    PetscCall(allocator_->deallocate(mem_, stream));
    mem_ = nullptr;
  }
  size_ = 0;
  PetscCallCXX(chunks_.clear());
  PetscFunctionReturn(0);
}
323 
324 // ==========================================================================================
325 // MemoryBlock - Public API
326 // ==========================================================================================
327 
// constructor, allocates memory immediately
329 template <typename T, typename A, typename S>
330 template <typename U>
331 MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s) {
332   PetscFunctionBegin;
333   PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream));
334   PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
335   PetscFunctionReturnVoid();
336 }
337 
338 template <typename T, typename A, typename S>
339 MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value) {
340   stream_type stream;
341 
342   PetscFunctionBegin;
343   PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
344   PetscFunctionReturnVoid();
345 }
346 
347 template <typename T, typename A, typename S>
348 MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_)) { }
349 
350 template <typename T, typename A, typename S>
351 MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept {
352   PetscFunctionBegin;
353   if (this != &other) {
354     stream_type stream;
355 
356     PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
357     mem_       = util::exchange(other.mem_, nullptr);
358     allocator_ = other.allocator_;
359     size_      = util::exchange(other.size_, 0);
360     chunks_    = std::move(other.chunks_);
361   }
362   PetscFunctionReturn(*this);
363 }
364 
365 /*
  MemoryBlock::owns_pointer - returns true if this block owns a pointer, false otherwise
367 */
368 template <typename T, typename A, typename S>
369 inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept {
370   // each pool is linear in memory, so it suffices to check the bounds
371   return (ptr >= mem_) && (ptr < std::next(mem_, size()));
372 }
373 
374 /*
375   MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock
376 
377   Input Parameters:
378 + req_size - the requested size of the allocation (in elements)
379 . ptr      - ptr to fill
380 - stream   - stream to fill the pointer on
381 
382   Output Parameter:
383 . success  - true if chunk was gotten, false otherwise
384 
385   Notes:
386   If the current memory could not satisfy the memory request, ptr is unchanged
387 */
388 template <typename T, typename A, typename S>
389 inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept {
390   PetscFunctionBegin;
391   *success = false;
392   if (req_size <= size()) {
393     const auto try_create_chunk = [&]() {
394       const auto was_empty     = chunks_.empty();
395       const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();
396 
397       PetscFunctionBegin;
398       if (block_alloced + req_size <= size()) {
399         PetscCallCXX(chunks_.emplace_back(block_alloced, req_size));
400         PetscCall(chunks_.back().claim(stream, req_size, success));
401         *ptr = mem_ + block_alloced;
402         if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
403       }
404       PetscFunctionReturn(0);
405     };
406     const auto try_find_open_chunk = [&](bool serialize = false) {
407       PetscFunctionBegin;
408       for (auto &chunk : chunks_) {
409         PetscCall(chunk.claim(stream, req_size, success, serialize));
410         if (*success) {
411           *ptr = mem_ + chunk.start();
412           break;
413         }
414       }
415       PetscFunctionReturn(0);
416     };
417 
418     // search previously distributed chunks, but only claim one if it is on the same stream
419     // as us
420     PetscCall(try_find_open_chunk());
421 
422     // if we are here we couldn't reuse one of our own chunks so check first if the pool
423     // has room for a new one
424     if (!*success) PetscCall(try_create_chunk());
425 
426     // try pruning dead chunks off the back, note we do this regardless of whether we are
427     // successful
428     while (chunks_.back().can_claim(stream, 0, false)) {
429       PetscCallCXX(chunks_.pop_back());
430       if (chunks_.empty()) {
431         // if chunks are empty it implies we have managed to claim (and subsequently destroy)
432         // our own chunk twice! something has gone wrong
433         PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
434         break;
435       }
436     }
437 
438     // if previously unsuccessful see if enough space has opened up due to pruning. note that
439     // if the chunk list was emptied from the pruning this call must succeed in allocating a
440     // chunk, otherwise something is wrong
441     if (!*success) PetscCall(try_create_chunk());
442 
443     // last resort, iterate over all chunks and see if we can steal one by waiting on the
444     // current owner to finish using it
445     if (!*success) PetscCall(try_find_open_chunk(true));
446 
447     // sets memory to NaN or infinity depending on the type to catch out uninitialized memory
448     // accesses.
449     if (PetscDefined(USE_DEBUG) && *success) PetscCall(allocator_->set_canary(*ptr, req_size, stream));
450   }
451   PetscFunctionReturn(0);
452 }
453 
454 /*
455   MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock
456 
457   Input Parameters:
458 + ptr     - ptr to restore
459 - stream  - stream to restore the pointer on
460 
461   Output Parameter:
462 . success - true if chunk was restored, false otherwise
463 
464   Notes:
465   ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
  by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
467   without synchronization, but other streams may not do so until either serializing or the
468   stream is idle again.
469 */
470 template <typename T, typename A, typename S>
471 inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept {
472   chunk_type *chunk = nullptr;
473 
474   PetscFunctionBegin;
475   PetscCall(try_find_chunk(*ptr, &chunk));
476   if (chunk) {
477     PetscCall(chunk->release(stream));
478     *ptr     = nullptr;
479     *success = true;
480   } else {
481     *success = false;
482   }
483   PetscFunctionReturn(0);
484 }
485 
486 /*
487   MemoryBlock::try_find_chunk - try to find the chunk which owns ptr
488 
489   Input Parameter:
. ptr - the pointer to look for
491 
492   Output Parameter:
493 . ret_chunk - pointer to the owning chunk or nullptr if not found
494 */
495 template <typename T, typename A, typename S>
496 inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept {
497   PetscFunctionBegin;
498   *ret_chunk = nullptr;
499   if (owns_pointer(ptr)) {
500     const auto offset = static_cast<size_type>(ptr - mem_);
501 
502     for (auto &chunk : chunks_) {
503       if (chunk.start() == offset) {
504         *ret_chunk = &chunk;
505         break;
506       }
507     }
508 
509     PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
510   }
511   PetscFunctionReturn(0);
512 }
513 
514 namespace detail {
515 
// Type trait mapping a value type to the type used to look up numeric limits for it. The
// primary template is the identity mapping.
template <typename T>
struct real_type {
  using type = T;
};

// PetscScalar maps to PetscReal.
template <>
struct real_type<PetscScalar> {
  using type = PetscReal;
};
525 
526 } // namespace detail
527 
528 template <typename T>
529 struct SegmentedMemoryPoolAllocatorBase {
530   using value_type      = T;
531   using size_type       = std::size_t;
532   using real_value_type = typename detail::real_type<T>::type;
533 
534   template <typename U>
535   PETSC_NODISCARD static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
536   template <typename U>
537   PETSC_NODISCARD static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
538   template <typename U>
539   PETSC_NODISCARD static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
540   template <typename U>
541   PETSC_NODISCARD static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
542   template <typename U>
543   PETSC_NODISCARD static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
544 };
545 
546 template <typename T>
547 template <typename U>
548 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept {
549   PetscFunctionBegin;
550   PetscCall(PetscMalloc1(n, ptr));
551   PetscFunctionReturn(0);
552 }
553 
554 template <typename T>
555 template <typename U>
556 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept {
557   PetscFunctionBegin;
558   PetscCall(PetscFree(ptr));
559   PetscFunctionReturn(0);
560 }
561 
562 template <typename T>
563 template <typename U>
564 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept {
565   PetscFunctionBegin;
566   PetscCall(PetscArrayzero(ptr, n));
567   PetscFunctionReturn(0);
568 }
569 
570 template <typename T>
571 template <typename U>
572 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept {
573   PetscFunctionBegin;
574   PetscCall(PetscArraycpy(dest, src, n));
575   PetscFunctionReturn(0);
576 }
577 
578 template <typename T>
579 template <typename U>
580 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept {
581   using limit_type            = std::numeric_limits<real_value_type>;
582   constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();
583 
584   PetscFunctionBegin;
585   for (size_type i = 0; i < n; ++i) ptr[i] = canary;
586   PetscFunctionReturn(0);
587 }
588 
589 } // namespace impl
590 
591 // ==========================================================================================
592 // SegmentedMemoryPool
593 // ==========================================================================================
594 
595 template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
596 class SegmentedMemoryPool;
597 
598 // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
599 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
600 class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
601 public:
602   using value_type     = MemType;
603   using stream_type    = StreamType;
604   using allocator_type = AllocType;
605   using block_type     = impl::MemoryBlock<value_type, allocator_type, stream_type>;
606   using pool_type      = std::deque<block_type>;
607   using size_type      = typename block_type::size_type;
608 
609   explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);
610 
611   PETSC_NODISCARD PetscErrorCode allocate(PetscInt, value_type **, const stream_type *) noexcept;
612   PETSC_NODISCARD PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
613   PETSC_NODISCARD PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;
614 
615 private:
616   pool_type      pool_;
617   allocator_type allocator_;
618   size_type      chunk_size_;
619 
620   PETSC_NODISCARD PetscErrorCode make_block_(size_type, const stream_type *) noexcept;
621 
622   friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
623   PETSC_NODISCARD PetscErrorCode register_finalize_(const stream_type *) noexcept;
624   PETSC_NODISCARD PetscErrorCode finalize_() noexcept;
625 };
626 
627 // ==========================================================================================
628 // SegmentedMemoryPool - Private API
629 // ==========================================================================================
630 
631 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
632 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept {
633   const auto block_size = std::max(size, chunk_size_);
634 
635   PetscFunctionBegin;
636   PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream));
637   PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size()));
638   PetscFunctionReturn(0);
639 }
640 
// Registration hook (declared next to the RegisterFinalizeable friend declaration in the
// class). Seeds the pool with a first block of chunk_size_ elements allocated on stream.
template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept {
  PetscFunctionBegin;
  PetscCall(make_block_(chunk_size_, stream));
  PetscFunctionReturn(0);
}
647 
// Finalization hook. Destroys every block in the pool (each block returns its memory in its
// destructor) and resets the block size to the compile-time default, DefaultChunkSize.
template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept {
  PetscFunctionBegin;
  PetscCallCXX(pool_.clear());
  // note this discards any custom size that was passed to the constructor
  chunk_size_ = DefaultChunkSize;
  PetscFunctionReturn(0);
}
655 
656 // ==========================================================================================
657 // SegmentedMemoryPool - Public API
658 // ==========================================================================================
659 
// Construct an empty pool. alloc is moved into the pool and size becomes the minimum size
// (in elements) of every block subsequently created by make_block_(). No memory is allocated
// here.
template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) :
  allocator_(std::move(alloc)), chunk_size_(size) { }
663 
664 /*
665   SegmentedMemoryPool::allocate - get an allocation from the memory pool
666 
667   Input Parameters:
668 + req_size - size (in elements) to get
669 . ptr      - the pointer to hold the allocation
670 - stream   - the stream on which to get the allocation
671 
672   Output Parameter:
673 . ptr - the pointer holding the allocation
674 
675   Notes:
  req_size cannot be negative. If req_size is zero, ptr is set to nullptr
677 */
678 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
679 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream) noexcept {
680   const auto size  = static_cast<size_type>(req_size);
681   auto       found = false;
682 
683   PetscFunctionBegin;
684   PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
685   PetscValidPointer(ptr, 2);
686   PetscValidPointer(stream, 3);
687   *ptr = nullptr;
688   if (!req_size) PetscFunctionReturn(0);
689   PetscCall(this->register_finalize(PETSC_COMM_SELF, stream));
690   for (auto &block : pool_) {
691     PetscCall(block.try_allocate_chunk(size, ptr, stream, &found));
692     if (PetscLikely(found)) PetscFunctionReturn(0);
693   }
694 
695   PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size));
696   // if we are here we couldn't find an open block in the pool, so make a new block
697   PetscCall(make_block_(size, stream));
698   // and assign it
699   PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found));
700   PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
701   PetscFunctionReturn(0);
702 }
703 
704 /*
705   SegmentedMemoryPool::deallocate - release a pointer back to the memory pool
706 
707   Input Parameters:
708 + ptr    - the pointer to release
709 - stream - the stream to release it on
710 
711   Notes:
712   If ptr is not owned by the pool it is unchanged.
713 */
714 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
715 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept {
716   PetscFunctionBegin;
717   PetscValidPointer(ptr, 1);
718   PetscValidPointer(stream, 2);
719   // nobody owns a nullptr, and if they do then they have bigger problems
720   if (!*ptr) PetscFunctionReturn(0);
721   for (auto &block : pool_) {
722     auto found = false;
723 
724     PetscCall(block.try_deallocate_chunk(ptr, stream, &found));
725     if (PetscLikely(found)) break;
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 /*
731   SegmentedMemoryPool::reallocate - Resize an allocated buffer
732 
733   Input Parameters:
734 + new_req_size - the new buffer size
735 . ptr          - pointer to the buffer
736 - stream       - stream to resize with
737 
  Output Parameter:
739 . ptr - pointer to the new region
740 
741   Notes:
742   ptr must have been allocated by the pool.
743 
744   It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
745 */
746 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
747 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept {
748   using chunk_type = typename block_type::chunk_type;
749 
750   const auto  new_size = static_cast<size_type>(new_req_size);
751   const auto  old_ptr  = *ptr;
752   chunk_type *chunk    = nullptr;
753 
754   PetscFunctionBegin;
755   PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);
756   PetscValidPointer(ptr, 2);
757   PetscValidPointer(stream, 3);
758 
759   // if reallocating to zero, just free
760   if (PetscUnlikely(new_size == 0)) {
761     PetscCall(deallocate(ptr, stream));
762     PetscFunctionReturn(0);
763   }
764 
765   // search the blocks for the owning chunk
766   for (auto &block : pool_) {
767     PetscCall(block.try_find_chunk(old_ptr, &chunk));
768     if (chunk) break; // found
769   }
770   PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);
771 
772   if (chunk->capacity() < new_size) {
773     // chunk does not have enough room, need to grab a fresh chunk and copy to it
774     *ptr = nullptr;
775     PetscCall(chunk->release(stream));
776     PetscCall(allocate(new_size, ptr, stream));
777     PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream));
778   } else {
779     // chunk had enough room we can simply grow (or shrink) to fit the new size
780     PetscCall(chunk->resize(new_size));
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 } // namespace memory
786 
787 } // namespace Petsc
788 
789 #endif // PETSC_SEGMENTEDMEMPOOL_HPP
790