xref: /petsc/src/sys/objects/device/impls/segmentedmempool.hpp (revision a336c15037c72f93cd561f5a5e11e93175f2efd9)
1 #pragma once
2 
3 #include <petsc/private/deviceimpl.h>
4 
5 #include <petsc/private/cpp/macros.hpp>
6 #include <petsc/private/cpp/type_traits.hpp>
7 #include <petsc/private/cpp/utility.hpp>
8 #include <petsc/private/cpp/register_finalize.hpp>
9 #include <petsc/private/cpp/memory.hpp>
10 
#include <deque>
#include <functional>
#include <limits>
#include <vector>
14 
15 namespace Petsc
16 {
17 
18 namespace device
19 {
20 
21 template <typename T>
22 class StreamBase {
23 public:
24   using id_type      = int;
25   using derived_type = T;
26 
27   static const id_type INVALID_ID;
28 
29   // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion
30   template <typename U = T>
31   PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_());
32 
33   PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); }
34 
35   template <typename E>
36   PetscErrorCode record_event(E &&event) const noexcept
37   {
38     return static_cast<const T &>(*this).record_event_(std::forward<E>(event));
39   }
40 
41   template <typename E>
42   PetscErrorCode wait_for_event(E &&event) const noexcept
43   {
44     return static_cast<const T &>(*this).wait_for_(std::forward<E>(event));
45   }
46 
47 protected:
48   constexpr StreamBase() noexcept = default;
49 
50   struct default_event_type { };
51   using default_stream_type = std::nullptr_t;
52 
53   PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; }
54 
55   PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; }
56 
57   template <typename U = T>
58   static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept
59   {
60     return PETSC_SUCCESS;
61   }
62 
63   template <typename U = T>
64   static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept
65   {
66     return PETSC_SUCCESS;
67   }
68 };
69 
70 template <typename T>
71 const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1;
72 
73 struct DefaultStream : StreamBase<DefaultStream> {
74   using stream_type = typename StreamBase<DefaultStream>::default_stream_type;
75   using id_type     = typename StreamBase<DefaultStream>::id_type;
76   using event_type  = typename StreamBase<DefaultStream>::default_event_type;
77 };
78 
79 } // namespace device
80 
81 namespace memory
82 {
83 
84 namespace impl
85 {
86 
87 // ==========================================================================================
88 // MemoryChunk
89 //
90 // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning
91 // MemoryBlock and its size/capacity
92 // ==========================================================================================
93 
94 template <typename EventType>
95 class MemoryChunk {
96 public:
97   using event_type = EventType;
98   using size_type  = std::size_t;
99 
100   MemoryChunk(size_type, size_type) noexcept;
101   explicit MemoryChunk(size_type) noexcept;
102 
103   MemoryChunk(MemoryChunk &&) noexcept;
104   MemoryChunk &operator=(MemoryChunk &&) noexcept;
105 
106   MemoryChunk(const MemoryChunk &) noexcept            = delete;
107   MemoryChunk &operator=(const MemoryChunk &) noexcept = delete;
108 
109   PETSC_NODISCARD size_type start() const noexcept { return start_; }
110   PETSC_NODISCARD size_type size() const noexcept { return size_; }
111   // REVIEW ME:
112   // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in
113   // theory only the last chunk needs to do this
114   PETSC_NODISCARD size_type capacity() const noexcept { return size_; }
115   PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); }
116 
117   template <typename U>
118   PetscErrorCode release(const device::StreamBase<U> *) noexcept;
119   template <typename U>
120   PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept;
121   template <typename U>
122   PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept;
123   PetscErrorCode       resize(size_type) noexcept;
124   PETSC_NODISCARD bool contains(size_type) const noexcept;
125 
126 private:
127   event_type event_{};                                       // event recorded when the chunk was released
128   bool       open_      = true;                              // is this chunk open?
129   int        stream_id_ = device::DefaultStream::INVALID_ID; // id of the last stream to use the chunk, populated on release
130   size_type  size_      = 0;                                 // size of the chunk
131   size_type  start_     = 0;                                 // offset from the start of the owning block
132 
133   template <typename U>
134   PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept;
135 };
136 
137 // ==========================================================================================
138 // MemoryChunk - Private API
139 // ==========================================================================================
140 
141 // asks and answers the question: can this stream claim this chunk without serializing?
142 template <typename E>
143 template <typename U>
144 inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept
145 {
146   return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id());
147 }
148 
149 // ==========================================================================================
150 // MemoryChunk - Public API
151 // ==========================================================================================
152 
153 template <typename E>
154 inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start)
155 {
156 }
157 
158 template <typename E>
159 inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size)
160 {
161 }
162 
163 template <typename E>
164 inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept :
165   event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_))
166 {
167 }
168 
169 template <typename E>
170 inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept
171 {
172   PetscFunctionBegin;
173   if (this != &other) {
174     event_     = std::move(other.event_);
175     open_      = util::exchange(other.open_, false);
176     stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID);
177     size_      = util::exchange(other.size_, 0);
178     start_     = std::move(other.start_);
179   }
180   PetscFunctionReturn(*this);
181 }
182 
183 /*
184   MemoryChunk::release - release a chunk on a stream
185 
186   Input Parameter:
187 . stream - the stream to release the chunk with
188 
189   Notes:
190   Inserts a release operation on stream and records the state of stream at the time this
191   routine was called.
192 
193   Future allocation requests which attempt to claim the chunk on the same stream may re-acquire
194   the chunk without serialization.
195 
196   If another stream attempts to claim the chunk they must wait for the recorded event before
197   claiming the chunk.
198 */
199 template <typename E>
200 template <typename U>
201 inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept
202 {
203   PetscFunctionBegin;
204   open_      = true;
205   stream_id_ = stream->get_id();
206   PetscCall(stream->record_event(event_));
207   PetscFunctionReturn(PETSC_SUCCESS);
208 }
209 
210 /*
211   MemoryChunk::claim - attempt to claim a particular chunk
212 
213   Input Parameters:
214 + stream    - the stream on which to attempt to claim
215 . req_size  - the requested size (in elements) to attempt to claim
216 - serialize - (optional, false) whether the claimant allows serialization
217 
218   Output Parameter:
219 . success - true if the chunk was claimed, false otherwise
220 */
221 template <typename E>
222 template <typename U>
223 inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept
224 {
225   PetscFunctionBegin;
226   if ((*success = can_claim(stream, req_size, serialize))) {
227     if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_));
228     PetscCall(resize(req_size));
229     open_ = false;
230   }
231   PetscFunctionReturn(PETSC_SUCCESS);
232 }
233 
234 /*
235   MemoryChunk::can_claim - test whether a particular chunk can be claimed
236 
237   Input Parameters:
238 + stream    - the stream on which to attempt to claim
239 . req_size  - the requested size (in elements) to attempt to claim
240 - serialize - whether the claimant allows serialization
241 
242   Output:
243 . [return] - true if the chunk is claimable given the configuration, false otherwise
244 */
245 template <typename E>
246 template <typename U>
247 inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept
248 {
249   if (open_ && (req_size <= capacity())) {
250     // fully compatible
251     if (stream_compat_(stream)) return true;
252     // stream wasn't compatible, but could claim if we serialized
253     if (serialize) return true;
254     // incompatible stream and did not want to serialize
255   }
256   return false;
257 }
258 
259 /*
260   MemoryChunk::resize - grow a chunk to new size
261 
262   Input Parameter:
263 . newsize - the new size Requested
264 
265   Notes:
266   newsize cannot be larger than capacity
267 */
268 template <typename E>
269 inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept
270 {
271   PetscFunctionBegin;
272   PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity());
273   size_ = newsize;
274   PetscFunctionReturn(PETSC_SUCCESS);
275 }
276 
277 /*
278   MemoryChunk::contains - query whether a memory chunk contains a particular offset
279 
280   Input Parameters:
281 . offset - The offset from the MemoryBlock start
282 
283   Notes:
284   Returns true if the chunk contains the offset, false otherwise
285 */
286 template <typename E>
287 inline bool MemoryChunk<E>::contains(size_type offset) const noexcept
288 {
289   return (offset >= start()) && (offset < total_offset());
290 }
291 
292 // ==========================================================================================
293 // MemoryBlock
294 //
295 // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving
296 // and restoring a block is thread-safe (so may be used by multiple device streams).
297 // ==========================================================================================
298 
299 template <typename T, typename AllocatorType, typename StreamType>
300 class MemoryBlock {
301 public:
302   using value_type      = T;
303   using allocator_type  = AllocatorType;
304   using stream_type     = StreamType;
305   using event_type      = typename stream_type::event_type;
306   using chunk_type      = MemoryChunk<event_type>;
307   using size_type       = typename chunk_type::size_type;
308   using chunk_list_type = std::vector<chunk_type>;
309 
310   template <typename U>
311   MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;
312 
313   ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);
314 
315   MemoryBlock(MemoryBlock &&) noexcept;
316   MemoryBlock &operator=(MemoryBlock &&) noexcept;
317 
318   // memory blocks are not copyable
319   MemoryBlock(const MemoryBlock &)            = delete;
320   MemoryBlock &operator=(const MemoryBlock &) = delete;
321 
322   /* --- actual functions --- */
323   PetscErrorCode       try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
324   PetscErrorCode       try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
325   PetscErrorCode       try_find_chunk(const T *, chunk_type **) noexcept;
326   PETSC_NODISCARD bool owns_pointer(const T *) const noexcept;
327 
328   PETSC_NODISCARD size_type size() const noexcept { return size_; }
329   PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
330   PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }
331 
332 private:
333   value_type     *mem_{};
334   allocator_type *allocator_{};
335   size_type       size_{};
336   chunk_list_type chunks_{};
337 
338   PetscErrorCode clear_(const stream_type *) noexcept;
339 };
340 
341 // ==========================================================================================
342 // MemoryBlock - Private API
343 // ==========================================================================================
344 
345 // clear the memory block, called from destructors and move assignment/construction
346 template <typename T, typename A, typename S>
347 PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept
348 {
349   PetscFunctionBegin;
350   if (PetscLikely(mem_)) {
351     PetscCall(allocator_->deallocate(mem_, stream));
352     mem_ = nullptr;
353   }
354   size_ = 0;
355   PetscCallCXX(chunks_.clear());
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 // ==========================================================================================
360 // MemoryBlock - Public API
361 // ==========================================================================================
362 
363 // default constructor, allocates memory immediately
364 template <typename T, typename A, typename S>
365 template <typename U>
366 MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s)
367 {
368   PetscFunctionBegin;
369   PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream));
370   PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s);
371   PetscFunctionReturnVoid();
372 }
373 
374 template <typename T, typename A, typename S>
375 MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value)
376 {
377   stream_type stream;
378 
379   PetscFunctionBegin;
380   PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
381   PetscFunctionReturnVoid();
382 }
383 
384 template <typename T, typename A, typename S>
385 MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_))
386 {
387 }
388 
389 template <typename T, typename A, typename S>
390 MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept
391 {
392   PetscFunctionBegin;
393   if (this != &other) {
394     stream_type stream;
395 
396     PetscCallAbort(PETSC_COMM_SELF, clear_(&stream));
397     mem_       = util::exchange(other.mem_, nullptr);
398     allocator_ = other.allocator_;
399     size_      = util::exchange(other.size_, 0);
400     chunks_    = std::move(other.chunks_);
401   }
402   PetscFunctionReturn(*this);
403 }
404 
405 /*
406   MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise
407 */
408 template <typename T, typename A, typename S>
409 inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept
410 {
411   // each pool is linear in memory, so it suffices to check the bounds
412   return (ptr >= mem_) && (ptr < std::next(mem_, size()));
413 }
414 
415 /*
416   MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock
417 
418   Input Parameters:
419 + req_size - the requested size of the allocation (in elements)
420 . ptr      - ptr to fill
421 - stream   - stream to fill the pointer on
422 
423   Output Parameter:
424 . success  - true if chunk was gotten, false otherwise
425 
426   Notes:
427   If the current memory could not satisfy the memory request, ptr is unchanged
428 */
429 template <typename T, typename A, typename S>
430 inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept
431 {
432   PetscFunctionBegin;
433   *success = false;
434   if (req_size <= size()) {
435     const auto try_create_chunk = [&]() {
436       const auto was_empty     = chunks_.empty();
437       const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset();
438 
439       PetscFunctionBegin;
440       if (block_alloced + req_size <= size()) {
441         PetscCallCXX(chunks_.emplace_back(block_alloced, req_size));
442         PetscCall(chunks_.back().claim(stream, req_size, success));
443         *ptr = mem_ + block_alloced;
444         if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size());
445       }
446       PetscFunctionReturn(PETSC_SUCCESS);
447     };
448     const auto try_find_open_chunk = [&](bool serialize = false) {
449       PetscFunctionBegin;
450       for (auto &chunk : chunks_) {
451         PetscCall(chunk.claim(stream, req_size, success, serialize));
452         if (*success) {
453           *ptr = mem_ + chunk.start();
454           break;
455         }
456       }
457       PetscFunctionReturn(PETSC_SUCCESS);
458     };
459     const auto try_steal_other_stream_chunk = [&]() {
460       PetscFunctionBegin;
461       PetscCall(try_find_open_chunk(true));
462       PetscFunctionReturn(PETSC_SUCCESS);
463     };
464 
465     // search previously distributed chunks, but only claim one if it is on the same stream
466     // as us
467     PetscCall(try_find_open_chunk());
468 
469     // if we are here we couldn't reuse one of our own chunks so check first if the pool
470     // has room for a new one
471     if (!*success) PetscCall(try_create_chunk());
472 
473     // try pruning dead chunks off the back, note we do this regardless of whether we are
474     // successful
475     while (chunks_.back().can_claim(stream, 0, false)) {
476       PetscCallCXX(chunks_.pop_back());
477       if (chunks_.empty()) {
478         // if chunks are empty it implies we have managed to claim (and subsequently destroy)
479         // our own chunk twice! something has gone wrong
480         PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size());
481         break;
482       }
483     }
484 
485     // if previously unsuccessful see if enough space has opened up due to pruning. note that
486     // if the chunk list was emptied from the pruning this call must succeed in allocating a
487     // chunk, otherwise something is wrong
488     if (!*success) PetscCall(try_create_chunk());
489 
490     // last resort, iterate over all chunks and see if we can steal one by waiting on the
491     // current owner to finish using it
492     if (!*success) PetscCall(try_steal_other_stream_chunk());
493   }
494   PetscFunctionReturn(PETSC_SUCCESS);
495 }
496 
497 /*
498   MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock
499 
500   Input Parameters:
501 + ptr     - ptr to restore
502 - stream  - stream to restore the pointer on
503 
504   Output Parameter:
505 . success - true if chunk was restored, false otherwise
506 
507   Notes:
508   ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned
509   by this MemoryBlock then it is restored on stream. The same stream may receive ptr again
510   without synchronization, but other streams may not do so until either serializing or the
511   stream is idle again.
512 */
513 template <typename T, typename A, typename S>
514 inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept
515 {
516   chunk_type *chunk = nullptr;
517 
518   PetscFunctionBegin;
519   PetscCall(try_find_chunk(*ptr, &chunk));
520   if (chunk) {
521     PetscCall(chunk->release(stream));
522     *ptr     = nullptr;
523     *success = true;
524   } else {
525     *success = false;
526   }
527   PetscFunctionReturn(PETSC_SUCCESS);
528 }
529 
530 /*
531   MemoryBlock::try_find_chunk - try to find the chunk which owns ptr
532 
533   Input Parameter:
534 . ptr - the pointer to look for
535 
536   Output Parameter:
537 . ret_chunk - pointer to the owning chunk or nullptr if not found
538 */
539 template <typename T, typename A, typename S>
540 inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept
541 {
542   PetscFunctionBegin;
543   *ret_chunk = nullptr;
544   if (owns_pointer(ptr)) {
545     const auto offset = static_cast<size_type>(ptr - mem_);
546 
547     for (auto &chunk : chunks_) {
548       if (chunk.contains(offset)) {
549         *ret_chunk = &chunk;
550         break;
551       }
552     }
553 
554     PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size())));
555   }
556   PetscFunctionReturn(PETSC_SUCCESS);
557 }
558 
559 namespace detail
560 {
561 
562 template <typename T>
563 struct real_type {
564   using type = T;
565 };
566 
567 template <>
568 struct real_type<PetscScalar> {
569   using type = PetscReal;
570 };
571 
572 } // namespace detail
573 
574 template <typename T>
575 struct SegmentedMemoryPoolAllocatorBase {
576   using value_type      = T;
577   using size_type       = std::size_t;
578   using real_value_type = typename detail::real_type<T>::type;
579 
580   template <typename U>
581   static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept;
582   template <typename U>
583   static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept;
584   template <typename U>
585   static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept;
586   template <typename U>
587   static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept;
588   template <typename U>
589   static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept;
590 };
591 
592 template <typename T>
593 template <typename U>
594 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept
595 {
596   PetscFunctionBegin;
597   PetscCall(PetscMalloc1(n, ptr));
598   PetscFunctionReturn(PETSC_SUCCESS);
599 }
600 
601 template <typename T>
602 template <typename U>
603 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept
604 {
605   PetscFunctionBegin;
606   PetscCall(PetscFree(ptr));
607   PetscFunctionReturn(PETSC_SUCCESS);
608 }
609 
610 template <typename T>
611 template <typename U>
612 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
613 {
614   PetscFunctionBegin;
615   PetscCall(PetscArrayzero(ptr, n));
616   PetscFunctionReturn(PETSC_SUCCESS);
617 }
618 
619 template <typename T>
620 template <typename U>
621 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept
622 {
623   PetscFunctionBegin;
624   PetscCall(PetscArraycpy(dest, src, n));
625   PetscFunctionReturn(PETSC_SUCCESS);
626 }
627 
628 template <typename T>
629 template <typename U>
630 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept
631 {
632   using limit_type            = std::numeric_limits<real_value_type>;
633   constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max();
634 
635   PetscFunctionBegin;
636   for (size_type i = 0; i < n; ++i) ptr[i] = canary;
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
639 
640 } // namespace impl
641 
642 // ==========================================================================================
643 // SegmentedMemoryPool
644 //
645 // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an
646 // allocated buffer. This buffer is further split into memory "chunks" which control
647 // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
648 //
649 // 1. Open:
650 //    The chunk is free to be claimed by the next suitable allocation request. If the
651 //    allocation request is made on the same stream as the chunk was deallocated on, no
652 //    serialization needs to occur. If not, the allocating stream must wait for the
653 //    event. Claiming the chunk "closes" the chunk.
654 //
655 // 2. Closed:
656 //    The chunk has been claimed by an allocation request. It cannot be opened again until it
657 //    is deallocated; doing so "opens" the chunk.
658 //
659 // Note that there does not need to be a chunk for every region, chunks are created to satisfy
660 // an allocation request.
661 //
662 // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
663 // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
664 // request. This region exists _only_ at the end, as there are no gaps between chunks.
665 //
666 //
667 // |-----------------------------------------------------------------------------------------
668 // | SegmentedMemoryPool
669 // |
670 // | ||-------------||
671 // | ||             ||    -------------------------------------------------------------------
672 // | ||             ||    | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
673 // | ||             ||    | |             |      |                   |            |
674 // | ||             ||    | x-----x-------x-----xx---------x---------x------x-----x
675 // | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
676 // | ||             ||    | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
677 // | ||             ||    | ---------------------------------------------------------
678 // | ||             ||    -------------------------------------------------------------------
679 // | ||-------------||
680 // | ||             ||
681 // | ||     ...     ||
682 // | ||             ||
683 // ==========================================================================================
684 
685 template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256>
686 class SegmentedMemoryPool;
687 
688 // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks.
689 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
690 class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> {
691 public:
692   using value_type     = MemType;
693   using stream_type    = StreamType;
694   using allocator_type = AllocType;
695   using block_type     = impl::MemoryBlock<value_type, allocator_type, stream_type>;
696   using pool_type      = std::deque<block_type>;
697   using size_type      = typename block_type::size_type;
698 
699   explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value);
700 
701   PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept;
702   PetscErrorCode deallocate(value_type **, const stream_type *) noexcept;
703   PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept;
704 
705 private:
706   pool_type      pool_;
707   allocator_type allocator_;
708   size_type      chunk_size_;
709 
710   PetscErrorCode make_block_(size_type, const stream_type *) noexcept;
711 
712   friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>;
713   PetscErrorCode register_finalize_(const stream_type *) noexcept;
714   PetscErrorCode finalize_() noexcept;
715 
716   PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept;
717 };
718 
719 // ==========================================================================================
720 // SegmentedMemoryPool - Private API
721 // ==========================================================================================
722 
723 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
724 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept
725 {
726   const auto block_size = std::max(size, chunk_size_);
727 
728   PetscFunctionBegin;
729   PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream));
730   PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size()));
731   PetscFunctionReturn(PETSC_SUCCESS);
732 }
733 
734 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
735 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept
736 {
737   PetscFunctionBegin;
738   PetscCall(make_block_(chunk_size_, stream));
739   PetscFunctionReturn(PETSC_SUCCESS);
740 }
741 
742 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
743 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept
744 {
745   PetscFunctionBegin;
746   PetscCallCXX(pool_.clear());
747   chunk_size_ = DefaultChunkSize;
748   PetscFunctionReturn(PETSC_SUCCESS);
749 }
750 
751 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
752 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept
753 {
754   auto found = false;
755 
756   PetscFunctionBegin;
757   PetscCall(this->register_finalize(stream));
758   for (auto &block : pool_) {
759     PetscCall(block.try_allocate_chunk(size, ptr, stream, &found));
760     if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS);
761   }
762 
763   PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size));
764   // if we are here we couldn't find an open block in the pool, so make a new block
765   PetscCall(make_block_(size, stream));
766   // and assign it
767   PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found));
768   PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size());
769   PetscFunctionReturn(PETSC_SUCCESS);
770 }
771 
772 // ==========================================================================================
773 // SegmentedMemoryPool - Public API
774 // ==========================================================================================
775 
776 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
777 inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size)
778 {
779 }
780 
781 /*
782   SegmentedMemoryPool::allocate - get an allocation from the memory pool
783 
784   Input Parameters:
785 + req_size - size (in elements) to get
786 . ptr      - the pointer to hold the allocation
787 - stream   - the stream on which to get the allocation
788 
789   Output Parameter:
790 . ptr - the pointer holding the allocation
791 
792   Notes:
793   `req_size` cannot be negative. If `req_size` if zero, `ptr` is set to `nullptr`
794 */
795 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
796 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept
797 {
798   value_type *ret_ptr = nullptr;
799 
800   PetscFunctionBegin;
801   PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size);
802   PetscAssertPointer(ptr, 2);
803   PetscAssertPointer(stream, 3);
804   if (req_size) {
805     const auto size         = static_cast<size_type>(req_size);
806     auto       aligned_size = alignment == alignof(char) ? size : size + alignment;
807     void      *vptr         = nullptr;
808 
809     PetscCall(allocate_(aligned_size, &ret_ptr, stream));
810     vptr = ret_ptr;
811     std::align(alignment, size, vptr, aligned_size);
812     ret_ptr = reinterpret_cast<value_type *>(vptr);
813     // sets memory to infinity or NaN depending on the type to catch out uninitialized memory accesses.
814     if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream));
815   }
816   *ptr = ret_ptr;
817   PetscFunctionReturn(PETSC_SUCCESS);
818 }
819 
820 /*
821   SegmentedMemoryPool::deallocate - release a pointer back to the memory pool
822 
823   Input Parameters:
824 + ptr    - the pointer to release
825 - stream - the stream to release it on
826 
827   Notes:
828   If `ptr` is not owned by the pool it is unchanged.
829 */
830 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
831 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept
832 {
833   PetscFunctionBegin;
834   PetscAssertPointer(ptr, 1);
835   PetscAssertPointer(stream, 2);
836   // nobody owns a nullptr, and if they do then they have bigger problems
837   if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS);
838   for (auto &block : pool_) {
839     auto found = false;
840 
841     PetscCall(block.try_deallocate_chunk(ptr, stream, &found));
842     if (PetscLikely(found)) break;
843   }
844   PetscFunctionReturn(PETSC_SUCCESS);
845 }
846 
847 /*
848   SegmentedMemoryPool::reallocate - Resize an allocated buffer
849 
850   Input Parameters:
851 + new_req_size - the new buffer size
852 . ptr          - pointer to the buffer
853 - stream       - stream to resize with
854 
855   Output Parameter:
856 . ptr - pointer to the new region
857 
858   Notes:
859   `ptr` must have been allocated by the pool.
860 
861   It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated).
862 */
863 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize>
864 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept
865 {
866   using chunk_type = typename block_type::chunk_type;
867 
868   const auto  new_size = static_cast<size_type>(new_req_size);
869   const auto  old_ptr  = *ptr;
870   chunk_type *chunk    = nullptr;
871 
872   PetscFunctionBegin;
873   PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size);
874   PetscAssertPointer(ptr, 2);
875   PetscAssertPointer(stream, 3);
876 
877   // if reallocating to zero, just free
878   if (PetscUnlikely(new_size == 0)) {
879     PetscCall(deallocate(ptr, stream));
880     PetscFunctionReturn(PETSC_SUCCESS);
881   }
882 
883   // search the blocks for the owning chunk
884   for (auto &block : pool_) {
885     PetscCall(block.try_find_chunk(old_ptr, &chunk));
886     if (chunk) break; // found
887   }
888   PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr);
889 
890   if (chunk->capacity() < new_size) {
891     // chunk does not have enough room, need to grab a fresh chunk and copy to it
892     *ptr = nullptr;
893     PetscCall(chunk->release(stream));
894     PetscCall(allocate(new_size, ptr, stream));
895     PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream));
896   } else {
897     // chunk had enough room we can simply grow (or shrink) to fit the new size
898     PetscCall(chunk->resize(new_size));
899   }
900   PetscFunctionReturn(PETSC_SUCCESS);
901 }
902 
903 } // namespace memory
904 
905 } // namespace Petsc
906