1 #ifndef PETSC_SEGMENTEDMEMPOOL_HPP 2 #define PETSC_SEGMENTEDMEMPOOL_HPP 3 4 #include <petsc/private/deviceimpl.h> 5 #include <petsc/private/cpp/macros.hpp> 6 #include <petsc/private/cpp/type_traits.hpp> 7 #include <petsc/private/cpp/utility.hpp> 8 #include <petsc/private/cpp/register_finalize.hpp> 9 10 #include <limits> 11 #include <deque> 12 #include <vector> 13 14 namespace Petsc { 15 16 namespace device { 17 18 template <typename T> 19 class StreamBase { 20 public: 21 using id_type = int; 22 using derived_type = T; 23 24 static const id_type INVALID_ID; 25 26 // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion 27 template <typename U = T> 28 PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_()); 29 30 PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); } 31 32 template <typename E> 33 PETSC_NODISCARD PetscErrorCode record_event(E &&event) const noexcept { 34 return static_cast<const T &>(*this).record_event_(std::forward<E>(event)); 35 } 36 37 template <typename E> 38 PETSC_NODISCARD PetscErrorCode wait_for_event(E &&event) const noexcept { 39 return static_cast<const T &>(*this).wait_for_(std::forward<E>(event)); 40 } 41 42 protected: 43 constexpr StreamBase() noexcept = default; 44 45 struct default_event_type { }; 46 using default_stream_type = std::nullptr_t; 47 48 PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; } 49 50 PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; } 51 52 template <typename U = T> 53 PETSC_NODISCARD static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept { 54 return 0; 55 } 56 57 template <typename U = T> 58 PETSC_NODISCARD static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept { 59 return 0; 60 } 61 }; 62 63 template <typename T> 64 const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1; 65 66 struct DefaultStream : StreamBase<DefaultStream> { 67 using stream_type = typename StreamBase<DefaultStream>::default_stream_type; 68 using id_type = typename StreamBase<DefaultStream>::id_type; 69 using event_type = typename StreamBase<DefaultStream>::default_event_type; 70 }; 71 72 } // namespace device 73 74 namespace memory { 75 76 namespace impl { 77 78 // ========================================================================================== 79 // MemoryChunk 80 // 81 // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning 82 // MemoryBlock and its size/capacity 83 // ========================================================================================== 84 85 template <typename EventType> 86 class MemoryChunk { 87 public: 88 using event_type = EventType; 89 using size_type = std::size_t; 90 91 MemoryChunk(size_type, size_type) noexcept; 92 explicit MemoryChunk(size_type) noexcept; 93 94 MemoryChunk(MemoryChunk &&) noexcept; 95 MemoryChunk &operator=(MemoryChunk &&) noexcept; 96 97 MemoryChunk(const MemoryChunk &) noexcept = delete; 98 MemoryChunk &operator=(const MemoryChunk &) noexcept = delete; 99 100 PETSC_NODISCARD size_type start() const noexcept { return start_; } 101 PETSC_NODISCARD size_type size() const noexcept { return size_; } 102 // REVIEW ME: 103 // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in 104 // theory only the last chunk needs to do this 105 PETSC_NODISCARD size_type capacity() const noexcept { return size_; } 106 PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); } 107 108 template <typename U> 109 PETSC_NODISCARD PetscErrorCode release(const device::StreamBase<U> *) noexcept; 110 template <typename U> 111 PETSC_NODISCARD PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept; 112 template <typename U> 113 PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept; 114 PETSC_NODISCARD PetscErrorCode resize(size_type) noexcept; 115 116 private: 117 // clang-format off 118 event_type event_{}; // event recorded when the chunk was released 119 bool open_ = true; // is this chunk open? 120 // id of the last stream to use the chunk, populated on release 121 int stream_id_ = device::DefaultStream::INVALID_ID; 122 size_type size_ = 0; // size of the chunk 123 const size_type start_ = 0; // offset from the start of the owning block 124 125 // clang-format on 126 template <typename U> 127 PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept; 128 }; 129 130 // ========================================================================================== 131 // MemoryChunk - Private API 132 // ========================================================================================== 133 134 // asks and answers the question: can this stream claim this chunk without serializing? 135 template <typename E> 136 template <typename U> 137 inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept { 138 return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id()); 139 } 140 141 // ========================================================================================== 142 // MemoryChunk - Public API 143 // ========================================================================================== 144 145 template <typename E> 146 inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start) { } 147 148 template <typename E> 149 inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size) { } 150 151 template <typename E> 152 inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept : 153 event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_)) { } 154 155 template <typename E> 156 inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept { 157 PetscFunctionBegin; 158 if (this != &other) { 159 event_ = std::move(other.event_); 160 open_ = util::exchange(other.open_, false); 161 stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID); 162 size_ = util::exchange(other.size_, 0); 163 start_ = std::move(other.start_); 164 } 165 PetscFunctionReturn(*this); 166 } 167 168 /* 169 MemoryChunk::release - release a chunk on a stream 170 171 Input Parameter: 172 . stream - the stream to release the chunk with 173 174 Notes: 175 Inserts a release operation on stream and records the state of stream at the time this 176 routine was called. 177 178 Future allocation requests which attempt to claim the chunk on the same stream may re-acquire 179 the chunk without serialization. 180 181 If another stream attempts to claim the chunk they must wait for the recorded event before 182 claiming the chunk. 183 */ 184 template <typename E> 185 template <typename U> 186 inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept { 187 PetscFunctionBegin; 188 open_ = true; 189 stream_id_ = stream->get_id(); 190 PetscCall(stream->record_event(event_)); 191 PetscFunctionReturn(0); 192 } 193 194 /* 195 MemoryChunk::claim - attempt to claim a particular chunk 196 197 Input Parameters: 198 + stream - the stream on which to attempt to claim 199 . req_size - the requested size (in elements) to attempt to claim 200 - serialize - (optional, false) whether the claimant allows serialization 201 202 Output Parameter: 203 . success - true if the chunk was claimed, false otherwise 204 */ 205 template <typename E> 206 template <typename U> 207 inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept { 208 PetscFunctionBegin; 209 if ((*success = can_claim(stream, req_size, serialize))) { 210 if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_)); 211 PetscCall(resize(req_size)); 212 open_ = false; 213 } 214 PetscFunctionReturn(0); 215 } 216 217 /* 218 MemoryChunk::can_claim - test whether a particular chunk can be claimed 219 220 Input Parameters: 221 + stream - the stream on which to attempt to claim 222 . req_size - the requested size (in elements) to attempt to claim 223 - serialize - whether the claimant allows serialization 224 225 Output: 226 . [return] - true if the chunk is claimable given the configuration, false otherwise 227 */ 228 template <typename E> 229 template <typename U> 230 inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept { 231 if (open_ && (req_size <= capacity())) { 232 // fully compatible 233 if (stream_compat_(stream)) return true; 234 // stream wasn't compatible, but could claim if we serialized 235 if (serialize) return true; 236 // incompatible stream and did not want to serialize 237 } 238 return false; 239 } 240 241 /* 242 MemoryChunk::resize - grow a chunk to new size 243 244 Input Parameter: 245 . newsize - the new size Requested 246 247 Notes: 248 newsize cannot be larger than capacity 249 */ 250 template <typename E> 251 inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept { 252 PetscFunctionBegin; 253 PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity()); 254 size_ = newsize; 255 PetscFunctionReturn(0); 256 } 257 258 // ========================================================================================== 259 // MemoryBlock 260 // 261 // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving 262 // and restoring a block is thread-safe (so may be used by multiple device streams). 263 // ========================================================================================== 264 265 template <typename T, typename AllocatorType, typename StreamType> 266 class MemoryBlock { 267 public: 268 using value_type = T; 269 using allocator_type = AllocatorType; 270 using stream_type = StreamType; 271 using event_type = typename stream_type::event_type; 272 using chunk_type = MemoryChunk<event_type>; 273 using size_type = typename chunk_type::size_type; 274 using chunk_list_type = std::vector<chunk_type>; 275 276 template <typename U> 277 MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept; 278 279 ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value); 280 281 MemoryBlock(MemoryBlock &&) noexcept; 282 MemoryBlock &operator=(MemoryBlock &&) noexcept; 283 284 // memory blocks are not copyable 285 MemoryBlock(const MemoryBlock &) = delete; 286 MemoryBlock &operator=(const MemoryBlock &) = delete; 287 288 /* --- actual functions --- */ 289 PETSC_NODISCARD PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept; 290 PETSC_NODISCARD PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept; 291 PETSC_NODISCARD PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept; 292 PETSC_NODISCARD bool owns_pointer(const T *) const noexcept; 293 294 PETSC_NODISCARD size_type size() const noexcept { return size_; } 295 PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); } 296 PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); } 297 298 private: 299 value_type *mem_{}; 300 allocator_type *allocator_{}; 301 size_type size_{}; 302 chunk_list_type chunks_{}; 303 304 PETSC_NODISCARD PetscErrorCode clear_(const stream_type *) noexcept; 305 }; 306 307 // ========================================================================================== 308 // MemoryBlock - Private API 309 // ========================================================================================== 310 311 // clear the memory block, called from destructors and move assignment/construction 312 template <typename T, typename A, typename S> 313 PETSC_NODISCARD PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept { 314 PetscFunctionBegin; 315 if (PetscLikely(mem_)) { 316 PetscCall(allocator_->deallocate(mem_, stream)); 317 mem_ = nullptr; 318 } 319 size_ = 0; 320 PetscCallCXX(chunks_.clear()); 321 PetscFunctionReturn(0); 322 } 323 324 // ========================================================================================== 325 // MemoryBlock - Public API 326 // ========================================================================================== 327 328 // default constructor, allocates memory immediately 329 template <typename T, typename A, typename S> 330 template <typename U> 331 MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s) { 332 PetscFunctionBegin; 333 PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream)); 334 PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s); 335 PetscFunctionReturnVoid(); 336 } 337 338 template <typename T, typename A, typename S> 339 MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value) { 340 stream_type stream; 341 342 PetscFunctionBegin; 343 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 344 PetscFunctionReturnVoid(); 345 } 346 347 template <typename T, typename A, typename S> 348 MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_)) { } 349 350 template <typename T, typename A, typename S> 351 MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept { 352 PetscFunctionBegin; 353 if (this != &other) { 354 stream_type stream; 355 356 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 357 mem_ = util::exchange(other.mem_, nullptr); 358 allocator_ = other.allocator_; 359 size_ = util::exchange(other.size_, 0); 360 chunks_ = std::move(other.chunks_); 361 } 362 PetscFunctionReturn(*this); 363 } 364 365 /* 366 MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise 367 */ 368 template <typename T, typename A, typename S> 369 inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept { 370 // each pool is linear in memory, so it suffices to check the bounds 371 return (ptr >= mem_) && (ptr < std::next(mem_, size())); 372 } 373 374 /* 375 MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock 376 377 Input Parameters: 378 + req_size - the requested size of the allocation (in elements) 379 . ptr - ptr to fill 380 - stream - stream to fill the pointer on 381 382 Output Parameter: 383 . success - true if chunk was gotten, false otherwise 384 385 Notes: 386 If the current memory could not satisfy the memory request, ptr is unchanged 387 */ 388 template <typename T, typename A, typename S> 389 inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept { 390 PetscFunctionBegin; 391 *success = false; 392 if (req_size <= size()) { 393 const auto try_create_chunk = [&]() { 394 const auto was_empty = chunks_.empty(); 395 const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset(); 396 397 PetscFunctionBegin; 398 if (block_alloced + req_size <= size()) { 399 PetscCallCXX(chunks_.emplace_back(block_alloced, req_size)); 400 PetscCall(chunks_.back().claim(stream, req_size, success)); 401 *ptr = mem_ + block_alloced; 402 if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size()); 403 } 404 PetscFunctionReturn(0); 405 }; 406 const auto try_find_open_chunk = [&](bool serialize = false) { 407 PetscFunctionBegin; 408 for (auto &chunk : chunks_) { 409 PetscCall(chunk.claim(stream, req_size, success, serialize)); 410 if (*success) { 411 *ptr = mem_ + chunk.start(); 412 break; 413 } 414 } 415 PetscFunctionReturn(0); 416 }; 417 418 // search previously distributed chunks, but only claim one if it is on the same stream 419 // as us 420 PetscCall(try_find_open_chunk()); 421 422 // if we are here we couldn't reuse one of our own chunks so check first if the pool 423 // has room for a new one 424 if (!*success) PetscCall(try_create_chunk()); 425 426 // try pruning dead chunks off the back, note we do this regardless of whether we are 427 // successful 428 while (chunks_.back().can_claim(stream, 0, false)) { 429 PetscCallCXX(chunks_.pop_back()); 430 if (chunks_.empty()) { 431 // if chunks are empty it implies we have managed to claim (and subsequently destroy) 432 // our own chunk twice! something has gone wrong 433 PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size()); 434 break; 435 } 436 } 437 438 // if previously unsuccessful see if enough space has opened up due to pruning. note that 439 // if the chunk list was emptied from the pruning this call must succeed in allocating a 440 // chunk, otherwise something is wrong 441 if (!*success) PetscCall(try_create_chunk()); 442 443 // last resort, iterate over all chunks and see if we can steal one by waiting on the 444 // current owner to finish using it 445 if (!*success) PetscCall(try_find_open_chunk(true)); 446 447 // sets memory to NaN or infinity depending on the type to catch out uninitialized memory 448 // accesses. 449 if (PetscDefined(USE_DEBUG) && *success) PetscCall(allocator_->set_canary(*ptr, req_size, stream)); 450 } 451 PetscFunctionReturn(0); 452 } 453 454 /* 455 MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock 456 457 Input Parameters: 458 + ptr - ptr to restore 459 - stream - stream to restore the pointer on 460 461 Output Parameter: 462 . success - true if chunk was restored, false otherwise 463 464 Notes: 465 ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned 466 by this MemoryBlock then it is restored on stream. The same stream may recieve ptr again 467 without synchronization, but other streams may not do so until either serializing or the 468 stream is idle again. 469 */ 470 template <typename T, typename A, typename S> 471 inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept { 472 chunk_type *chunk = nullptr; 473 474 PetscFunctionBegin; 475 PetscCall(try_find_chunk(*ptr, &chunk)); 476 if (chunk) { 477 PetscCall(chunk->release(stream)); 478 *ptr = nullptr; 479 *success = true; 480 } else { 481 *success = false; 482 } 483 PetscFunctionReturn(0); 484 } 485 486 /* 487 MemoryBlock::try_find_chunk - try to find the chunk which owns ptr 488 489 Input Parameter: 490 . ptr - the pointer to lookk for 491 492 Output Parameter: 493 . ret_chunk - pointer to the owning chunk or nullptr if not found 494 */ 495 template <typename T, typename A, typename S> 496 inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept { 497 PetscFunctionBegin; 498 *ret_chunk = nullptr; 499 if (owns_pointer(ptr)) { 500 const auto offset = static_cast<size_type>(ptr - mem_); 501 502 for (auto &chunk : chunks_) { 503 if (chunk.start() == offset) { 504 *ret_chunk = &chunk; 505 break; 506 } 507 } 508 509 PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size()))); 510 } 511 PetscFunctionReturn(0); 512 } 513 514 namespace detail { 515 516 template <typename T> 517 struct real_type { 518 using type = T; 519 }; 520 521 template <> 522 struct real_type<PetscScalar> { 523 using type = PetscReal; 524 }; 525 526 } // namespace detail 527 528 template <typename T> 529 struct SegmentedMemoryPoolAllocatorBase { 530 using value_type = T; 531 using size_type = std::size_t; 532 using real_value_type = typename detail::real_type<T>::type; 533 534 template <typename U> 535 PETSC_NODISCARD static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept; 536 template <typename U> 537 PETSC_NODISCARD static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept; 538 template <typename U> 539 PETSC_NODISCARD static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept; 540 template <typename U> 541 PETSC_NODISCARD static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept; 542 template <typename U> 543 PETSC_NODISCARD static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept; 544 }; 545 546 template <typename T> 547 template <typename U> 548 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept { 549 PetscFunctionBegin; 550 PetscCall(PetscMalloc1(n, ptr)); 551 PetscFunctionReturn(0); 552 } 553 554 template <typename T> 555 template <typename U> 556 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept { 557 PetscFunctionBegin; 558 PetscCall(PetscFree(ptr)); 559 PetscFunctionReturn(0); 560 } 561 562 template <typename T> 563 template <typename U> 564 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept { 565 PetscFunctionBegin; 566 PetscCall(PetscArrayzero(ptr, n)); 567 PetscFunctionReturn(0); 568 } 569 570 template <typename T> 571 template <typename U> 572 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept { 573 PetscFunctionBegin; 574 PetscCall(PetscArraycpy(dest, src, n)); 575 PetscFunctionReturn(0); 576 } 577 578 template <typename T> 579 template <typename U> 580 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept { 581 using limit_type = std::numeric_limits<real_value_type>; 582 constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max(); 583 584 PetscFunctionBegin; 585 for (size_type i = 0; i < n; ++i) ptr[i] = canary; 586 PetscFunctionReturn(0); 587 } 588 589 } // namespace impl 590 591 // ========================================================================================== 592 // SegmentedMemoryPool 593 // ========================================================================================== 594 595 template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256> 596 class SegmentedMemoryPool; 597 598 // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks. 599 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 600 class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> { 601 public: 602 using value_type = MemType; 603 using stream_type = StreamType; 604 using allocator_type = AllocType; 605 using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>; 606 using pool_type = std::deque<block_type>; 607 using size_type = typename block_type::size_type; 608 609 explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value); 610 611 PETSC_NODISCARD PetscErrorCode allocate(PetscInt, value_type **, const stream_type *) noexcept; 612 PETSC_NODISCARD PetscErrorCode deallocate(value_type **, const stream_type *) noexcept; 613 PETSC_NODISCARD PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept; 614 615 private: 616 pool_type pool_; 617 allocator_type allocator_; 618 size_type chunk_size_; 619 620 PETSC_NODISCARD PetscErrorCode make_block_(size_type, const stream_type *) noexcept; 621 622 friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>; 623 PETSC_NODISCARD PetscErrorCode register_finalize_(const stream_type *) noexcept; 624 PETSC_NODISCARD PetscErrorCode finalize_() noexcept; 625 }; 626 627 // ========================================================================================== 628 // SegmentedMemoryPool - Private API 629 // ========================================================================================== 630 631 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 632 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept { 633 const auto block_size = std::max(size, chunk_size_); 634 635 PetscFunctionBegin; 636 PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream)); 637 PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size())); 638 PetscFunctionReturn(0); 639 } 640 641 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 642 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept { 643 PetscFunctionBegin; 644 PetscCall(make_block_(chunk_size_, stream)); 645 PetscFunctionReturn(0); 646 } 647 648 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 649 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept { 650 PetscFunctionBegin; 651 PetscCallCXX(pool_.clear()); 652 chunk_size_ = DefaultChunkSize; 653 PetscFunctionReturn(0); 654 } 655 656 // ========================================================================================== 657 // SegmentedMemoryPool - Public API 658 // ========================================================================================== 659 660 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 661 inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : 662 allocator_(std::move(alloc)), chunk_size_(size) { } 663 664 /* 665 SegmentedMemoryPool::allocate - get an allocation from the memory pool 666 667 Input Parameters: 668 + req_size - size (in elements) to get 669 . ptr - the pointer to hold the allocation 670 - stream - the stream on which to get the allocation 671 672 Output Parameter: 673 . ptr - the pointer holding the allocation 674 675 Notes: 676 req_size cannot be negative. If req_size if zero, ptr is set to nullptr 677 */ 678 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 679 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream) noexcept { 680 const auto size = static_cast<size_type>(req_size); 681 auto found = false; 682 683 PetscFunctionBegin; 684 PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size); 685 PetscValidPointer(ptr, 2); 686 PetscValidPointer(stream, 3); 687 *ptr = nullptr; 688 if (!req_size) PetscFunctionReturn(0); 689 PetscCall(this->register_finalize(PETSC_COMM_SELF, stream)); 690 for (auto &block : pool_) { 691 PetscCall(block.try_allocate_chunk(size, ptr, stream, &found)); 692 if (PetscLikely(found)) PetscFunctionReturn(0); 693 } 694 695 PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size)); 696 // if we are here we couldn't find an open block in the pool, so make a new block 697 PetscCall(make_block_(size, stream)); 698 // and assign it 699 PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found)); 700 PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size()); 701 PetscFunctionReturn(0); 702 } 703 704 /* 705 SegmentedMemoryPool::deallocate - release a pointer back to the memory pool 706 707 Input Parameters: 708 + ptr - the pointer to release 709 - stream - the stream to release it on 710 711 Notes: 712 If ptr is not owned by the pool it is unchanged. 713 */ 714 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 715 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept { 716 PetscFunctionBegin; 717 PetscValidPointer(ptr, 1); 718 PetscValidPointer(stream, 2); 719 // nobody owns a nullptr, and if they do then they have bigger problems 720 if (!*ptr) PetscFunctionReturn(0); 721 for (auto &block : pool_) { 722 auto found = false; 723 724 PetscCall(block.try_deallocate_chunk(ptr, stream, &found)); 725 if (PetscLikely(found)) break; 726 } 727 PetscFunctionReturn(0); 728 } 729 730 /* 731 SegmentedMemoryPool::reallocate - Resize an allocated buffer 732 733 Input Parameters: 734 + new_req_size - the new buffer size 735 . ptr - pointer to the buffer 736 - stream - stream to resize with 737 738 Ouput Parameter: 739 . ptr - pointer to the new region 740 741 Notes: 742 ptr must have been allocated by the pool. 743 744 It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated). 745 */ 746 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 747 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept { 748 using chunk_type = typename block_type::chunk_type; 749 750 const auto new_size = static_cast<size_type>(new_req_size); 751 const auto old_ptr = *ptr; 752 chunk_type *chunk = nullptr; 753 754 PetscFunctionBegin; 755 PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size); 756 PetscValidPointer(ptr, 2); 757 PetscValidPointer(stream, 3); 758 759 // if reallocating to zero, just free 760 if (PetscUnlikely(new_size == 0)) { 761 PetscCall(deallocate(ptr, stream)); 762 PetscFunctionReturn(0); 763 } 764 765 // search the blocks for the owning chunk 766 for (auto &block : pool_) { 767 PetscCall(block.try_find_chunk(old_ptr, &chunk)); 768 if (chunk) break; // found 769 } 770 PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr); 771 772 if (chunk->capacity() < new_size) { 773 // chunk does not have enough room, need to grab a fresh chunk and copy to it 774 *ptr = nullptr; 775 PetscCall(chunk->release(stream)); 776 PetscCall(allocate(new_size, ptr, stream)); 777 PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream)); 778 } else { 779 // chunk had enough room we can simply grow (or shrink) to fit the new size 780 PetscCall(chunk->resize(new_size)); 781 } 782 PetscFunctionReturn(0); 783 } 784 785 } // namespace memory 786 787 } // namespace Petsc 788 789 #endif // PETSC_SEGMENTEDMEMPOOL_HPP 790