1 #ifndef PETSC_SEGMENTEDMEMPOOL_HPP 2 #define PETSC_SEGMENTEDMEMPOOL_HPP 3 4 #include <petsc/private/deviceimpl.h> 5 6 #include <petsc/private/cpp/macros.hpp> 7 #include <petsc/private/cpp/type_traits.hpp> 8 #include <petsc/private/cpp/utility.hpp> 9 #include <petsc/private/cpp/register_finalize.hpp> 10 #include <petsc/private/cpp/memory.hpp> 11 12 #include <limits> 13 #include <deque> 14 #include <vector> 15 16 namespace Petsc { 17 18 namespace device { 19 20 template <typename T> 21 class StreamBase { 22 public: 23 using id_type = int; 24 using derived_type = T; 25 26 static const id_type INVALID_ID; 27 28 // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion 29 template <typename U = T> 30 PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_()); 31 32 PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); } 33 34 template <typename E> 35 PETSC_NODISCARD PetscErrorCode record_event(E &&event) const noexcept { 36 return static_cast<const T &>(*this).record_event_(std::forward<E>(event)); 37 } 38 39 template <typename E> 40 PETSC_NODISCARD PetscErrorCode wait_for_event(E &&event) const noexcept { 41 return static_cast<const T &>(*this).wait_for_(std::forward<E>(event)); 42 } 43 44 protected: 45 constexpr StreamBase() noexcept = default; 46 47 struct default_event_type { }; 48 using default_stream_type = std::nullptr_t; 49 50 PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; } 51 52 PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; } 53 54 template <typename U = T> 55 PETSC_NODISCARD static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept { 56 return 0; 57 } 58 59 template <typename U = T> 60 PETSC_NODISCARD static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept { 61 return 0; 62 } 63 }; 64 65 template 
<typename T> 66 const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1; 67 68 struct DefaultStream : StreamBase<DefaultStream> { 69 using stream_type = typename StreamBase<DefaultStream>::default_stream_type; 70 using id_type = typename StreamBase<DefaultStream>::id_type; 71 using event_type = typename StreamBase<DefaultStream>::default_event_type; 72 }; 73 74 } // namespace device 75 76 namespace memory { 77 78 namespace impl { 79 80 // ========================================================================================== 81 // MemoryChunk 82 // 83 // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning 84 // MemoryBlock and its size/capacity 85 // ========================================================================================== 86 87 template <typename EventType> 88 class MemoryChunk { 89 public: 90 using event_type = EventType; 91 using size_type = std::size_t; 92 93 MemoryChunk(size_type, size_type) noexcept; 94 explicit MemoryChunk(size_type) noexcept; 95 96 MemoryChunk(MemoryChunk &&) noexcept; 97 MemoryChunk &operator=(MemoryChunk &&) noexcept; 98 99 MemoryChunk(const MemoryChunk &) noexcept = delete; 100 MemoryChunk &operator=(const MemoryChunk &) noexcept = delete; 101 102 PETSC_NODISCARD size_type start() const noexcept { return start_; } 103 PETSC_NODISCARD size_type size() const noexcept { return size_; } 104 // REVIEW ME: 105 // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in 106 // theory only the last chunk needs to do this 107 PETSC_NODISCARD size_type capacity() const noexcept { return size_; } 108 PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); } 109 110 template <typename U> 111 PETSC_NODISCARD PetscErrorCode release(const device::StreamBase<U> *) noexcept; 112 template <typename U> 113 PETSC_NODISCARD PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept; 114 template 
<typename U> 115 PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept; 116 PETSC_NODISCARD PetscErrorCode resize(size_type) noexcept; 117 PETSC_NODISCARD bool contains(size_type) const noexcept; 118 119 private: 120 // clang-format off 121 event_type event_{}; // event recorded when the chunk was released 122 bool open_ = true; // is this chunk open? 123 // id of the last stream to use the chunk, populated on release 124 int stream_id_ = device::DefaultStream::INVALID_ID; 125 size_type size_ = 0; // size of the chunk 126 const size_type start_ = 0; // offset from the start of the owning block 127 // clang-format on 128 129 template <typename U> 130 PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept; 131 }; 132 133 // ========================================================================================== 134 // MemoryChunk - Private API 135 // ========================================================================================== 136 137 // asks and answers the question: can this stream claim this chunk without serializing? 
138 template <typename E> 139 template <typename U> 140 inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept { 141 return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id()); 142 } 143 144 // ========================================================================================== 145 // MemoryChunk - Public API 146 // ========================================================================================== 147 148 template <typename E> 149 inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start) { } 150 151 template <typename E> 152 inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size) { } 153 154 template <typename E> 155 inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept : 156 event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_)) { } 157 158 template <typename E> 159 inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept { 160 PetscFunctionBegin; 161 if (this != &other) { 162 event_ = std::move(other.event_); 163 open_ = util::exchange(other.open_, false); 164 stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID); 165 size_ = util::exchange(other.size_, 0); 166 start_ = std::move(other.start_); 167 } 168 PetscFunctionReturn(*this); 169 } 170 171 /* 172 MemoryChunk::release - release a chunk on a stream 173 174 Input Parameter: 175 . stream - the stream to release the chunk with 176 177 Notes: 178 Inserts a release operation on stream and records the state of stream at the time this 179 routine was called. 180 181 Future allocation requests which attempt to claim the chunk on the same stream may re-acquire 182 the chunk without serialization. 
183 184 If another stream attempts to claim the chunk they must wait for the recorded event before 185 claiming the chunk. 186 */ 187 template <typename E> 188 template <typename U> 189 inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept { 190 PetscFunctionBegin; 191 open_ = true; 192 stream_id_ = stream->get_id(); 193 PetscCall(stream->record_event(event_)); 194 PetscFunctionReturn(0); 195 } 196 197 /* 198 MemoryChunk::claim - attempt to claim a particular chunk 199 200 Input Parameters: 201 + stream - the stream on which to attempt to claim 202 . req_size - the requested size (in elements) to attempt to claim 203 - serialize - (optional, false) whether the claimant allows serialization 204 205 Output Parameter: 206 . success - true if the chunk was claimed, false otherwise 207 */ 208 template <typename E> 209 template <typename U> 210 inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept { 211 PetscFunctionBegin; 212 if ((*success = can_claim(stream, req_size, serialize))) { 213 if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_)); 214 PetscCall(resize(req_size)); 215 open_ = false; 216 } 217 PetscFunctionReturn(0); 218 } 219 220 /* 221 MemoryChunk::can_claim - test whether a particular chunk can be claimed 222 223 Input Parameters: 224 + stream - the stream on which to attempt to claim 225 . req_size - the requested size (in elements) to attempt to claim 226 - serialize - whether the claimant allows serialization 227 228 Output: 229 . 
[return] - true if the chunk is claimable given the configuration, false otherwise 230 */ 231 template <typename E> 232 template <typename U> 233 inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept { 234 if (open_ && (req_size <= capacity())) { 235 // fully compatible 236 if (stream_compat_(stream)) return true; 237 // stream wasn't compatible, but could claim if we serialized 238 if (serialize) return true; 239 // incompatible stream and did not want to serialize 240 } 241 return false; 242 } 243 244 /* 245 MemoryChunk::resize - grow a chunk to new size 246 247 Input Parameter: 248 . newsize - the new size Requested 249 250 Notes: 251 newsize cannot be larger than capacity 252 */ 253 template <typename E> 254 inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept { 255 PetscFunctionBegin; 256 PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity()); 257 size_ = newsize; 258 PetscFunctionReturn(0); 259 } 260 261 /* 262 MemoryChunk::contains - query whether a memory chunk contains a particular offset 263 264 Input Parameters: 265 . offset - The offset from the MemoryBlock start 266 267 Notes: 268 Returns true if the chunk contains the offset, false otherwise 269 */ 270 template <typename E> 271 inline bool MemoryChunk<E>::contains(size_type offset) const noexcept { 272 return (offset >= start()) && (offset < total_offset()); 273 } 274 275 // ========================================================================================== 276 // MemoryBlock 277 // 278 // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving 279 // and restoring a block is thread-safe (so may be used by multiple device streams). 
// ==========================================================================================

// T is the element type held by the block, AllocatorType provides allocate()/deallocate()
// for the underlying buffer and StreamType is the stream type handed to the chunks.
template <typename T, typename AllocatorType, typename StreamType>
class MemoryBlock {
public:
  using value_type      = T;
  using allocator_type  = AllocatorType;
  using stream_type     = StreamType;
  using event_type      = typename stream_type::event_type;
  using chunk_type      = MemoryChunk<event_type>;
  using size_type       = typename chunk_type::size_type;
  using chunk_list_type = std::vector<chunk_type>;

  // allocates the buffer (of the given number of elements) immediately using the allocator
  template <typename U>
  MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept;

  // returns the buffer to the allocator
  ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value);

  MemoryBlock(MemoryBlock &&) noexcept;
  MemoryBlock &operator=(MemoryBlock &&) noexcept;

  // memory blocks are not copyable
  MemoryBlock(const MemoryBlock &)            = delete;
  MemoryBlock &operator=(const MemoryBlock &) = delete;

  /* --- actual functions --- */
  // each try_*() routine reports whether it succeeded through its final argument
  PETSC_NODISCARD PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept;
  PETSC_NODISCARD PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept;
  PETSC_NODISCARD PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept;
  PETSC_NODISCARD bool           owns_pointer(const T *) const noexcept;

  // size of the block in elements
  PETSC_NODISCARD size_type size() const noexcept { return size_; }
  // size of the block in bytes
  PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); }
  // number of chunks currently tracked by the block (open or closed)
  PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); }

private:
  value_type     *mem_{};       // the buffer owned by the block
  allocator_type *allocator_{}; // allocator used to obtain and return mem_ (not owned)
  size_type       size_{};      // number of elements in mem_
  chunk_list_type chunks_{};    // chunks handed out so far, in order of increasing offset

  PETSC_NODISCARD PetscErrorCode clear_(const stream_type *) noexcept;
};

// ==========================================================================================
// MemoryBlock - Private API
========================================================================================== 327 328 // clear the memory block, called from destructors and move assignment/construction 329 template <typename T, typename A, typename S> 330 PETSC_NODISCARD PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept { 331 PetscFunctionBegin; 332 if (PetscLikely(mem_)) { 333 PetscCall(allocator_->deallocate(mem_, stream)); 334 mem_ = nullptr; 335 } 336 size_ = 0; 337 PetscCallCXX(chunks_.clear()); 338 PetscFunctionReturn(0); 339 } 340 341 // ========================================================================================== 342 // MemoryBlock - Public API 343 // ========================================================================================== 344 345 // default constructor, allocates memory immediately 346 template <typename T, typename A, typename S> 347 template <typename U> 348 MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s) { 349 PetscFunctionBegin; 350 PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream)); 351 PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s); 352 PetscFunctionReturnVoid(); 353 } 354 355 template <typename T, typename A, typename S> 356 MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value) { 357 stream_type stream; 358 359 PetscFunctionBegin; 360 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 361 PetscFunctionReturnVoid(); 362 } 363 364 template <typename T, typename A, typename S> 365 MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_)) { } 366 367 template <typename T, typename A, typename S> 368 MemoryBlock<T, A, S> &MemoryBlock<T, A, 
S>::operator=(MemoryBlock &&other) noexcept { 369 PetscFunctionBegin; 370 if (this != &other) { 371 stream_type stream; 372 373 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 374 mem_ = util::exchange(other.mem_, nullptr); 375 allocator_ = other.allocator_; 376 size_ = util::exchange(other.size_, 0); 377 chunks_ = std::move(other.chunks_); 378 } 379 PetscFunctionReturn(*this); 380 } 381 382 /* 383 MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise 384 */ 385 template <typename T, typename A, typename S> 386 inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept { 387 // each pool is linear in memory, so it suffices to check the bounds 388 return (ptr >= mem_) && (ptr < std::next(mem_, size())); 389 } 390 391 /* 392 MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock 393 394 Input Parameters: 395 + req_size - the requested size of the allocation (in elements) 396 . ptr - ptr to fill 397 - stream - stream to fill the pointer on 398 399 Output Parameter: 400 . success - true if chunk was gotten, false otherwise 401 402 Notes: 403 If the current memory could not satisfy the memory request, ptr is unchanged 404 */ 405 template <typename T, typename A, typename S> 406 inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept { 407 PetscFunctionBegin; 408 *success = false; 409 if (req_size <= size()) { 410 const auto try_create_chunk = [&]() { 411 const auto was_empty = chunks_.empty(); 412 const auto block_alloced = was_empty ? 
0 : chunks_.back().total_offset(); 413 414 PetscFunctionBegin; 415 if (block_alloced + req_size <= size()) { 416 PetscCallCXX(chunks_.emplace_back(block_alloced, req_size)); 417 PetscCall(chunks_.back().claim(stream, req_size, success)); 418 *ptr = mem_ + block_alloced; 419 if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size()); 420 } 421 PetscFunctionReturn(0); 422 }; 423 const auto try_find_open_chunk = [&](bool serialize = false) { 424 PetscFunctionBegin; 425 for (auto &chunk : chunks_) { 426 PetscCall(chunk.claim(stream, req_size, success, serialize)); 427 if (*success) { 428 *ptr = mem_ + chunk.start(); 429 break; 430 } 431 } 432 PetscFunctionReturn(0); 433 }; 434 const auto try_steal_other_stream_chunk = [&]() { 435 PetscFunctionBegin; 436 PetscCall(try_find_open_chunk(true)); 437 PetscFunctionReturn(0); 438 }; 439 440 // search previously distributed chunks, but only claim one if it is on the same stream 441 // as us 442 PetscCall(try_find_open_chunk()); 443 444 // if we are here we couldn't reuse one of our own chunks so check first if the pool 445 // has room for a new one 446 if (!*success) PetscCall(try_create_chunk()); 447 448 // try pruning dead chunks off the back, note we do this regardless of whether we are 449 // successful 450 while (chunks_.back().can_claim(stream, 0, false)) { 451 PetscCallCXX(chunks_.pop_back()); 452 if (chunks_.empty()) { 453 // if chunks are empty it implies we have managed to claim (and subsequently destroy) 454 // our own chunk twice! something has gone wrong 455 PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size()); 456 break; 457 } 458 } 459 460 // if previously unsuccessful see if enough space has opened up due to pruning. 
note that 461 // if the chunk list was emptied from the pruning this call must succeed in allocating a 462 // chunk, otherwise something is wrong 463 if (!*success) PetscCall(try_create_chunk()); 464 465 // last resort, iterate over all chunks and see if we can steal one by waiting on the 466 // current owner to finish using it 467 if (!*success) PetscCall(try_steal_other_stream_chunk()); 468 } 469 PetscFunctionReturn(0); 470 } 471 472 /* 473 MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock 474 475 Input Parameters: 476 + ptr - ptr to restore 477 - stream - stream to restore the pointer on 478 479 Output Parameter: 480 . success - true if chunk was restored, false otherwise 481 482 Notes: 483 ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned 484 by this MemoryBlock then it is restored on stream. The same stream may recieve ptr again 485 without synchronization, but other streams may not do so until either serializing or the 486 stream is idle again. 487 */ 488 template <typename T, typename A, typename S> 489 inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept { 490 chunk_type *chunk = nullptr; 491 492 PetscFunctionBegin; 493 PetscCall(try_find_chunk(*ptr, &chunk)); 494 if (chunk) { 495 PetscCall(chunk->release(stream)); 496 *ptr = nullptr; 497 *success = true; 498 } else { 499 *success = false; 500 } 501 PetscFunctionReturn(0); 502 } 503 504 /* 505 MemoryBlock::try_find_chunk - try to find the chunk which owns ptr 506 507 Input Parameter: 508 . ptr - the pointer to lookk for 509 510 Output Parameter: 511 . 
ret_chunk - pointer to the owning chunk or nullptr if not found 512 */ 513 template <typename T, typename A, typename S> 514 inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept { 515 PetscFunctionBegin; 516 *ret_chunk = nullptr; 517 if (owns_pointer(ptr)) { 518 const auto offset = static_cast<size_type>(ptr - mem_); 519 520 for (auto &chunk : chunks_) { 521 if (chunk.contains(offset)) { 522 *ret_chunk = &chunk; 523 break; 524 } 525 } 526 527 PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size()))); 528 } 529 PetscFunctionReturn(0); 530 } 531 532 namespace detail { 533 534 template <typename T> 535 struct real_type { 536 using type = T; 537 }; 538 539 template <> 540 struct real_type<PetscScalar> { 541 using type = PetscReal; 542 }; 543 544 } // namespace detail 545 546 template <typename T> 547 struct SegmentedMemoryPoolAllocatorBase { 548 using value_type = T; 549 using size_type = std::size_t; 550 using real_value_type = typename detail::real_type<T>::type; 551 552 template <typename U> 553 PETSC_NODISCARD static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept; 554 template <typename U> 555 PETSC_NODISCARD static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept; 556 template <typename U> 557 PETSC_NODISCARD static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept; 558 template <typename U> 559 PETSC_NODISCARD static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept; 560 template <typename U> 561 PETSC_NODISCARD static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept; 562 }; 563 564 template 
<typename T> 565 template <typename U> 566 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept { 567 PetscFunctionBegin; 568 PetscCall(PetscMalloc1(n, ptr)); 569 PetscFunctionReturn(0); 570 } 571 572 template <typename T> 573 template <typename U> 574 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept { 575 PetscFunctionBegin; 576 PetscCall(PetscFree(ptr)); 577 PetscFunctionReturn(0); 578 } 579 580 template <typename T> 581 template <typename U> 582 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept { 583 PetscFunctionBegin; 584 PetscCall(PetscArrayzero(ptr, n)); 585 PetscFunctionReturn(0); 586 } 587 588 template <typename T> 589 template <typename U> 590 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept { 591 PetscFunctionBegin; 592 PetscCall(PetscArraycpy(dest, src, n)); 593 PetscFunctionReturn(0); 594 } 595 596 template <typename T> 597 template <typename U> 598 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept { 599 using limit_type = std::numeric_limits<real_value_type>; 600 constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max(); 601 602 PetscFunctionBegin; 603 for (size_type i = 0; i < n; ++i) ptr[i] = canary; 604 PetscFunctionReturn(0); 605 } 606 607 } // namespace impl 608 609 // ========================================================================================== 610 // SegmentedMemoryPool 611 // 612 // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an 613 // allocated buffer. 
// This buffer is further split into memory "chunks" which control
// consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states:
//
// 1. Open:
//    The chunk is free to be claimed by the next suitable allocation request. If the
//    allocation request is made on the same stream as the chunk was deallocated on, no
//    serialization needs to occur. If not, the allocating stream must wait for the
//    event. Claiming the chunk "closes" the chunk.
//
// 2. Closed:
//    The chunk has been claimed by an allocation request. It cannot be opened again until it
//    is deallocated; doing so "opens" the chunk.
//
// Note that there does not need to be a chunk for every region, chunks are created to satisfy
// an allocation request.
//
// Thus there is usually a region of "unallocated" memory at the end of the buffer, which may
// be claimed by a newly created chunk if existing chunks cannot satisfy the allocation
// request. This region exists _only_ at the end, as there are no gaps between chunks.
//
//
// |-----------------------------------------------------------------------------------------
// | SegmentedMemoryPool
// |
// | ||-------------||
// | ||             ||    -------------------------------------------------------------------
// | ||             ||    | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX...
// | ||             ||    | |             |      |                   |            |
// | ||             ||    | x-----x-------x-----xx---------x---------x------x-----x
// | || MemoryBlock || -> | ------|-------------|----------|----------------|--------
// | ||             ||    | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk |
// | ||             ||    | ---------------------------------------------------------
// | ||             ||    -------------------------------------------------------------------
// | ||-------------||
// | ||             ||
// | ||     ...     ||
|| 649 // | || || 650 // ========================================================================================== 651 652 template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256> 653 class SegmentedMemoryPool; 654 655 // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks. 656 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 657 class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> { 658 public: 659 using value_type = MemType; 660 using stream_type = StreamType; 661 using allocator_type = AllocType; 662 using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>; 663 using pool_type = std::deque<block_type>; 664 using size_type = typename block_type::size_type; 665 666 explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value); 667 668 PETSC_NODISCARD PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept; 669 PETSC_NODISCARD PetscErrorCode deallocate(value_type **, const stream_type *) noexcept; 670 PETSC_NODISCARD PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept; 671 672 private: 673 pool_type pool_; 674 allocator_type allocator_; 675 size_type chunk_size_; 676 677 PETSC_NODISCARD PetscErrorCode make_block_(size_type, const stream_type *) noexcept; 678 679 friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>; 680 PETSC_NODISCARD PetscErrorCode register_finalize_(const stream_type *) noexcept; 681 PETSC_NODISCARD PetscErrorCode finalize_() noexcept; 682 683 PETSC_NODISCARD PetscErrorCode allocate_(size_type, value_type **, 
const stream_type *) noexcept; 684 }; 685 686 // ========================================================================================== 687 // SegmentedMemoryPool - Private API 688 // ========================================================================================== 689 690 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 691 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept { 692 const auto block_size = std::max(size, chunk_size_); 693 694 PetscFunctionBegin; 695 PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream)); 696 PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size())); 697 PetscFunctionReturn(0); 698 } 699 700 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 701 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept { 702 PetscFunctionBegin; 703 PetscCall(make_block_(chunk_size_, stream)); 704 PetscFunctionReturn(0); 705 } 706 707 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 708 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept { 709 PetscFunctionBegin; 710 PetscCallCXX(pool_.clear()); 711 chunk_size_ = DefaultChunkSize; 712 PetscFunctionReturn(0); 713 } 714 715 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 716 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept { 717 auto found = false; 718 719 PetscFunctionBegin; 720 PetscCall(this->register_finalize(PETSC_COMM_SELF, stream)); 721 for (auto 
&block : pool_) { 722 PetscCall(block.try_allocate_chunk(size, ptr, stream, &found)); 723 if (PetscLikely(found)) PetscFunctionReturn(0); 724 } 725 726 PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size)); 727 // if we are here we couldn't find an open block in the pool, so make a new block 728 PetscCall(make_block_(size, stream)); 729 // and assign it 730 PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found)); 731 PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size()); 732 PetscFunctionReturn(0); 733 } 734 735 // ========================================================================================== 736 // SegmentedMemoryPool - Public API 737 // ========================================================================================== 738 739 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 740 inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : 741 allocator_(std::move(alloc)), chunk_size_(size) { } 742 743 /* 744 SegmentedMemoryPool::allocate - get an allocation from the memory pool 745 746 Input Parameters: 747 + req_size - size (in elements) to get 748 . ptr - the pointer to hold the allocation 749 - stream - the stream on which to get the allocation 750 751 Output Parameter: 752 . ptr - the pointer holding the allocation 753 754 Notes: 755 req_size cannot be negative. 
If req_size if zero, ptr is set to nullptr 756 */ 757 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 758 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept { 759 value_type *ret_ptr = nullptr; 760 761 PetscFunctionBegin; 762 PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size); 763 PetscValidPointer(ptr, 2); 764 PetscValidPointer(stream, 3); 765 if (req_size) { 766 const auto size = static_cast<size_type>(req_size); 767 auto aligned_size = alignment == alignof(char) ? size : size + alignment; 768 void *vptr = nullptr; 769 770 PetscCall(allocate_(aligned_size, &ret_ptr, stream)); 771 vptr = ret_ptr; 772 std::align(alignment, size, vptr, aligned_size); 773 ret_ptr = reinterpret_cast<value_type *>(vptr); 774 // sets memory to NaN or infinity depending on the type to catch out uninitialized memory 775 // accesses. 776 if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream)); 777 } 778 *ptr = ret_ptr; 779 PetscFunctionReturn(0); 780 } 781 782 /* 783 SegmentedMemoryPool::deallocate - release a pointer back to the memory pool 784 785 Input Parameters: 786 + ptr - the pointer to release 787 - stream - the stream to release it on 788 789 Notes: 790 If ptr is not owned by the pool it is unchanged. 
791 */ 792 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 793 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept { 794 PetscFunctionBegin; 795 PetscValidPointer(ptr, 1); 796 PetscValidPointer(stream, 2); 797 // nobody owns a nullptr, and if they do then they have bigger problems 798 if (!*ptr) PetscFunctionReturn(0); 799 for (auto &block : pool_) { 800 auto found = false; 801 802 PetscCall(block.try_deallocate_chunk(ptr, stream, &found)); 803 if (PetscLikely(found)) break; 804 } 805 PetscFunctionReturn(0); 806 } 807 808 /* 809 SegmentedMemoryPool::reallocate - Resize an allocated buffer 810 811 Input Parameters: 812 + new_req_size - the new buffer size 813 . ptr - pointer to the buffer 814 - stream - stream to resize with 815 816 Ouput Parameter: 817 . ptr - pointer to the new region 818 819 Notes: 820 ptr must have been allocated by the pool. 821 822 It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated). 
823 */ 824 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 825 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept { 826 using chunk_type = typename block_type::chunk_type; 827 828 const auto new_size = static_cast<size_type>(new_req_size); 829 const auto old_ptr = *ptr; 830 chunk_type *chunk = nullptr; 831 832 PetscFunctionBegin; 833 PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size); 834 PetscValidPointer(ptr, 2); 835 PetscValidPointer(stream, 3); 836 837 // if reallocating to zero, just free 838 if (PetscUnlikely(new_size == 0)) { 839 PetscCall(deallocate(ptr, stream)); 840 PetscFunctionReturn(0); 841 } 842 843 // search the blocks for the owning chunk 844 for (auto &block : pool_) { 845 PetscCall(block.try_find_chunk(old_ptr, &chunk)); 846 if (chunk) break; // found 847 } 848 PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr); 849 850 if (chunk->capacity() < new_size) { 851 // chunk does not have enough room, need to grab a fresh chunk and copy to it 852 *ptr = nullptr; 853 PetscCall(chunk->release(stream)); 854 PetscCall(allocate(new_size, ptr, stream)); 855 PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream)); 856 } else { 857 // chunk had enough room we can simply grow (or shrink) to fit the new size 858 PetscCall(chunk->resize(new_size)); 859 } 860 PetscFunctionReturn(0); 861 } 862 863 } // namespace memory 864 865 } // namespace Petsc 866 867 #endif // PETSC_SEGMENTEDMEMPOOL_HPP 868