1 #ifndef PETSC_SEGMENTEDMEMPOOL_HPP 2 #define PETSC_SEGMENTEDMEMPOOL_HPP 3 4 #include <petsc/private/deviceimpl.h> 5 6 #include <petsc/private/cpp/macros.hpp> 7 #include <petsc/private/cpp/type_traits.hpp> 8 #include <petsc/private/cpp/utility.hpp> 9 #include <petsc/private/cpp/register_finalize.hpp> 10 #include <petsc/private/cpp/memory.hpp> 11 12 #include <limits> 13 #include <deque> 14 #include <vector> 15 16 namespace Petsc 17 { 18 19 namespace device 20 { 21 22 template <typename T> 23 class StreamBase { 24 public: 25 using id_type = int; 26 using derived_type = T; 27 28 static const id_type INVALID_ID; 29 30 // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion 31 template <typename U = T> 32 PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_()); 33 34 PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); } 35 36 template <typename E> 37 PETSC_NODISCARD PetscErrorCode record_event(E &&event) const noexcept 38 { 39 return static_cast<const T &>(*this).record_event_(std::forward<E>(event)); 40 } 41 42 template <typename E> 43 PETSC_NODISCARD PetscErrorCode wait_for_event(E &&event) const noexcept 44 { 45 return static_cast<const T &>(*this).wait_for_(std::forward<E>(event)); 46 } 47 48 protected: 49 constexpr StreamBase() noexcept = default; 50 51 struct default_event_type { }; 52 using default_stream_type = std::nullptr_t; 53 54 PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; } 55 56 PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; } 57 58 template <typename U = T> 59 PETSC_NODISCARD static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept 60 { 61 return 0; 62 } 63 64 template <typename U = T> 65 PETSC_NODISCARD static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept 66 { 67 return 0; 68 } 69 }; 70 71 template <typename T> 72 const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1; 73 74 struct DefaultStream : StreamBase<DefaultStream> { 75 using stream_type = typename StreamBase<DefaultStream>::default_stream_type; 76 using id_type = typename StreamBase<DefaultStream>::id_type; 77 using event_type = typename StreamBase<DefaultStream>::default_event_type; 78 }; 79 80 } // namespace device 81 82 namespace memory 83 { 84 85 namespace impl 86 { 87 88 // ========================================================================================== 89 // MemoryChunk 90 // 91 // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning 92 // MemoryBlock and its size/capacity 93 // ========================================================================================== 94 95 template <typename EventType> 96 class MemoryChunk { 97 public: 98 using event_type = EventType; 99 using size_type = std::size_t; 100 101 MemoryChunk(size_type, size_type) noexcept; 102 explicit MemoryChunk(size_type) noexcept; 103 104 MemoryChunk(MemoryChunk &&) noexcept; 105 MemoryChunk &operator=(MemoryChunk &&) noexcept; 106 107 MemoryChunk(const MemoryChunk &) noexcept = delete; 108 MemoryChunk &operator=(const MemoryChunk &) noexcept = delete; 109 110 PETSC_NODISCARD size_type start() const noexcept { return start_; } 111 PETSC_NODISCARD size_type size() const noexcept { return size_; } 112 // REVIEW ME: 113 // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in 114 // theory only the last chunk needs to do this 115 PETSC_NODISCARD size_type capacity() const noexcept { return size_; } 116 PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); } 117 118 template <typename U> 119 PETSC_NODISCARD PetscErrorCode release(const device::StreamBase<U> *) noexcept; 120 template <typename U> 121 PETSC_NODISCARD PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept; 122 template <typename U> 123 PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept; 124 PETSC_NODISCARD PetscErrorCode resize(size_type) noexcept; 125 PETSC_NODISCARD bool contains(size_type) const noexcept; 126 127 private: 128 // clang-format off 129 event_type event_{}; // event recorded when the chunk was released 130 bool open_ = true; // is this chunk open? 131 // id of the last stream to use the chunk, populated on release 132 int stream_id_ = device::DefaultStream::INVALID_ID; 133 size_type size_ = 0; // size of the chunk 134 const size_type start_ = 0; // offset from the start of the owning block 135 // clang-format on 136 137 template <typename U> 138 PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept; 139 }; 140 141 // ========================================================================================== 142 // MemoryChunk - Private API 143 // ========================================================================================== 144 145 // asks and answers the question: can this stream claim this chunk without serializing? 146 template <typename E> 147 template <typename U> 148 inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept 149 { 150 return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id()); 151 } 152 153 // ========================================================================================== 154 // MemoryChunk - Public API 155 // ========================================================================================== 156 157 template <typename E> 158 inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start) 159 { 160 } 161 162 template <typename E> 163 inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size) 164 { 165 } 166 167 template <typename E> 168 inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept : 169 event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_)) 170 { 171 } 172 173 template <typename E> 174 inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept 175 { 176 PetscFunctionBegin; 177 if (this != &other) { 178 event_ = std::move(other.event_); 179 open_ = util::exchange(other.open_, false); 180 stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID); 181 size_ = util::exchange(other.size_, 0); 182 start_ = std::move(other.start_); 183 } 184 PetscFunctionReturn(*this); 185 } 186 187 /* 188 MemoryChunk::release - release a chunk on a stream 189 190 Input Parameter: 191 . stream - the stream to release the chunk with 192 193 Notes: 194 Inserts a release operation on stream and records the state of stream at the time this 195 routine was called. 196 197 Future allocation requests which attempt to claim the chunk on the same stream may re-acquire 198 the chunk without serialization. 199 200 If another stream attempts to claim the chunk they must wait for the recorded event before 201 claiming the chunk. 202 */ 203 template <typename E> 204 template <typename U> 205 inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept 206 { 207 PetscFunctionBegin; 208 open_ = true; 209 stream_id_ = stream->get_id(); 210 PetscCall(stream->record_event(event_)); 211 PetscFunctionReturn(0); 212 } 213 214 /* 215 MemoryChunk::claim - attempt to claim a particular chunk 216 217 Input Parameters: 218 + stream - the stream on which to attempt to claim 219 . req_size - the requested size (in elements) to attempt to claim 220 - serialize - (optional, false) whether the claimant allows serialization 221 222 Output Parameter: 223 . success - true if the chunk was claimed, false otherwise 224 */ 225 template <typename E> 226 template <typename U> 227 inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept 228 { 229 PetscFunctionBegin; 230 if ((*success = can_claim(stream, req_size, serialize))) { 231 if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_)); 232 PetscCall(resize(req_size)); 233 open_ = false; 234 } 235 PetscFunctionReturn(0); 236 } 237 238 /* 239 MemoryChunk::can_claim - test whether a particular chunk can be claimed 240 241 Input Parameters: 242 + stream - the stream on which to attempt to claim 243 . req_size - the requested size (in elements) to attempt to claim 244 - serialize - whether the claimant allows serialization 245 246 Output: 247 . [return] - true if the chunk is claimable given the configuration, false otherwise 248 */ 249 template <typename E> 250 template <typename U> 251 inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept 252 { 253 if (open_ && (req_size <= capacity())) { 254 // fully compatible 255 if (stream_compat_(stream)) return true; 256 // stream wasn't compatible, but could claim if we serialized 257 if (serialize) return true; 258 // incompatible stream and did not want to serialize 259 } 260 return false; 261 } 262 263 /* 264 MemoryChunk::resize - grow a chunk to new size 265 266 Input Parameter: 267 . newsize - the new size Requested 268 269 Notes: 270 newsize cannot be larger than capacity 271 */ 272 template <typename E> 273 inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept 274 { 275 PetscFunctionBegin; 276 PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity()); 277 size_ = newsize; 278 PetscFunctionReturn(0); 279 } 280 281 /* 282 MemoryChunk::contains - query whether a memory chunk contains a particular offset 283 284 Input Parameters: 285 . offset - The offset from the MemoryBlock start 286 287 Notes: 288 Returns true if the chunk contains the offset, false otherwise 289 */ 290 template <typename E> 291 inline bool MemoryChunk<E>::contains(size_type offset) const noexcept 292 { 293 return (offset >= start()) && (offset < total_offset()); 294 } 295 296 // ========================================================================================== 297 // MemoryBlock 298 // 299 // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving 300 // and restoring a block is thread-safe (so may be used by multiple device streams). 301 // ========================================================================================== 302 303 template <typename T, typename AllocatorType, typename StreamType> 304 class MemoryBlock { 305 public: 306 using value_type = T; 307 using allocator_type = AllocatorType; 308 using stream_type = StreamType; 309 using event_type = typename stream_type::event_type; 310 using chunk_type = MemoryChunk<event_type>; 311 using size_type = typename chunk_type::size_type; 312 using chunk_list_type = std::vector<chunk_type>; 313 314 template <typename U> 315 MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept; 316 317 ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value); 318 319 MemoryBlock(MemoryBlock &&) noexcept; 320 MemoryBlock &operator=(MemoryBlock &&) noexcept; 321 322 // memory blocks are not copyable 323 MemoryBlock(const MemoryBlock &) = delete; 324 MemoryBlock &operator=(const MemoryBlock &) = delete; 325 326 /* --- actual functions --- */ 327 PETSC_NODISCARD PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept; 328 PETSC_NODISCARD PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept; 329 PETSC_NODISCARD PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept; 330 PETSC_NODISCARD bool owns_pointer(const T *) const noexcept; 331 332 PETSC_NODISCARD size_type size() const noexcept { return size_; } 333 PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); } 334 PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); } 335 336 private: 337 value_type *mem_{}; 338 allocator_type *allocator_{}; 339 size_type size_{}; 340 chunk_list_type chunks_{}; 341 342 PETSC_NODISCARD PetscErrorCode clear_(const stream_type *) noexcept; 343 }; 344 345 // ========================================================================================== 346 // MemoryBlock - Private API 347 // ========================================================================================== 348 349 // clear the memory block, called from destructors and move assignment/construction 350 template <typename T, typename A, typename S> 351 PETSC_NODISCARD PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept 352 { 353 PetscFunctionBegin; 354 if (PetscLikely(mem_)) { 355 PetscCall(allocator_->deallocate(mem_, stream)); 356 mem_ = nullptr; 357 } 358 size_ = 0; 359 PetscCallCXX(chunks_.clear()); 360 PetscFunctionReturn(0); 361 } 362 363 // ========================================================================================== 364 // MemoryBlock - Public API 365 // ========================================================================================== 366 367 // default constructor, allocates memory immediately 368 template <typename T, typename A, typename S> 369 template <typename U> 370 MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s) 371 { 372 PetscFunctionBegin; 373 PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream)); 374 PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s); 375 PetscFunctionReturnVoid(); 376 } 377 378 template <typename T, typename A, typename S> 379 MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value) 380 { 381 stream_type stream; 382 383 PetscFunctionBegin; 384 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 385 PetscFunctionReturnVoid(); 386 } 387 388 template <typename T, typename A, typename S> 389 MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_)) 390 { 391 } 392 393 template <typename T, typename A, typename S> 394 MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept 395 { 396 PetscFunctionBegin; 397 if (this != &other) { 398 stream_type stream; 399 400 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 401 mem_ = util::exchange(other.mem_, nullptr); 402 allocator_ = other.allocator_; 403 size_ = util::exchange(other.size_, 0); 404 chunks_ = std::move(other.chunks_); 405 } 406 PetscFunctionReturn(*this); 407 } 408 409 /* 410 MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise 411 */ 412 template <typename T, typename A, typename S> 413 inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept 414 { 415 // each pool is linear in memory, so it suffices to check the bounds 416 return (ptr >= mem_) && (ptr < std::next(mem_, size())); 417 } 418 419 /* 420 MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock 421 422 Input Parameters: 423 + req_size - the requested size of the allocation (in elements) 424 . ptr - ptr to fill 425 - stream - stream to fill the pointer on 426 427 Output Parameter: 428 . success - true if chunk was gotten, false otherwise 429 430 Notes: 431 If the current memory could not satisfy the memory request, ptr is unchanged 432 */ 433 template <typename T, typename A, typename S> 434 inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept 435 { 436 PetscFunctionBegin; 437 *success = false; 438 if (req_size <= size()) { 439 const auto try_create_chunk = [&]() { 440 const auto was_empty = chunks_.empty(); 441 const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset(); 442 443 PetscFunctionBegin; 444 if (block_alloced + req_size <= size()) { 445 PetscCallCXX(chunks_.emplace_back(block_alloced, req_size)); 446 PetscCall(chunks_.back().claim(stream, req_size, success)); 447 *ptr = mem_ + block_alloced; 448 if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size()); 449 } 450 PetscFunctionReturn(0); 451 }; 452 const auto try_find_open_chunk = [&](bool serialize = false) { 453 PetscFunctionBegin; 454 for (auto &chunk : chunks_) { 455 PetscCall(chunk.claim(stream, req_size, success, serialize)); 456 if (*success) { 457 *ptr = mem_ + chunk.start(); 458 break; 459 } 460 } 461 PetscFunctionReturn(0); 462 }; 463 const auto try_steal_other_stream_chunk = [&]() { 464 PetscFunctionBegin; 465 PetscCall(try_find_open_chunk(true)); 466 PetscFunctionReturn(0); 467 }; 468 469 // search previously distributed chunks, but only claim one if it is on the same stream 470 // as us 471 PetscCall(try_find_open_chunk()); 472 473 // if we are here we couldn't reuse one of our own chunks so check first if the pool 474 // has room for a new one 475 if (!*success) PetscCall(try_create_chunk()); 476 477 // try pruning dead chunks off the back, note we do this regardless of whether we are 478 // successful 479 while (chunks_.back().can_claim(stream, 0, false)) { 480 PetscCallCXX(chunks_.pop_back()); 481 if (chunks_.empty()) { 482 // if chunks are empty it implies we have managed to claim (and subsequently destroy) 483 // our own chunk twice! something has gone wrong 484 PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size()); 485 break; 486 } 487 } 488 489 // if previously unsuccessful see if enough space has opened up due to pruning. note that 490 // if the chunk list was emptied from the pruning this call must succeed in allocating a 491 // chunk, otherwise something is wrong 492 if (!*success) PetscCall(try_create_chunk()); 493 494 // last resort, iterate over all chunks and see if we can steal one by waiting on the 495 // current owner to finish using it 496 if (!*success) PetscCall(try_steal_other_stream_chunk()); 497 } 498 PetscFunctionReturn(0); 499 } 500 501 /* 502 MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock 503 504 Input Parameters: 505 + ptr - ptr to restore 506 - stream - stream to restore the pointer on 507 508 Output Parameter: 509 . success - true if chunk was restored, false otherwise 510 511 Notes: 512 ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned 513 by this MemoryBlock then it is restored on stream. The same stream may recieve ptr again 514 without synchronization, but other streams may not do so until either serializing or the 515 stream is idle again. 516 */ 517 template <typename T, typename A, typename S> 518 inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept 519 { 520 chunk_type *chunk = nullptr; 521 522 PetscFunctionBegin; 523 PetscCall(try_find_chunk(*ptr, &chunk)); 524 if (chunk) { 525 PetscCall(chunk->release(stream)); 526 *ptr = nullptr; 527 *success = true; 528 } else { 529 *success = false; 530 } 531 PetscFunctionReturn(0); 532 } 533 534 /* 535 MemoryBlock::try_find_chunk - try to find the chunk which owns ptr 536 537 Input Parameter: 538 . ptr - the pointer to lookk for 539 540 Output Parameter: 541 . ret_chunk - pointer to the owning chunk or nullptr if not found 542 */ 543 template <typename T, typename A, typename S> 544 inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept 545 { 546 PetscFunctionBegin; 547 *ret_chunk = nullptr; 548 if (owns_pointer(ptr)) { 549 const auto offset = static_cast<size_type>(ptr - mem_); 550 551 for (auto &chunk : chunks_) { 552 if (chunk.contains(offset)) { 553 *ret_chunk = &chunk; 554 break; 555 } 556 } 557 558 PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size()))); 559 } 560 PetscFunctionReturn(0); 561 } 562 563 namespace detail 564 { 565 566 template <typename T> 567 struct real_type { 568 using type = T; 569 }; 570 571 template <> 572 struct real_type<PetscScalar> { 573 using type = PetscReal; 574 }; 575 576 } // namespace detail 577 578 template <typename T> 579 struct SegmentedMemoryPoolAllocatorBase { 580 using value_type = T; 581 using size_type = std::size_t; 582 using real_value_type = typename detail::real_type<T>::type; 583 584 template <typename U> 585 PETSC_NODISCARD static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept; 586 template <typename U> 587 PETSC_NODISCARD static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept; 588 template <typename U> 589 PETSC_NODISCARD static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept; 590 template <typename U> 591 PETSC_NODISCARD static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept; 592 template <typename U> 593 PETSC_NODISCARD static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept; 594 }; 595 596 template <typename T> 597 template <typename U> 598 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept 599 { 600 PetscFunctionBegin; 601 PetscCall(PetscMalloc1(n, ptr)); 602 PetscFunctionReturn(0); 603 } 604 605 template <typename T> 606 template <typename U> 607 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept 608 { 609 PetscFunctionBegin; 610 PetscCall(PetscFree(ptr)); 611 PetscFunctionReturn(0); 612 } 613 614 template <typename T> 615 template <typename U> 616 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept 617 { 618 PetscFunctionBegin; 619 PetscCall(PetscArrayzero(ptr, n)); 620 PetscFunctionReturn(0); 621 } 622 623 template <typename T> 624 template <typename U> 625 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept 626 { 627 PetscFunctionBegin; 628 PetscCall(PetscArraycpy(dest, src, n)); 629 PetscFunctionReturn(0); 630 } 631 632 template <typename T> 633 template <typename U> 634 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept 635 { 636 using limit_type = std::numeric_limits<real_value_type>; 637 constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max(); 638 639 PetscFunctionBegin; 640 for (size_type i = 0; i < n; ++i) ptr[i] = canary; 641 PetscFunctionReturn(0); 642 } 643 644 } // namespace impl 645 646 // ========================================================================================== 647 // SegmentedMemoryPool 648 // 649 // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an 650 // allocated buffer. This buffer is further split into memory "chunks" which control 651 // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states: 652 // 653 // 1. Open: 654 // The chunk is free to be claimed by the next suitable allocation request. If the 655 // allocation request is made on the same stream as the chunk was deallocated on, no 656 // serialization needs to occur. If not, the allocating stream must wait for the 657 // event. Claiming the chunk "closes" the chunk. 658 // 659 // 2. Closed: 660 // The chunk has been claimed by an allocation request. It cannot be opened again until it 661 // is deallocated; doing so "opens" the chunk. 662 // 663 // Note that there does not need to be a chunk for every region, chunks are created to satisfy 664 // an allocation request. 665 // 666 // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may 667 // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation 668 // request. This region exists _only_ at the end, as there are no gaps between chunks. 669 // 670 // 671 // |----------------------------------------------------------------------------------------- 672 // | SegmentedMemoryPool 673 // | 674 // | ||-------------|| 675 // | || || ------------------------------------------------------------------- 676 // | || || | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX... 677 // | || || | | | | | | 678 // | || || | x-----x-------x-----xx---------x---------x------x-----x 679 // | || MemoryBlock || -> | ------|-------------|----------|----------------|-------- 680 // | || || | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk | 681 // | || || | --------------------------------------------------------- 682 // | || || ------------------------------------------------------------------- 683 // | ||-------------|| 684 // | || || 685 // | || ... || 686 // | || || 687 // ========================================================================================== 688 689 template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256> 690 class SegmentedMemoryPool; 691 692 // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks. 693 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 694 class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> { 695 public: 696 using value_type = MemType; 697 using stream_type = StreamType; 698 using allocator_type = AllocType; 699 using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>; 700 using pool_type = std::deque<block_type>; 701 using size_type = typename block_type::size_type; 702 703 explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value); 704 705 PETSC_NODISCARD PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept; 706 PETSC_NODISCARD PetscErrorCode deallocate(value_type **, const stream_type *) noexcept; 707 PETSC_NODISCARD PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept; 708 709 private: 710 pool_type pool_; 711 allocator_type allocator_; 712 size_type chunk_size_; 713 714 PETSC_NODISCARD PetscErrorCode make_block_(size_type, const stream_type *) noexcept; 715 716 friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>; 717 PETSC_NODISCARD PetscErrorCode register_finalize_(const stream_type *) noexcept; 718 PETSC_NODISCARD PetscErrorCode finalize_() noexcept; 719 720 PETSC_NODISCARD PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept; 721 }; 722 723 // ========================================================================================== 724 // SegmentedMemoryPool - Private API 725 // ========================================================================================== 726 727 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 728 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept 729 { 730 const auto block_size = std::max(size, chunk_size_); 731 732 PetscFunctionBegin; 733 PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream)); 734 PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size())); 735 PetscFunctionReturn(0); 736 } 737 738 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 739 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept 740 { 741 PetscFunctionBegin; 742 PetscCall(make_block_(chunk_size_, stream)); 743 PetscFunctionReturn(0); 744 } 745 746 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 747 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept 748 { 749 PetscFunctionBegin; 750 PetscCallCXX(pool_.clear()); 751 chunk_size_ = DefaultChunkSize; 752 PetscFunctionReturn(0); 753 } 754 755 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 756 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept 757 { 758 auto found = false; 759 760 PetscFunctionBegin; 761 PetscCall(this->register_finalize(PETSC_COMM_SELF, stream)); 762 for (auto &block : pool_) { 763 PetscCall(block.try_allocate_chunk(size, ptr, stream, &found)); 764 if (PetscLikely(found)) PetscFunctionReturn(0); 765 } 766 767 PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size)); 768 // if we are here we couldn't find an open block in the pool, so make a new block 769 PetscCall(make_block_(size, stream)); 770 // and assign it 771 PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found)); 772 PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size()); 773 PetscFunctionReturn(0); 774 } 775 776 // ========================================================================================== 777 // SegmentedMemoryPool - Public API 778 // ========================================================================================== 779 780 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 781 inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size) 782 { 783 } 784 785 /* 786 SegmentedMemoryPool::allocate - get an allocation from the memory pool 787 788 Input Parameters: 789 + req_size - size (in elements) to get 790 . ptr - the pointer to hold the allocation 791 - stream - the stream on which to get the allocation 792 793 Output Parameter: 794 . ptr - the pointer holding the allocation 795 796 Notes: 797 req_size cannot be negative. If req_size if zero, ptr is set to nullptr 798 */ 799 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 800 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept 801 { 802 value_type *ret_ptr = nullptr; 803 804 PetscFunctionBegin; 805 PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size); 806 PetscValidPointer(ptr, 2); 807 PetscValidPointer(stream, 3); 808 if (req_size) { 809 const auto size = static_cast<size_type>(req_size); 810 auto aligned_size = alignment == alignof(char) ? size : size + alignment; 811 void *vptr = nullptr; 812 813 PetscCall(allocate_(aligned_size, &ret_ptr, stream)); 814 vptr = ret_ptr; 815 std::align(alignment, size, vptr, aligned_size); 816 ret_ptr = reinterpret_cast<value_type *>(vptr); 817 // sets memory to NaN or infinity depending on the type to catch out uninitialized memory 818 // accesses. 819 if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream)); 820 } 821 *ptr = ret_ptr; 822 PetscFunctionReturn(0); 823 } 824 825 /* 826 SegmentedMemoryPool::deallocate - release a pointer back to the memory pool 827 828 Input Parameters: 829 + ptr - the pointer to release 830 - stream - the stream to release it on 831 832 Notes: 833 If ptr is not owned by the pool it is unchanged. 834 */ 835 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 836 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept 837 { 838 PetscFunctionBegin; 839 PetscValidPointer(ptr, 1); 840 PetscValidPointer(stream, 2); 841 // nobody owns a nullptr, and if they do then they have bigger problems 842 if (!*ptr) PetscFunctionReturn(0); 843 for (auto &block : pool_) { 844 auto found = false; 845 846 PetscCall(block.try_deallocate_chunk(ptr, stream, &found)); 847 if (PetscLikely(found)) break; 848 } 849 PetscFunctionReturn(0); 850 } 851 852 /* 853 SegmentedMemoryPool::reallocate - Resize an allocated buffer 854 855 Input Parameters: 856 + new_req_size - the new buffer size 857 . ptr - pointer to the buffer 858 - stream - stream to resize with 859 860 Ouput Parameter: 861 . ptr - pointer to the new region 862 863 Notes: 864 ptr must have been allocated by the pool. 865 866 It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated). 867 */ 868 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 869 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept 870 { 871 using chunk_type = typename block_type::chunk_type; 872 873 const auto new_size = static_cast<size_type>(new_req_size); 874 const auto old_ptr = *ptr; 875 chunk_type *chunk = nullptr; 876 877 PetscFunctionBegin; 878 PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size); 879 PetscValidPointer(ptr, 2); 880 PetscValidPointer(stream, 3); 881 882 // if reallocating to zero, just free 883 if (PetscUnlikely(new_size == 0)) { 884 PetscCall(deallocate(ptr, stream)); 885 PetscFunctionReturn(0); 886 } 887 888 // search the blocks for the owning chunk 889 for (auto &block : pool_) { 890 PetscCall(block.try_find_chunk(old_ptr, &chunk)); 891 if (chunk) break; // found 892 } 893 PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr); 894 895 if (chunk->capacity() < new_size) { 896 // chunk does not have enough room, need to grab a fresh chunk and copy to it 897 *ptr = nullptr; 898 PetscCall(chunk->release(stream)); 899 PetscCall(allocate(new_size, ptr, stream)); 900 PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream)); 901 } else { 902 // chunk had enough room we can simply grow (or shrink) to fit the new size 903 PetscCall(chunk->resize(new_size)); 904 } 905 PetscFunctionReturn(0); 906 } 907 908 } // namespace memory 909 910 } // namespace Petsc 911 912 #endif // PETSC_SEGMENTEDMEMPOOL_HPP 913