1 #pragma once 2 3 #include <petsc/private/deviceimpl.h> 4 5 #include <petsc/private/cpp/macros.hpp> 6 #include <petsc/private/cpp/type_traits.hpp> 7 #include <petsc/private/cpp/utility.hpp> 8 #include <petsc/private/cpp/register_finalize.hpp> 9 #include <petsc/private/cpp/memory.hpp> 10 11 #include <limits> 12 #include <deque> 13 #include <vector> 14 15 namespace Petsc 16 { 17 18 namespace device 19 { 20 21 template <typename T> 22 class StreamBase { 23 public: 24 using id_type = int; 25 using derived_type = T; 26 27 static const id_type INVALID_ID; 28 29 // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion 30 template <typename U = T> 31 PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_()); 32 33 PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); } 34 35 template <typename E> 36 PetscErrorCode record_event(E &&event) const noexcept 37 { 38 return static_cast<const T &>(*this).record_event_(std::forward<E>(event)); 39 } 40 41 template <typename E> 42 PetscErrorCode wait_for_event(E &&event) const noexcept 43 { 44 return static_cast<const T &>(*this).wait_for_(std::forward<E>(event)); 45 } 46 47 protected: 48 constexpr StreamBase() noexcept = default; 49 50 struct default_event_type { }; 51 using default_stream_type = std::nullptr_t; 52 53 PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; } 54 55 PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; } 56 57 template <typename U = T> 58 static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept 59 { 60 return PETSC_SUCCESS; 61 } 62 63 template <typename U = T> 64 static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept 65 { 66 return PETSC_SUCCESS; 67 } 68 }; 69 70 template <typename T> 71 const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1; 72 73 struct DefaultStream : StreamBase<DefaultStream> { 74 using stream_type = typename StreamBase<DefaultStream>::default_stream_type; 75 using id_type = typename StreamBase<DefaultStream>::id_type; 76 using event_type = typename StreamBase<DefaultStream>::default_event_type; 77 }; 78 79 } // namespace device 80 81 namespace memory 82 { 83 84 namespace impl 85 { 86 87 // ========================================================================================== 88 // MemoryChunk 89 // 90 // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning 91 // MemoryBlock and its size/capacity 92 // ========================================================================================== 93 94 template <typename EventType> 95 class MemoryChunk { 96 public: 97 using event_type = EventType; 98 using size_type = std::size_t; 99 100 MemoryChunk(size_type, size_type) noexcept; 101 explicit MemoryChunk(size_type) noexcept; 102 103 MemoryChunk(MemoryChunk &&) noexcept; 104 MemoryChunk &operator=(MemoryChunk &&) noexcept; 105 106 MemoryChunk(const MemoryChunk &) noexcept = delete; 107 MemoryChunk &operator=(const MemoryChunk &) noexcept = delete; 108 109 PETSC_NODISCARD size_type start() const noexcept { return start_; } 110 PETSC_NODISCARD size_type size() const noexcept { return size_; } 111 // REVIEW ME: 112 // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in 113 // theory only the last chunk needs to do this 114 PETSC_NODISCARD size_type capacity() const noexcept { return size_; } 115 PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); } 116 117 template <typename U> 118 PetscErrorCode release(const device::StreamBase<U> *) noexcept; 119 template <typename U> 120 PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept; 121 template <typename U> 122 PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept; 123 PetscErrorCode resize(size_type) noexcept; 124 PETSC_NODISCARD bool contains(size_type) const noexcept; 125 126 private: 127 // clang-format off 128 event_type event_{}; // event recorded when the chunk was released 129 bool open_ = true; // is this chunk open? 130 // id of the last stream to use the chunk, populated on release 131 int stream_id_ = device::DefaultStream::INVALID_ID; 132 size_type size_ = 0; // size of the chunk 133 const size_type start_ = 0; // offset from the start of the owning block 134 // clang-format on 135 136 template <typename U> 137 PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept; 138 }; 139 140 // ========================================================================================== 141 // MemoryChunk - Private API 142 // ========================================================================================== 143 144 // asks and answers the question: can this stream claim this chunk without serializing? 145 template <typename E> 146 template <typename U> 147 inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept 148 { 149 return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id()); 150 } 151 152 // ========================================================================================== 153 // MemoryChunk - Public API 154 // ========================================================================================== 155 156 template <typename E> 157 inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start) 158 { 159 } 160 161 template <typename E> 162 inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size) 163 { 164 } 165 166 template <typename E> 167 inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept : 168 event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_)) 169 { 170 } 171 172 template <typename E> 173 inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept 174 { 175 PetscFunctionBegin; 176 if (this != &other) { 177 event_ = std::move(other.event_); 178 open_ = util::exchange(other.open_, false); 179 stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID); 180 size_ = util::exchange(other.size_, 0); 181 start_ = std::move(other.start_); 182 } 183 PetscFunctionReturn(*this); 184 } 185 186 /* 187 MemoryChunk::release - release a chunk on a stream 188 189 Input Parameter: 190 . stream - the stream to release the chunk with 191 192 Notes: 193 Inserts a release operation on stream and records the state of stream at the time this 194 routine was called. 195 196 Future allocation requests which attempt to claim the chunk on the same stream may re-acquire 197 the chunk without serialization. 198 199 If another stream attempts to claim the chunk they must wait for the recorded event before 200 claiming the chunk. 201 */ 202 template <typename E> 203 template <typename U> 204 inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept 205 { 206 PetscFunctionBegin; 207 open_ = true; 208 stream_id_ = stream->get_id(); 209 PetscCall(stream->record_event(event_)); 210 PetscFunctionReturn(PETSC_SUCCESS); 211 } 212 213 /* 214 MemoryChunk::claim - attempt to claim a particular chunk 215 216 Input Parameters: 217 + stream - the stream on which to attempt to claim 218 . req_size - the requested size (in elements) to attempt to claim 219 - serialize - (optional, false) whether the claimant allows serialization 220 221 Output Parameter: 222 . success - true if the chunk was claimed, false otherwise 223 */ 224 template <typename E> 225 template <typename U> 226 inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept 227 { 228 PetscFunctionBegin; 229 if ((*success = can_claim(stream, req_size, serialize))) { 230 if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_)); 231 PetscCall(resize(req_size)); 232 open_ = false; 233 } 234 PetscFunctionReturn(PETSC_SUCCESS); 235 } 236 237 /* 238 MemoryChunk::can_claim - test whether a particular chunk can be claimed 239 240 Input Parameters: 241 + stream - the stream on which to attempt to claim 242 . req_size - the requested size (in elements) to attempt to claim 243 - serialize - whether the claimant allows serialization 244 245 Output: 246 . [return] - true if the chunk is claimable given the configuration, false otherwise 247 */ 248 template <typename E> 249 template <typename U> 250 inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept 251 { 252 if (open_ && (req_size <= capacity())) { 253 // fully compatible 254 if (stream_compat_(stream)) return true; 255 // stream wasn't compatible, but could claim if we serialized 256 if (serialize) return true; 257 // incompatible stream and did not want to serialize 258 } 259 return false; 260 } 261 262 /* 263 MemoryChunk::resize - grow a chunk to new size 264 265 Input Parameter: 266 . newsize - the new size Requested 267 268 Notes: 269 newsize cannot be larger than capacity 270 */ 271 template <typename E> 272 inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept 273 { 274 PetscFunctionBegin; 275 PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity()); 276 size_ = newsize; 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 /* 281 MemoryChunk::contains - query whether a memory chunk contains a particular offset 282 283 Input Parameters: 284 . offset - The offset from the MemoryBlock start 285 286 Notes: 287 Returns true if the chunk contains the offset, false otherwise 288 */ 289 template <typename E> 290 inline bool MemoryChunk<E>::contains(size_type offset) const noexcept 291 { 292 return (offset >= start()) && (offset < total_offset()); 293 } 294 295 // ========================================================================================== 296 // MemoryBlock 297 // 298 // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving 299 // and restoring a block is thread-safe (so may be used by multiple device streams). 300 // ========================================================================================== 301 302 template <typename T, typename AllocatorType, typename StreamType> 303 class MemoryBlock { 304 public: 305 using value_type = T; 306 using allocator_type = AllocatorType; 307 using stream_type = StreamType; 308 using event_type = typename stream_type::event_type; 309 using chunk_type = MemoryChunk<event_type>; 310 using size_type = typename chunk_type::size_type; 311 using chunk_list_type = std::vector<chunk_type>; 312 313 template <typename U> 314 MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept; 315 316 ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value); 317 318 MemoryBlock(MemoryBlock &&) noexcept; 319 MemoryBlock &operator=(MemoryBlock &&) noexcept; 320 321 // memory blocks are not copyable 322 MemoryBlock(const MemoryBlock &) = delete; 323 MemoryBlock &operator=(const MemoryBlock &) = delete; 324 325 /* --- actual functions --- */ 326 PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept; 327 PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept; 328 PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept; 329 PETSC_NODISCARD bool owns_pointer(const T *) const noexcept; 330 331 PETSC_NODISCARD size_type size() const noexcept { return size_; } 332 PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); } 333 PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); } 334 335 private: 336 value_type *mem_{}; 337 allocator_type *allocator_{}; 338 size_type size_{}; 339 chunk_list_type chunks_{}; 340 341 PetscErrorCode clear_(const stream_type *) noexcept; 342 }; 343 344 // ========================================================================================== 345 // MemoryBlock - Private API 346 // ========================================================================================== 347 348 // clear the memory block, called from destructors and move assignment/construction 349 template <typename T, typename A, typename S> 350 PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept 351 { 352 PetscFunctionBegin; 353 if (PetscLikely(mem_)) { 354 PetscCall(allocator_->deallocate(mem_, stream)); 355 mem_ = nullptr; 356 } 357 size_ = 0; 358 PetscCallCXX(chunks_.clear()); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 // ========================================================================================== 363 // MemoryBlock - Public API 364 // ========================================================================================== 365 366 // default constructor, allocates memory immediately 367 template <typename T, typename A, typename S> 368 template <typename U> 369 MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s) 370 { 371 PetscFunctionBegin; 372 PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream)); 373 PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s); 374 PetscFunctionReturnVoid(); 375 } 376 377 template <typename T, typename A, typename S> 378 MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value) 379 { 380 stream_type stream; 381 382 PetscFunctionBegin; 383 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 384 PetscFunctionReturnVoid(); 385 } 386 387 template <typename T, typename A, typename S> 388 MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_)) 389 { 390 } 391 392 template <typename T, typename A, typename S> 393 MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept 394 { 395 PetscFunctionBegin; 396 if (this != &other) { 397 stream_type stream; 398 399 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 400 mem_ = util::exchange(other.mem_, nullptr); 401 allocator_ = other.allocator_; 402 size_ = util::exchange(other.size_, 0); 403 chunks_ = std::move(other.chunks_); 404 } 405 PetscFunctionReturn(*this); 406 } 407 408 /* 409 MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise 410 */ 411 template <typename T, typename A, typename S> 412 inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept 413 { 414 // each pool is linear in memory, so it suffices to check the bounds 415 return (ptr >= mem_) && (ptr < std::next(mem_, size())); 416 } 417 418 /* 419 MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock 420 421 Input Parameters: 422 + req_size - the requested size of the allocation (in elements) 423 . ptr - ptr to fill 424 - stream - stream to fill the pointer on 425 426 Output Parameter: 427 . success - true if chunk was gotten, false otherwise 428 429 Notes: 430 If the current memory could not satisfy the memory request, ptr is unchanged 431 */ 432 template <typename T, typename A, typename S> 433 inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept 434 { 435 PetscFunctionBegin; 436 *success = false; 437 if (req_size <= size()) { 438 const auto try_create_chunk = [&]() { 439 const auto was_empty = chunks_.empty(); 440 const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset(); 441 442 PetscFunctionBegin; 443 if (block_alloced + req_size <= size()) { 444 PetscCallCXX(chunks_.emplace_back(block_alloced, req_size)); 445 PetscCall(chunks_.back().claim(stream, req_size, success)); 446 *ptr = mem_ + block_alloced; 447 if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size()); 448 } 449 PetscFunctionReturn(PETSC_SUCCESS); 450 }; 451 const auto try_find_open_chunk = [&](bool serialize = false) { 452 PetscFunctionBegin; 453 for (auto &chunk : chunks_) { 454 PetscCall(chunk.claim(stream, req_size, success, serialize)); 455 if (*success) { 456 *ptr = mem_ + chunk.start(); 457 break; 458 } 459 } 460 PetscFunctionReturn(PETSC_SUCCESS); 461 }; 462 const auto try_steal_other_stream_chunk = [&]() { 463 PetscFunctionBegin; 464 PetscCall(try_find_open_chunk(true)); 465 PetscFunctionReturn(PETSC_SUCCESS); 466 }; 467 468 // search previously distributed chunks, but only claim one if it is on the same stream 469 // as us 470 PetscCall(try_find_open_chunk()); 471 472 // if we are here we couldn't reuse one of our own chunks so check first if the pool 473 // has room for a new one 474 if (!*success) PetscCall(try_create_chunk()); 475 476 // try pruning dead chunks off the back, note we do this regardless of whether we are 477 // successful 478 while (chunks_.back().can_claim(stream, 0, false)) { 479 PetscCallCXX(chunks_.pop_back()); 480 if (chunks_.empty()) { 481 // if chunks are empty it implies we have managed to claim (and subsequently destroy) 482 // our own chunk twice! something has gone wrong 483 PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size()); 484 break; 485 } 486 } 487 488 // if previously unsuccessful see if enough space has opened up due to pruning. note that 489 // if the chunk list was emptied from the pruning this call must succeed in allocating a 490 // chunk, otherwise something is wrong 491 if (!*success) PetscCall(try_create_chunk()); 492 493 // last resort, iterate over all chunks and see if we can steal one by waiting on the 494 // current owner to finish using it 495 if (!*success) PetscCall(try_steal_other_stream_chunk()); 496 } 497 PetscFunctionReturn(PETSC_SUCCESS); 498 } 499 500 /* 501 MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock 502 503 Input Parameters: 504 + ptr - ptr to restore 505 - stream - stream to restore the pointer on 506 507 Output Parameter: 508 . success - true if chunk was restored, false otherwise 509 510 Notes: 511 ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned 512 by this MemoryBlock then it is restored on stream. The same stream may receive ptr again 513 without synchronization, but other streams may not do so until either serializing or the 514 stream is idle again. 515 */ 516 template <typename T, typename A, typename S> 517 inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept 518 { 519 chunk_type *chunk = nullptr; 520 521 PetscFunctionBegin; 522 PetscCall(try_find_chunk(*ptr, &chunk)); 523 if (chunk) { 524 PetscCall(chunk->release(stream)); 525 *ptr = nullptr; 526 *success = true; 527 } else { 528 *success = false; 529 } 530 PetscFunctionReturn(PETSC_SUCCESS); 531 } 532 533 /* 534 MemoryBlock::try_find_chunk - try to find the chunk which owns ptr 535 536 Input Parameter: 537 . ptr - the pointer to look for 538 539 Output Parameter: 540 . ret_chunk - pointer to the owning chunk or nullptr if not found 541 */ 542 template <typename T, typename A, typename S> 543 inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept 544 { 545 PetscFunctionBegin; 546 *ret_chunk = nullptr; 547 if (owns_pointer(ptr)) { 548 const auto offset = static_cast<size_type>(ptr - mem_); 549 550 for (auto &chunk : chunks_) { 551 if (chunk.contains(offset)) { 552 *ret_chunk = &chunk; 553 break; 554 } 555 } 556 557 PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size()))); 558 } 559 PetscFunctionReturn(PETSC_SUCCESS); 560 } 561 562 namespace detail 563 { 564 565 template <typename T> 566 struct real_type { 567 using type = T; 568 }; 569 570 template <> 571 struct real_type<PetscScalar> { 572 using type = PetscReal; 573 }; 574 575 } // namespace detail 576 577 template <typename T> 578 struct SegmentedMemoryPoolAllocatorBase { 579 using value_type = T; 580 using size_type = std::size_t; 581 using real_value_type = typename detail::real_type<T>::type; 582 583 template <typename U> 584 static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept; 585 template <typename U> 586 static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept; 587 template <typename U> 588 static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept; 589 template <typename U> 590 static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept; 591 template <typename U> 592 static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept; 593 }; 594 595 template <typename T> 596 template <typename U> 597 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept 598 { 599 PetscFunctionBegin; 600 PetscCall(PetscMalloc1(n, ptr)); 601 PetscFunctionReturn(PETSC_SUCCESS); 602 } 603 604 template <typename T> 605 template <typename U> 606 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept 607 { 608 PetscFunctionBegin; 609 PetscCall(PetscFree(ptr)); 610 PetscFunctionReturn(PETSC_SUCCESS); 611 } 612 613 template <typename T> 614 template <typename U> 615 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept 616 { 617 PetscFunctionBegin; 618 PetscCall(PetscArrayzero(ptr, n)); 619 PetscFunctionReturn(PETSC_SUCCESS); 620 } 621 622 template <typename T> 623 template <typename U> 624 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept 625 { 626 PetscFunctionBegin; 627 PetscCall(PetscArraycpy(dest, src, n)); 628 PetscFunctionReturn(PETSC_SUCCESS); 629 } 630 631 template <typename T> 632 template <typename U> 633 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept 634 { 635 using limit_type = std::numeric_limits<real_value_type>; 636 constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max(); 637 638 PetscFunctionBegin; 639 for (size_type i = 0; i < n; ++i) ptr[i] = canary; 640 PetscFunctionReturn(PETSC_SUCCESS); 641 } 642 643 } // namespace impl 644 645 // ========================================================================================== 646 // SegmentedMemoryPool 647 // 648 // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an 649 // allocated buffer. This buffer is further split into memory "chunks" which control 650 // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states: 651 // 652 // 1. Open: 653 // The chunk is free to be claimed by the next suitable allocation request. If the 654 // allocation request is made on the same stream as the chunk was deallocated on, no 655 // serialization needs to occur. If not, the allocating stream must wait for the 656 // event. Claiming the chunk "closes" the chunk. 657 // 658 // 2. Closed: 659 // The chunk has been claimed by an allocation request. It cannot be opened again until it 660 // is deallocated; doing so "opens" the chunk. 661 // 662 // Note that there does not need to be a chunk for every region, chunks are created to satisfy 663 // an allocation request. 664 // 665 // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may 666 // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation 667 // request. This region exists _only_ at the end, as there are no gaps between chunks. 668 // 669 // 670 // |----------------------------------------------------------------------------------------- 671 // | SegmentedMemoryPool 672 // | 673 // | ||-------------|| 674 // | || || ------------------------------------------------------------------- 675 // | || || | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX... 676 // | || || | | | | | | 677 // | || || | x-----x-------x-----xx---------x---------x------x-----x 678 // | || MemoryBlock || -> | ------|-------------|----------|----------------|-------- 679 // | || || | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk | 680 // | || || | --------------------------------------------------------- 681 // | || || ------------------------------------------------------------------- 682 // | ||-------------|| 683 // | || || 684 // | || ... || 685 // | || || 686 // ========================================================================================== 687 688 template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256> 689 class SegmentedMemoryPool; 690 691 // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks. 692 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 693 class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> { 694 public: 695 using value_type = MemType; 696 using stream_type = StreamType; 697 using allocator_type = AllocType; 698 using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>; 699 using pool_type = std::deque<block_type>; 700 using size_type = typename block_type::size_type; 701 702 explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value); 703 704 PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept; 705 PetscErrorCode deallocate(value_type **, const stream_type *) noexcept; 706 PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept; 707 708 private: 709 pool_type pool_; 710 allocator_type allocator_; 711 size_type chunk_size_; 712 713 PetscErrorCode make_block_(size_type, const stream_type *) noexcept; 714 715 friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>; 716 PetscErrorCode register_finalize_(const stream_type *) noexcept; 717 PetscErrorCode finalize_() noexcept; 718 719 PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept; 720 }; 721 722 // ========================================================================================== 723 // SegmentedMemoryPool - Private API 724 // ========================================================================================== 725 726 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 727 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept 728 { 729 const auto block_size = std::max(size, chunk_size_); 730 731 PetscFunctionBegin; 732 PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream)); 733 PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size())); 734 PetscFunctionReturn(PETSC_SUCCESS); 735 } 736 737 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 738 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept 739 { 740 PetscFunctionBegin; 741 PetscCall(make_block_(chunk_size_, stream)); 742 PetscFunctionReturn(PETSC_SUCCESS); 743 } 744 745 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 746 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept 747 { 748 PetscFunctionBegin; 749 PetscCallCXX(pool_.clear()); 750 chunk_size_ = DefaultChunkSize; 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 755 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept 756 { 757 auto found = false; 758 759 PetscFunctionBegin; 760 PetscCall(this->register_finalize(stream)); 761 for (auto &block : pool_) { 762 PetscCall(block.try_allocate_chunk(size, ptr, stream, &found)); 763 if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size)); 767 // if we are here we couldn't find an open block in the pool, so make a new block 768 PetscCall(make_block_(size, stream)); 769 // and assign it 770 PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found)); 771 PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size()); 772 PetscFunctionReturn(PETSC_SUCCESS); 773 } 774 775 // ========================================================================================== 776 // SegmentedMemoryPool - Public API 777 // ========================================================================================== 778 779 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 780 inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size) 781 { 782 } 783 784 /* 785 SegmentedMemoryPool::allocate - get an allocation from the memory pool 786 787 Input Parameters: 788 + req_size - size (in elements) to get 789 . ptr - the pointer to hold the allocation 790 - stream - the stream on which to get the allocation 791 792 Output Parameter: 793 . ptr - the pointer holding the allocation 794 795 Notes: 796 req_size cannot be negative. If req_size if zero, ptr is set to nullptr 797 */ 798 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 799 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept 800 { 801 value_type *ret_ptr = nullptr; 802 803 PetscFunctionBegin; 804 PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size); 805 PetscAssertPointer(ptr, 2); 806 PetscAssertPointer(stream, 3); 807 if (req_size) { 808 const auto size = static_cast<size_type>(req_size); 809 auto aligned_size = alignment == alignof(char) ? size : size + alignment; 810 void *vptr = nullptr; 811 812 PetscCall(allocate_(aligned_size, &ret_ptr, stream)); 813 vptr = ret_ptr; 814 std::align(alignment, size, vptr, aligned_size); 815 ret_ptr = reinterpret_cast<value_type *>(vptr); 816 // sets memory to NaN or infinity depending on the type to catch out uninitialized memory 817 // accesses. 818 if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream)); 819 } 820 *ptr = ret_ptr; 821 PetscFunctionReturn(PETSC_SUCCESS); 822 } 823 824 /* 825 SegmentedMemoryPool::deallocate - release a pointer back to the memory pool 826 827 Input Parameters: 828 + ptr - the pointer to release 829 - stream - the stream to release it on 830 831 Notes: 832 If ptr is not owned by the pool it is unchanged. 833 */ 834 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 835 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept 836 { 837 PetscFunctionBegin; 838 PetscAssertPointer(ptr, 1); 839 PetscAssertPointer(stream, 2); 840 // nobody owns a nullptr, and if they do then they have bigger problems 841 if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS); 842 for (auto &block : pool_) { 843 auto found = false; 844 845 PetscCall(block.try_deallocate_chunk(ptr, stream, &found)); 846 if (PetscLikely(found)) break; 847 } 848 PetscFunctionReturn(PETSC_SUCCESS); 849 } 850 851 /* 852 SegmentedMemoryPool::reallocate - Resize an allocated buffer 853 854 Input Parameters: 855 + new_req_size - the new buffer size 856 . ptr - pointer to the buffer 857 - stream - stream to resize with 858 859 Output Parameter: 860 . ptr - pointer to the new region 861 862 Notes: 863 ptr must have been allocated by the pool. 864 865 It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated). 866 */ 867 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 868 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept 869 { 870 using chunk_type = typename block_type::chunk_type; 871 872 const auto new_size = static_cast<size_type>(new_req_size); 873 const auto old_ptr = *ptr; 874 chunk_type *chunk = nullptr; 875 876 PetscFunctionBegin; 877 PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size); 878 PetscAssertPointer(ptr, 2); 879 PetscAssertPointer(stream, 3); 880 881 // if reallocating to zero, just free 882 if (PetscUnlikely(new_size == 0)) { 883 PetscCall(deallocate(ptr, stream)); 884 PetscFunctionReturn(PETSC_SUCCESS); 885 } 886 887 // search the blocks for the owning chunk 888 for (auto &block : pool_) { 889 PetscCall(block.try_find_chunk(old_ptr, &chunk)); 890 if (chunk) break; // found 891 } 892 PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr); 893 894 if (chunk->capacity() < new_size) { 895 // chunk does not have enough room, need to grab a fresh chunk and copy to it 896 *ptr = nullptr; 897 PetscCall(chunk->release(stream)); 898 PetscCall(allocate(new_size, ptr, stream)); 899 PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream)); 900 } else { 901 // chunk had enough room we can simply grow (or shrink) to fit the new size 902 PetscCall(chunk->resize(new_size)); 903 } 904 PetscFunctionReturn(PETSC_SUCCESS); 905 } 906 907 } // namespace memory 908 909 } // namespace Petsc 910