1 #pragma once 2 3 #include <petsc/private/deviceimpl.h> 4 5 #include <petsc/private/cpp/macros.hpp> 6 #include <petsc/private/cpp/type_traits.hpp> 7 #include <petsc/private/cpp/utility.hpp> 8 #include <petsc/private/cpp/register_finalize.hpp> 9 #include <petsc/private/cpp/memory.hpp> 10 11 #include <limits> 12 #include <deque> 13 #include <vector> 14 15 namespace Petsc 16 { 17 18 namespace device 19 { 20 21 template <typename T> 22 class StreamBase { 23 public: 24 using id_type = int; 25 using derived_type = T; 26 27 static const id_type INVALID_ID; 28 29 // needed so that dependent auto works, see veccupmimpl.h for a detailed discussion 30 template <typename U = T> 31 PETSC_NODISCARD auto get_stream() const noexcept PETSC_DECLTYPE_AUTO_RETURNS(static_cast<const U &>(*this).get_stream_()); 32 33 PETSC_NODISCARD id_type get_id() const noexcept { return static_cast<const T &>(*this).get_id_(); } 34 35 template <typename E> 36 PetscErrorCode record_event(E &&event) const noexcept 37 { 38 return static_cast<const T &>(*this).record_event_(std::forward<E>(event)); 39 } 40 41 template <typename E> 42 PetscErrorCode wait_for_event(E &&event) const noexcept 43 { 44 return static_cast<const T &>(*this).wait_for_(std::forward<E>(event)); 45 } 46 47 protected: 48 constexpr StreamBase() noexcept = default; 49 50 struct default_event_type { }; 51 using default_stream_type = std::nullptr_t; 52 53 PETSC_NODISCARD static constexpr default_stream_type get_stream_() noexcept { return nullptr; } 54 55 PETSC_NODISCARD static constexpr id_type get_id_() noexcept { return 0; } 56 57 template <typename U = T> 58 static constexpr PetscErrorCode record_event_(const typename U::event_type &) noexcept 59 { 60 return PETSC_SUCCESS; 61 } 62 63 template <typename U = T> 64 static constexpr PetscErrorCode wait_for_(const typename U::event_type &) noexcept 65 { 66 return PETSC_SUCCESS; 67 } 68 }; 69 70 template <typename T> 71 const typename StreamBase<T>::id_type StreamBase<T>::INVALID_ID = -1; 72 73 struct DefaultStream : StreamBase<DefaultStream> { 74 using stream_type = typename StreamBase<DefaultStream>::default_stream_type; 75 using id_type = typename StreamBase<DefaultStream>::id_type; 76 using event_type = typename StreamBase<DefaultStream>::default_event_type; 77 }; 78 79 } // namespace device 80 81 namespace memory 82 { 83 84 namespace impl 85 { 86 87 // ========================================================================================== 88 // MemoryChunk 89 // 90 // Represents a checked-out region of a MemoryBlock. Tracks the offset into the owning 91 // MemoryBlock and its size/capacity 92 // ========================================================================================== 93 94 template <typename EventType> 95 class MemoryChunk { 96 public: 97 using event_type = EventType; 98 using size_type = std::size_t; 99 100 MemoryChunk(size_type, size_type) noexcept; 101 explicit MemoryChunk(size_type) noexcept; 102 103 MemoryChunk(MemoryChunk &&) noexcept; 104 MemoryChunk &operator=(MemoryChunk &&) noexcept; 105 106 MemoryChunk(const MemoryChunk &) noexcept = delete; 107 MemoryChunk &operator=(const MemoryChunk &) noexcept = delete; 108 109 PETSC_NODISCARD size_type start() const noexcept { return start_; } 110 PETSC_NODISCARD size_type size() const noexcept { return size_; } 111 // REVIEW ME: 112 // make this an actual field, normally each chunk shrinks_to_fit() on begin claimed, but in 113 // theory only the last chunk needs to do this 114 PETSC_NODISCARD size_type capacity() const noexcept { return size_; } 115 PETSC_NODISCARD size_type total_offset() const noexcept { return start() + size(); } 116 117 template <typename U> 118 PetscErrorCode release(const device::StreamBase<U> *) noexcept; 119 template <typename U> 120 PetscErrorCode claim(const device::StreamBase<U> *, size_type, bool *, bool = false) noexcept; 121 template <typename U> 122 PETSC_NODISCARD bool can_claim(const device::StreamBase<U> *, size_type, bool) const noexcept; 123 PetscErrorCode resize(size_type) noexcept; 124 PETSC_NODISCARD bool contains(size_type) const noexcept; 125 126 private: 127 event_type event_{}; // event recorded when the chunk was released 128 bool open_ = true; // is this chunk open? 129 int stream_id_ = device::DefaultStream::INVALID_ID; // id of the last stream to use the chunk, populated on release 130 size_type size_ = 0; // size of the chunk 131 const size_type start_ = 0; // offset from the start of the owning block 132 133 template <typename U> 134 PETSC_NODISCARD bool stream_compat_(const device::StreamBase<U> *) const noexcept; 135 }; 136 137 // ========================================================================================== 138 // MemoryChunk - Private API 139 // ========================================================================================== 140 141 // asks and answers the question: can this stream claim this chunk without serializing? 142 template <typename E> 143 template <typename U> 144 inline bool MemoryChunk<E>::stream_compat_(const device::StreamBase<U> *strm) const noexcept 145 { 146 return (stream_id_ == strm->INVALID_ID) || (stream_id_ == strm->get_id()); 147 } 148 149 // ========================================================================================== 150 // MemoryChunk - Public API 151 // ========================================================================================== 152 153 template <typename E> 154 inline MemoryChunk<E>::MemoryChunk(size_type start, size_type size) noexcept : size_(size), start_(start) 155 { 156 } 157 158 template <typename E> 159 inline MemoryChunk<E>::MemoryChunk(size_type size) noexcept : MemoryChunk(0, size) 160 { 161 } 162 163 template <typename E> 164 inline MemoryChunk<E>::MemoryChunk(MemoryChunk<E> &&other) noexcept : 165 event_(std::move(other.event_)), open_(util::exchange(other.open_, false)), stream_id_(util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID)), size_(util::exchange(other.size_, 0)), start_(std::move(other.start_)) 166 { 167 } 168 169 template <typename E> 170 inline MemoryChunk<E> &MemoryChunk<E>::operator=(MemoryChunk<E> &&other) noexcept 171 { 172 PetscFunctionBegin; 173 if (this != &other) { 174 event_ = std::move(other.event_); 175 open_ = util::exchange(other.open_, false); 176 stream_id_ = util::exchange(other.stream_id_, device::DefaultStream::INVALID_ID); 177 size_ = util::exchange(other.size_, 0); 178 start_ = std::move(other.start_); 179 } 180 PetscFunctionReturn(*this); 181 } 182 183 /* 184 MemoryChunk::release - release a chunk on a stream 185 186 Input Parameter: 187 . stream - the stream to release the chunk with 188 189 Notes: 190 Inserts a release operation on stream and records the state of stream at the time this 191 routine was called. 192 193 Future allocation requests which attempt to claim the chunk on the same stream may re-acquire 194 the chunk without serialization. 195 196 If another stream attempts to claim the chunk they must wait for the recorded event before 197 claiming the chunk. 198 */ 199 template <typename E> 200 template <typename U> 201 inline PetscErrorCode MemoryChunk<E>::release(const device::StreamBase<U> *stream) noexcept 202 { 203 PetscFunctionBegin; 204 open_ = true; 205 stream_id_ = stream->get_id(); 206 PetscCall(stream->record_event(event_)); 207 PetscFunctionReturn(PETSC_SUCCESS); 208 } 209 210 /* 211 MemoryChunk::claim - attempt to claim a particular chunk 212 213 Input Parameters: 214 + stream - the stream on which to attempt to claim 215 . req_size - the requested size (in elements) to attempt to claim 216 - serialize - (optional, false) whether the claimant allows serialization 217 218 Output Parameter: 219 . success - true if the chunk was claimed, false otherwise 220 */ 221 template <typename E> 222 template <typename U> 223 inline PetscErrorCode MemoryChunk<E>::claim(const device::StreamBase<U> *stream, size_type req_size, bool *success, bool serialize) noexcept 224 { 225 PetscFunctionBegin; 226 if ((*success = can_claim(stream, req_size, serialize))) { 227 if (serialize && !stream_compat_(stream)) PetscCall(stream->wait_for_event(event_)); 228 PetscCall(resize(req_size)); 229 open_ = false; 230 } 231 PetscFunctionReturn(PETSC_SUCCESS); 232 } 233 234 /* 235 MemoryChunk::can_claim - test whether a particular chunk can be claimed 236 237 Input Parameters: 238 + stream - the stream on which to attempt to claim 239 . req_size - the requested size (in elements) to attempt to claim 240 - serialize - whether the claimant allows serialization 241 242 Output: 243 . [return] - true if the chunk is claimable given the configuration, false otherwise 244 */ 245 template <typename E> 246 template <typename U> 247 inline bool MemoryChunk<E>::can_claim(const device::StreamBase<U> *stream, size_type req_size, bool serialize) const noexcept 248 { 249 if (open_ && (req_size <= capacity())) { 250 // fully compatible 251 if (stream_compat_(stream)) return true; 252 // stream wasn't compatible, but could claim if we serialized 253 if (serialize) return true; 254 // incompatible stream and did not want to serialize 255 } 256 return false; 257 } 258 259 /* 260 MemoryChunk::resize - grow a chunk to new size 261 262 Input Parameter: 263 . newsize - the new size Requested 264 265 Notes: 266 newsize cannot be larger than capacity 267 */ 268 template <typename E> 269 inline PetscErrorCode MemoryChunk<E>::resize(size_type newsize) noexcept 270 { 271 PetscFunctionBegin; 272 PetscAssert(newsize <= capacity(), PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "New size %zu larger than capacity %zu", newsize, capacity()); 273 size_ = newsize; 274 PetscFunctionReturn(PETSC_SUCCESS); 275 } 276 277 /* 278 MemoryChunk::contains - query whether a memory chunk contains a particular offset 279 280 Input Parameters: 281 . offset - The offset from the MemoryBlock start 282 283 Notes: 284 Returns true if the chunk contains the offset, false otherwise 285 */ 286 template <typename E> 287 inline bool MemoryChunk<E>::contains(size_type offset) const noexcept 288 { 289 return (offset >= start()) && (offset < total_offset()); 290 } 291 292 // ========================================================================================== 293 // MemoryBlock 294 // 295 // A "memory block" manager, which owns the pointer to a particular memory range. Retrieving 296 // and restoring a block is thread-safe (so may be used by multiple device streams). 297 // ========================================================================================== 298 299 template <typename T, typename AllocatorType, typename StreamType> 300 class MemoryBlock { 301 public: 302 using value_type = T; 303 using allocator_type = AllocatorType; 304 using stream_type = StreamType; 305 using event_type = typename stream_type::event_type; 306 using chunk_type = MemoryChunk<event_type>; 307 using size_type = typename chunk_type::size_type; 308 using chunk_list_type = std::vector<chunk_type>; 309 310 template <typename U> 311 MemoryBlock(allocator_type *, size_type, const device::StreamBase<U> *) noexcept; 312 313 ~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value); 314 315 MemoryBlock(MemoryBlock &&) noexcept; 316 MemoryBlock &operator=(MemoryBlock &&) noexcept; 317 318 // memory blocks are not copyable 319 MemoryBlock(const MemoryBlock &) = delete; 320 MemoryBlock &operator=(const MemoryBlock &) = delete; 321 322 /* --- actual functions --- */ 323 PetscErrorCode try_allocate_chunk(size_type, T **, const stream_type *, bool *) noexcept; 324 PetscErrorCode try_deallocate_chunk(T **, const stream_type *, bool *) noexcept; 325 PetscErrorCode try_find_chunk(const T *, chunk_type **) noexcept; 326 PETSC_NODISCARD bool owns_pointer(const T *) const noexcept; 327 328 PETSC_NODISCARD size_type size() const noexcept { return size_; } 329 PETSC_NODISCARD size_type bytes() const noexcept { return sizeof(value_type) * size(); } 330 PETSC_NODISCARD size_type num_chunks() const noexcept { return chunks_.size(); } 331 332 private: 333 value_type *mem_{}; 334 allocator_type *allocator_{}; 335 size_type size_{}; 336 chunk_list_type chunks_{}; 337 338 PetscErrorCode clear_(const stream_type *) noexcept; 339 }; 340 341 // ========================================================================================== 342 // MemoryBlock - Private API 343 // ========================================================================================== 344 345 // clear the memory block, called from destructors and move assignment/construction 346 template <typename T, typename A, typename S> 347 PetscErrorCode MemoryBlock<T, A, S>::clear_(const stream_type *stream) noexcept 348 { 349 PetscFunctionBegin; 350 if (PetscLikely(mem_)) { 351 PetscCall(allocator_->deallocate(mem_, stream)); 352 mem_ = nullptr; 353 } 354 size_ = 0; 355 PetscCallCXX(chunks_.clear()); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 // ========================================================================================== 360 // MemoryBlock - Public API 361 // ========================================================================================== 362 363 // default constructor, allocates memory immediately 364 template <typename T, typename A, typename S> 365 template <typename U> 366 MemoryBlock<T, A, S>::MemoryBlock(allocator_type *alloc, size_type s, const device::StreamBase<U> *stream) noexcept : allocator_(alloc), size_(s) 367 { 368 PetscFunctionBegin; 369 PetscCallAbort(PETSC_COMM_SELF, alloc->allocate(&mem_, s, stream)); 370 PetscAssertAbort(mem_, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to allocate memory block of size %zu", s); 371 PetscFunctionReturnVoid(); 372 } 373 374 template <typename T, typename A, typename S> 375 MemoryBlock<T, A, S>::~MemoryBlock() noexcept(std::is_nothrow_destructible<chunk_list_type>::value) 376 { 377 stream_type stream; 378 379 PetscFunctionBegin; 380 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 381 PetscFunctionReturnVoid(); 382 } 383 384 template <typename T, typename A, typename S> 385 MemoryBlock<T, A, S>::MemoryBlock(MemoryBlock &&other) noexcept : mem_(util::exchange(other.mem_, nullptr)), allocator_(other.allocator_), size_(util::exchange(other.size_, 0)), chunks_(std::move(other.chunks_)) 386 { 387 } 388 389 template <typename T, typename A, typename S> 390 MemoryBlock<T, A, S> &MemoryBlock<T, A, S>::operator=(MemoryBlock &&other) noexcept 391 { 392 PetscFunctionBegin; 393 if (this != &other) { 394 stream_type stream; 395 396 PetscCallAbort(PETSC_COMM_SELF, clear_(&stream)); 397 mem_ = util::exchange(other.mem_, nullptr); 398 allocator_ = other.allocator_; 399 size_ = util::exchange(other.size_, 0); 400 chunks_ = std::move(other.chunks_); 401 } 402 PetscFunctionReturn(*this); 403 } 404 405 /* 406 MemoryBock::owns_pointer - returns true if this block owns a pointer, false otherwise 407 */ 408 template <typename T, typename A, typename S> 409 inline bool MemoryBlock<T, A, S>::owns_pointer(const T *ptr) const noexcept 410 { 411 // each pool is linear in memory, so it suffices to check the bounds 412 return (ptr >= mem_) && (ptr < std::next(mem_, size())); 413 } 414 415 /* 416 MemoryBlock::try_allocate_chunk - try to get a chunk from this MemoryBlock 417 418 Input Parameters: 419 + req_size - the requested size of the allocation (in elements) 420 . ptr - ptr to fill 421 - stream - stream to fill the pointer on 422 423 Output Parameter: 424 . success - true if chunk was gotten, false otherwise 425 426 Notes: 427 If the current memory could not satisfy the memory request, ptr is unchanged 428 */ 429 template <typename T, typename A, typename S> 430 inline PetscErrorCode MemoryBlock<T, A, S>::try_allocate_chunk(size_type req_size, T **ptr, const stream_type *stream, bool *success) noexcept 431 { 432 PetscFunctionBegin; 433 *success = false; 434 if (req_size <= size()) { 435 const auto try_create_chunk = [&]() { 436 const auto was_empty = chunks_.empty(); 437 const auto block_alloced = was_empty ? 0 : chunks_.back().total_offset(); 438 439 PetscFunctionBegin; 440 if (block_alloced + req_size <= size()) { 441 PetscCallCXX(chunks_.emplace_back(block_alloced, req_size)); 442 PetscCall(chunks_.back().claim(stream, req_size, success)); 443 *ptr = mem_ + block_alloced; 444 if (was_empty) PetscAssert(*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to claim chunk (of size %zu) even though block (of size %zu) was empty!", req_size, size()); 445 } 446 PetscFunctionReturn(PETSC_SUCCESS); 447 }; 448 const auto try_find_open_chunk = [&](bool serialize = false) { 449 PetscFunctionBegin; 450 for (auto &chunk : chunks_) { 451 PetscCall(chunk.claim(stream, req_size, success, serialize)); 452 if (*success) { 453 *ptr = mem_ + chunk.start(); 454 break; 455 } 456 } 457 PetscFunctionReturn(PETSC_SUCCESS); 458 }; 459 const auto try_steal_other_stream_chunk = [&]() { 460 PetscFunctionBegin; 461 PetscCall(try_find_open_chunk(true)); 462 PetscFunctionReturn(PETSC_SUCCESS); 463 }; 464 465 // search previously distributed chunks, but only claim one if it is on the same stream 466 // as us 467 PetscCall(try_find_open_chunk()); 468 469 // if we are here we couldn't reuse one of our own chunks so check first if the pool 470 // has room for a new one 471 if (!*success) PetscCall(try_create_chunk()); 472 473 // try pruning dead chunks off the back, note we do this regardless of whether we are 474 // successful 475 while (chunks_.back().can_claim(stream, 0, false)) { 476 PetscCallCXX(chunks_.pop_back()); 477 if (chunks_.empty()) { 478 // if chunks are empty it implies we have managed to claim (and subsequently destroy) 479 // our own chunk twice! something has gone wrong 480 PetscAssert(!*success, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Successfully claimed a chunk (of size %zu, from block of size %zu) but have now managed to claim it for a second time (and destroyed it)!", req_size, size()); 481 break; 482 } 483 } 484 485 // if previously unsuccessful see if enough space has opened up due to pruning. note that 486 // if the chunk list was emptied from the pruning this call must succeed in allocating a 487 // chunk, otherwise something is wrong 488 if (!*success) PetscCall(try_create_chunk()); 489 490 // last resort, iterate over all chunks and see if we can steal one by waiting on the 491 // current owner to finish using it 492 if (!*success) PetscCall(try_steal_other_stream_chunk()); 493 } 494 PetscFunctionReturn(PETSC_SUCCESS); 495 } 496 497 /* 498 MemoryBlock::try_deallocate_chunk - try to restore a chunk to this MemoryBlock 499 500 Input Parameters: 501 + ptr - ptr to restore 502 - stream - stream to restore the pointer on 503 504 Output Parameter: 505 . success - true if chunk was restored, false otherwise 506 507 Notes: 508 ptr is set to nullptr on successful restore, and is unchanged otherwise. If the ptr is owned 509 by this MemoryBlock then it is restored on stream. The same stream may receive ptr again 510 without synchronization, but other streams may not do so until either serializing or the 511 stream is idle again. 512 */ 513 template <typename T, typename A, typename S> 514 inline PetscErrorCode MemoryBlock<T, A, S>::try_deallocate_chunk(T **ptr, const stream_type *stream, bool *success) noexcept 515 { 516 chunk_type *chunk = nullptr; 517 518 PetscFunctionBegin; 519 PetscCall(try_find_chunk(*ptr, &chunk)); 520 if (chunk) { 521 PetscCall(chunk->release(stream)); 522 *ptr = nullptr; 523 *success = true; 524 } else { 525 *success = false; 526 } 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 /* 531 MemoryBlock::try_find_chunk - try to find the chunk which owns ptr 532 533 Input Parameter: 534 . ptr - the pointer to look for 535 536 Output Parameter: 537 . ret_chunk - pointer to the owning chunk or nullptr if not found 538 */ 539 template <typename T, typename A, typename S> 540 inline PetscErrorCode MemoryBlock<T, A, S>::try_find_chunk(const T *ptr, chunk_type **ret_chunk) noexcept 541 { 542 PetscFunctionBegin; 543 *ret_chunk = nullptr; 544 if (owns_pointer(ptr)) { 545 const auto offset = static_cast<size_type>(ptr - mem_); 546 547 for (auto &chunk : chunks_) { 548 if (chunk.contains(offset)) { 549 *ret_chunk = &chunk; 550 break; 551 } 552 } 553 554 PetscAssert(*ret_chunk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed to find %zu in block, even though it is within block range [%zu, %zu)", reinterpret_cast<uintptr_t>(ptr), reinterpret_cast<uintptr_t>(mem_), reinterpret_cast<uintptr_t>(std::next(mem_, size()))); 555 } 556 PetscFunctionReturn(PETSC_SUCCESS); 557 } 558 559 namespace detail 560 { 561 562 template <typename T> 563 struct real_type { 564 using type = T; 565 }; 566 567 template <> 568 struct real_type<PetscScalar> { 569 using type = PetscReal; 570 }; 571 572 } // namespace detail 573 574 template <typename T> 575 struct SegmentedMemoryPoolAllocatorBase { 576 using value_type = T; 577 using size_type = std::size_t; 578 using real_value_type = typename detail::real_type<T>::type; 579 580 template <typename U> 581 static PetscErrorCode allocate(value_type **, size_type, const device::StreamBase<U> *) noexcept; 582 template <typename U> 583 static PetscErrorCode deallocate(value_type *, const device::StreamBase<U> *) noexcept; 584 template <typename U> 585 static PetscErrorCode zero(value_type *, size_type, const device::StreamBase<U> *) noexcept; 586 template <typename U> 587 static PetscErrorCode uninitialized_copy(value_type *, const value_type *, size_type, const device::StreamBase<U> *) noexcept; 588 template <typename U> 589 static PetscErrorCode set_canary(value_type *, size_type, const device::StreamBase<U> *) noexcept; 590 }; 591 592 template <typename T> 593 template <typename U> 594 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::allocate(value_type **ptr, size_type n, const device::StreamBase<U> *) noexcept 595 { 596 PetscFunctionBegin; 597 PetscCall(PetscMalloc1(n, ptr)); 598 PetscFunctionReturn(PETSC_SUCCESS); 599 } 600 601 template <typename T> 602 template <typename U> 603 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::deallocate(value_type *ptr, const device::StreamBase<U> *) noexcept 604 { 605 PetscFunctionBegin; 606 PetscCall(PetscFree(ptr)); 607 PetscFunctionReturn(PETSC_SUCCESS); 608 } 609 610 template <typename T> 611 template <typename U> 612 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::zero(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept 613 { 614 PetscFunctionBegin; 615 PetscCall(PetscArrayzero(ptr, n)); 616 PetscFunctionReturn(PETSC_SUCCESS); 617 } 618 619 template <typename T> 620 template <typename U> 621 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::uninitialized_copy(value_type *dest, const value_type *src, size_type n, const device::StreamBase<U> *) noexcept 622 { 623 PetscFunctionBegin; 624 PetscCall(PetscArraycpy(dest, src, n)); 625 PetscFunctionReturn(PETSC_SUCCESS); 626 } 627 628 template <typename T> 629 template <typename U> 630 inline PetscErrorCode SegmentedMemoryPoolAllocatorBase<T>::set_canary(value_type *ptr, size_type n, const device::StreamBase<U> *) noexcept 631 { 632 using limit_type = std::numeric_limits<real_value_type>; 633 constexpr value_type canary = limit_type::has_signaling_NaN ? limit_type::signaling_NaN() : limit_type::max(); 634 635 PetscFunctionBegin; 636 for (size_type i = 0; i < n; ++i) ptr[i] = canary; 637 PetscFunctionReturn(PETSC_SUCCESS); 638 } 639 640 } // namespace impl 641 642 // ========================================================================================== 643 // SegmentedMemoryPool 644 // 645 // Stream-aware async memory allocator. Holds a list of memory "blocks" which each control an 646 // allocated buffer. This buffer is further split into memory "chunks" which control 647 // consecutive, non-overlapping regions of the block. Chunks may be in 1 of 2 states: 648 // 649 // 1. Open: 650 // The chunk is free to be claimed by the next suitable allocation request. If the 651 // allocation request is made on the same stream as the chunk was deallocated on, no 652 // serialization needs to occur. If not, the allocating stream must wait for the 653 // event. Claiming the chunk "closes" the chunk. 654 // 655 // 2. Closed: 656 // The chunk has been claimed by an allocation request. It cannot be opened again until it 657 // is deallocated; doing so "opens" the chunk. 658 // 659 // Note that there does not need to be a chunk for every region, chunks are created to satisfy 660 // an allocation request. 661 // 662 // Thus there is usually a region of "unallocated" memory at the end of the buffer, which may 663 // be claimed by a newly created chunk if existing chunks cannot satisfy the allocation 664 // request. This region exists _only_ at the end, as there are no gaps between chunks. 665 // 666 // 667 // |----------------------------------------------------------------------------------------- 668 // | SegmentedMemoryPool 669 // | 670 // | ||-------------|| 671 // | || || ------------------------------------------------------------------- 672 // | || || | AAAAAAAAAAAAAABBBBBBBCCCCCCCCCCCCCCCCCCCCDDDDDDDDDDDDDXXXXXXXX... 673 // | || || | | | | | | 674 // | || || | x-----x-------x-----xx---------x---------x------x-----x 675 // | || MemoryBlock || -> | ------|-------------|----------|----------------|-------- 676 // | || || | | MemoryChunk | MemoryChunk | MemoryChunk | MemoryChunk | 677 // | || || | --------------------------------------------------------- 678 // | || || ------------------------------------------------------------------- 679 // | ||-------------|| 680 // | || || 681 // | || ... || 682 // | || || 683 // ========================================================================================== 684 685 template <typename MemType, typename StreamType = device::DefaultStream, typename AllocType = impl::SegmentedMemoryPoolAllocatorBase<MemType>, std::size_t DefaultChunkSize = 256> 686 class SegmentedMemoryPool; 687 688 // The actual memory pool class. It is in essence just a wrapper for a list of MemoryBlocks. 689 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 690 class SegmentedMemoryPool : public RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>> { 691 public: 692 using value_type = MemType; 693 using stream_type = StreamType; 694 using allocator_type = AllocType; 695 using block_type = impl::MemoryBlock<value_type, allocator_type, stream_type>; 696 using pool_type = std::deque<block_type>; 697 using size_type = typename block_type::size_type; 698 699 explicit SegmentedMemoryPool(AllocType = AllocType{}, std::size_t = DefaultChunkSize) noexcept(std::is_nothrow_default_constructible<pool_type>::value); 700 701 PetscErrorCode allocate(PetscInt, value_type **, const stream_type *, size_type = std::alignment_of<MemType>::value) noexcept; 702 PetscErrorCode deallocate(value_type **, const stream_type *) noexcept; 703 PetscErrorCode reallocate(PetscInt, value_type **, const stream_type *) noexcept; 704 705 private: 706 pool_type pool_; 707 allocator_type allocator_; 708 size_type chunk_size_; 709 710 PetscErrorCode make_block_(size_type, const stream_type *) noexcept; 711 712 friend class RegisterFinalizeable<SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>>; 713 PetscErrorCode register_finalize_(const stream_type *) noexcept; 714 PetscErrorCode finalize_() noexcept; 715 716 PetscErrorCode allocate_(size_type, value_type **, const stream_type *) noexcept; 717 }; 718 719 // ========================================================================================== 720 // SegmentedMemoryPool - Private API 721 // ========================================================================================== 722 723 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 724 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::make_block_(size_type size, const stream_type *stream) noexcept 725 { 726 const auto block_size = std::max(size, chunk_size_); 727 728 PetscFunctionBegin; 729 PetscCallCXX(pool_.emplace_back(&allocator_, block_size, stream)); 730 PetscCall(PetscInfo(nullptr, "Allocated new block of size %zu, total %zu blocks\n", block_size, pool_.size())); 731 PetscFunctionReturn(PETSC_SUCCESS); 732 } 733 734 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 735 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::register_finalize_(const stream_type *stream) noexcept 736 { 737 PetscFunctionBegin; 738 PetscCall(make_block_(chunk_size_, stream)); 739 PetscFunctionReturn(PETSC_SUCCESS); 740 } 741 742 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 743 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::finalize_() noexcept 744 { 745 PetscFunctionBegin; 746 PetscCallCXX(pool_.clear()); 747 chunk_size_ = DefaultChunkSize; 748 PetscFunctionReturn(PETSC_SUCCESS); 749 } 750 751 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 752 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate_(size_type size, value_type **ptr, const stream_type *stream) noexcept 753 { 754 auto found = false; 755 756 PetscFunctionBegin; 757 PetscCall(this->register_finalize(stream)); 758 for (auto &block : pool_) { 759 PetscCall(block.try_allocate_chunk(size, ptr, stream, &found)); 760 if (PetscLikely(found)) PetscFunctionReturn(PETSC_SUCCESS); 761 } 762 763 PetscCall(PetscInfo(nullptr, "Could not find an open block in the pool (%zu blocks) (requested size %zu), allocating new block\n", pool_.size(), size)); 764 // if we are here we couldn't find an open block in the pool, so make a new block 765 PetscCall(make_block_(size, stream)); 766 // and assign it 767 PetscCall(pool_.back().try_allocate_chunk(size, ptr, stream, &found)); 768 PetscAssert(found, PETSC_COMM_SELF, PETSC_ERR_MEM, "Failed to get a suitable memory chunk (of size %zu) from newly allocated memory block (size %zu)", size, pool_.back().size()); 769 PetscFunctionReturn(PETSC_SUCCESS); 770 } 771 772 // ========================================================================================== 773 // SegmentedMemoryPool - Public API 774 // ========================================================================================== 775 776 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 777 inline SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::SegmentedMemoryPool(AllocType alloc, std::size_t size) noexcept(std::is_nothrow_default_constructible<pool_type>::value) : allocator_(std::move(alloc)), chunk_size_(size) 778 { 779 } 780 781 /* 782 SegmentedMemoryPool::allocate - get an allocation from the memory pool 783 784 Input Parameters: 785 + req_size - size (in elements) to get 786 . ptr - the pointer to hold the allocation 787 - stream - the stream on which to get the allocation 788 789 Output Parameter: 790 . ptr - the pointer holding the allocation 791 792 Notes: 793 req_size cannot be negative. If req_size if zero, ptr is set to nullptr 794 */ 795 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 796 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::allocate(PetscInt req_size, value_type **ptr, const stream_type *stream, size_type alignment) noexcept 797 { 798 value_type *ret_ptr = nullptr; 799 800 PetscFunctionBegin; 801 PetscAssert(req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", req_size); 802 PetscAssertPointer(ptr, 2); 803 PetscAssertPointer(stream, 3); 804 if (req_size) { 805 const auto size = static_cast<size_type>(req_size); 806 auto aligned_size = alignment == alignof(char) ? size : size + alignment; 807 void *vptr = nullptr; 808 809 PetscCall(allocate_(aligned_size, &ret_ptr, stream)); 810 vptr = ret_ptr; 811 std::align(alignment, size, vptr, aligned_size); 812 ret_ptr = reinterpret_cast<value_type *>(vptr); 813 // sets memory to NaN or infinity depending on the type to catch out uninitialized memory 814 // accesses. 815 if (PetscDefined(USE_DEBUG)) PetscCall(allocator_.set_canary(ret_ptr, size, stream)); 816 } 817 *ptr = ret_ptr; 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 /* 822 SegmentedMemoryPool::deallocate - release a pointer back to the memory pool 823 824 Input Parameters: 825 + ptr - the pointer to release 826 - stream - the stream to release it on 827 828 Notes: 829 If ptr is not owned by the pool it is unchanged. 830 */ 831 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 832 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::deallocate(value_type **ptr, const stream_type *stream) noexcept 833 { 834 PetscFunctionBegin; 835 PetscAssertPointer(ptr, 1); 836 PetscAssertPointer(stream, 2); 837 // nobody owns a nullptr, and if they do then they have bigger problems 838 if (!*ptr) PetscFunctionReturn(PETSC_SUCCESS); 839 for (auto &block : pool_) { 840 auto found = false; 841 842 PetscCall(block.try_deallocate_chunk(ptr, stream, &found)); 843 if (PetscLikely(found)) break; 844 } 845 PetscFunctionReturn(PETSC_SUCCESS); 846 } 847 848 /* 849 SegmentedMemoryPool::reallocate - Resize an allocated buffer 850 851 Input Parameters: 852 + new_req_size - the new buffer size 853 . ptr - pointer to the buffer 854 - stream - stream to resize with 855 856 Output Parameter: 857 . ptr - pointer to the new region 858 859 Notes: 860 ptr must have been allocated by the pool. 861 862 It's OK to shrink the buffer, even down to 0 (in which case it is just deallocated). 863 */ 864 template <typename MemType, typename StreamType, typename AllocType, std::size_t DefaultChunkSize> 865 inline PetscErrorCode SegmentedMemoryPool<MemType, StreamType, AllocType, DefaultChunkSize>::reallocate(PetscInt new_req_size, value_type **ptr, const stream_type *stream) noexcept 866 { 867 using chunk_type = typename block_type::chunk_type; 868 869 const auto new_size = static_cast<size_type>(new_req_size); 870 const auto old_ptr = *ptr; 871 chunk_type *chunk = nullptr; 872 873 PetscFunctionBegin; 874 PetscAssert(new_req_size >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested memory amount (%" PetscInt_FMT ") must be >= 0", new_req_size); 875 PetscAssertPointer(ptr, 2); 876 PetscAssertPointer(stream, 3); 877 878 // if reallocating to zero, just free 879 if (PetscUnlikely(new_size == 0)) { 880 PetscCall(deallocate(ptr, stream)); 881 PetscFunctionReturn(PETSC_SUCCESS); 882 } 883 884 // search the blocks for the owning chunk 885 for (auto &block : pool_) { 886 PetscCall(block.try_find_chunk(old_ptr, &chunk)); 887 if (chunk) break; // found 888 } 889 PetscAssert(chunk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Memory pool does not own %p, so cannot reallocate it", *ptr); 890 891 if (chunk->capacity() < new_size) { 892 // chunk does not have enough room, need to grab a fresh chunk and copy to it 893 *ptr = nullptr; 894 PetscCall(chunk->release(stream)); 895 PetscCall(allocate(new_size, ptr, stream)); 896 PetscCall(allocator_.uninitialized_copy(*ptr, old_ptr, new_size, stream)); 897 } else { 898 // chunk had enough room we can simply grow (or shrink) to fit the new size 899 PetscCall(chunk->resize(new_size)); 900 } 901 PetscFunctionReturn(PETSC_SUCCESS); 902 } 903 904 } // namespace memory 905 906 } // namespace Petsc 907