diff --git a/tests/tt_metal/tt_metal/unit_tests/allocator/test_free_list_allocator.cpp b/tests/tt_metal/tt_metal/unit_tests/allocator/test_free_list_allocator.cpp index feabe1d2a2d..d7b5ffdf52f 100644 --- a/tests/tt_metal/tt_metal/unit_tests/allocator/test_free_list_allocator.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/allocator/test_free_list_allocator.cpp @@ -23,7 +23,6 @@ TEST_F(BasicFixture, TestDirectedSeriesOfAllocDealloc) { tt::tt_metal::allocator::FreeList::SearchPolicy::FIRST ); - bool allocate_bottom_up = true; std::optional addr_0 = free_list_allocator.allocate(32, true); ASSERT_TRUE(addr_0.has_value()); EXPECT_EQ(addr_0.value(), 0); @@ -132,3 +131,104 @@ TEST_F(BasicFixture, TestDirectedSeriesOfAllocDealloc) { ASSERT_TRUE(addr_20.has_value()); EXPECT_EQ(addr_20.value(), 64); } + +TEST_F(BasicFixture, TestResizeAllocator) { + constexpr uint32_t max_size_bytes = 1024; + constexpr uint32_t min_allocation_size_bytes = 32; + constexpr uint32_t alignment = 32; + + tt::tt_metal::allocator::FreeList free_list_allocator = tt::tt_metal::allocator::FreeList( + max_size_bytes, + /*offset*/0, + min_allocation_size_bytes, + alignment, + tt::tt_metal::allocator::FreeList::SearchPolicy::FIRST + ); + + std::optional addr_0 = free_list_allocator.allocate(32, false); + ASSERT_TRUE(addr_0.has_value()); + EXPECT_EQ(addr_0.value(), 992); + + free_list_allocator.shrink_size(64, true); + + std::optional addr_1 = free_list_allocator.allocate(32, false); + ASSERT_TRUE(addr_1.has_value()); + EXPECT_EQ(addr_1.value(), 960); + + std::optional addr_2 = free_list_allocator.allocate(32, true); + ASSERT_TRUE(addr_2.has_value()); + EXPECT_EQ(addr_2.value(), 64); + + free_list_allocator.reset_size(); + + std::optional addr_3 = free_list_allocator.allocate(32, true); + ASSERT_TRUE(addr_3.has_value()); + EXPECT_EQ(addr_3.value(), 0); + + std::optional addr_4 = free_list_allocator.allocate(32, true); + ASSERT_TRUE(addr_4.has_value()); + EXPECT_EQ(addr_4.value(), 32); + + free_list_allocator.deallocate(0); + + std::optional addr_5 = free_list_allocator.allocate(64, true); + ASSERT_TRUE(addr_5.has_value()); + EXPECT_EQ(addr_5.value(), 96); + + free_list_allocator.shrink_size(32, true); + + free_list_allocator.deallocate(32); + + std::optional addr_6 = free_list_allocator.allocate(32, true); + ASSERT_TRUE(addr_6.has_value()); + EXPECT_EQ(addr_6.value(), 32); +} + +TEST_F(BasicFixture, TestDirectedResizeAllocator) { + constexpr uint32_t max_size_bytes = 1024; + constexpr uint32_t min_allocation_size_bytes = 32; + constexpr uint32_t alignment = 32; + + tt::tt_metal::allocator::FreeList free_list_allocator = tt::tt_metal::allocator::FreeList( + max_size_bytes, + /*offset*/0, + min_allocation_size_bytes, + alignment, + tt::tt_metal::allocator::FreeList::SearchPolicy::FIRST + ); + + std::optional addr_0 = free_list_allocator.allocate_at_address(32, 992); + ASSERT_TRUE(addr_0.has_value()); + EXPECT_EQ(addr_0.value(), 32); + + free_list_allocator.shrink_size(32, true); + + std::optional addr_1 = free_list_allocator.allocate(32, false); + ASSERT_TRUE(!addr_1.has_value()); + + std::optional addr_2 = free_list_allocator.allocate_at_address(0, 32); + ASSERT_TRUE(!addr_2.has_value()); + + free_list_allocator.deallocate(32); + + std::optional addr_3 = free_list_allocator.allocate(32, true); + ASSERT_TRUE(addr_3.has_value()); + EXPECT_EQ(addr_3.value(), 32); + + std::optional addr_4 = free_list_allocator.allocate(32, false); + ASSERT_TRUE(addr_4.has_value()); + EXPECT_EQ(addr_4.value(), 992); + + free_list_allocator.reset_size(); + + std::optional addr_5 = free_list_allocator.allocate(32, true); + ASSERT_TRUE(addr_5.has_value()); + EXPECT_EQ(addr_5.value(), 0); + + free_list_allocator.deallocate(32); + + std::optional addr_6 = free_list_allocator.allocate(32, true); + ASSERT_TRUE(addr_6.has_value()); + EXPECT_EQ(addr_6.value(), 32); + +} diff --git a/tests/tt_metal/tt_metal/unit_tests/allocator/test_l1_banking_allocator.cpp b/tests/tt_metal/tt_metal/unit_tests/allocator/test_l1_banking_allocator.cpp index 6c68aeeeae7..27134acd303 100644 --- a/tests/tt_metal/tt_metal/unit_tests/allocator/test_l1_banking_allocator.cpp +++ b/tests/tt_metal/tt_metal/unit_tests/allocator/test_l1_banking_allocator.cpp @@ -26,14 +26,12 @@ uint64_t get_alloc_limit(const tt::tt_metal::Device *device) { } // namespace unit_tests::test_l1_banking_allocator -// TODO: Uplift to DeviceFixture once it does not skip GS -TEST_F(BasicFixture, TestL1BuffersAllocatedTopDown) { - tt::tt_metal::Device *device = tt::tt_metal::CreateDevice(0, 1, 0); +TEST_F(DeviceSingleCardFixture, TestL1BuffersAllocatedTopDown) { std::vector alloc_sizes = {32 * 1024, 64 * 1024, 128 * 1024}; size_t total_size_bytes = 0; - uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(device); + uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(this->device_); std::vector> buffers; int alloc_size_idx = 0; @@ -44,23 +42,19 @@ TEST_F(BasicFixture, TestL1BuffersAllocatedTopDown) { if (total_buffer_size + buffer_size >= alloc_limit) { break; } - auto buffer = tt::tt_metal::Buffer::create(device, buffer_size, buffer_size, tt::tt_metal::BufferType::L1); + auto buffer = tt::tt_metal::Buffer::create(this->device_, buffer_size, buffer_size, tt::tt_metal::BufferType::L1); buffers.emplace_back(std::move(buffer)); total_buffer_size += buffer_size; - EXPECT_EQ(buffers.back()->address(), device->l1_size_per_core() - total_buffer_size); + EXPECT_EQ(buffers.back()->address(), this->device_->l1_size_per_core() - total_buffer_size); } buffers.clear(); - - tt::tt_metal::CloseDevice(device); } -// TODO: Uplift to DeviceFixture once it does not skip GS -TEST_F(BasicFixture, TestL1BuffersDoNotGrowBeyondBankSize) { - tt::tt_metal::Device *device = tt::tt_metal::CreateDevice(0, 1, 0); - uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(device); +TEST_F(DeviceSingleCardFixture, TestL1BuffersDoNotGrowBeyondBankSize) { + uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(this->device_); tt::tt_metal::InterleavedBufferConfig l1_config{ - .device=device, + .device=this->device_, .size = alloc_limit + 64, .page_size = alloc_limit + 64, .buffer_type = tt::tt_metal::BufferType::L1 @@ -69,6 +63,4 @@ TEST_F(BasicFixture, TestL1BuffersDoNotGrowBeyondBankSize) { EXPECT_ANY_THROW( auto buffer = tt::tt_metal::CreateBuffer(l1_config); ); - - tt::tt_metal::CloseDevice(device); } diff --git a/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp b/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp index eb556627272..0249a086e2b 100644 --- a/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp +++ b/tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp @@ -58,11 +58,16 @@ class Algorithm { virtual void dump_blocks(std::ofstream &out) const = 0; + virtual void shrink_size(DeviceAddr shrink_size, bool bottom_up=true) = 0; + + virtual void reset_size() = 0; + protected: DeviceAddr max_size_bytes_; DeviceAddr offset_bytes_; DeviceAddr min_allocation_size_; DeviceAddr alignment_; + DeviceAddr shrink_size_ = 0; std::optional lowest_occupied_address_; }; diff --git a/tt_metal/impl/allocator/algorithms/free_list.cpp b/tt_metal/impl/allocator/algorithms/free_list.cpp index 35c564c8d78..4e8af7fa361 100644 --- a/tt_metal/impl/allocator/algorithms/free_list.cpp +++ b/tt_metal/impl/allocator/algorithms/free_list.cpp @@ -424,6 +424,83 @@ void FreeList::dump_blocks(std::ofstream &out) const { out << "\n"; } +void FreeList::shrink_size(DeviceAddr shrink_size, bool bottom_up) { + if (shrink_size == 0) { + return; + } + TT_FATAL(bottom_up, "Shrinking from the top is currently not supported"); + TT_FATAL( + shrink_size <= this->max_size_bytes_, + "Shrink size {} must be smaller than max size {}", + shrink_size, + this->max_size_bytes_); + if (this->lowest_occupied_address_.has_value()) { + TT_FATAL( + shrink_size <= *this->lowest_occupied_address_, + "Shrinking size by {} that would cut into allocated memory at address {} and is not supported", + shrink_size, + *this->lowest_occupied_address_); + } + TT_FATAL(this->shrink_size_ == 0, "Can only shrink size if it is not already shrunk"); + + // Since we know the lowest occupied addr is greater or equal to shrink size, there should be a free block at start + // with size of at least shrink size + // Case 1: There is a free block at head and its size is greater than shrink size, + // so we just need to modify its attributes + TT_ASSERT(this->free_block_head_ != nullptr, "Free block head should not be null"); + if (this->free_block_head_->size > shrink_size) { + TT_ASSERT(this->free_block_head_->address == 0, "Free block head should start at 0"); + this->free_block_head_->address = shrink_size; + this->free_block_head_->size -= shrink_size; + } + // Case 2: The free block at head is the exact shrink size, so we need to remove it + else { + // Free block head is also the block head + this->block_head_ = this->block_head_->next_block; + this->block_head_->prev_block = nullptr; + // Free block head is also the free block tail when there is only 1 free block + if (this->free_block_head_->next_free == nullptr) { + this->free_block_tail_ = nullptr; + this->free_block_head_ = nullptr; + } else { + this->free_block_head_->next_free->prev_free = nullptr; + this->free_block_head_ = this->free_block_head_->next_free; + } + } + this->max_size_bytes_ -= shrink_size; + this->shrink_size_ = shrink_size; +} + +void FreeList::reset_size() { + if (shrink_size_ == 0) { + return; + } + // Case 1: No free blocks exist + // We create a new free block which will be the free head and tail, and will also be our new block head + if (this->free_block_head_ == nullptr) { + this->free_block_head_ = boost::make_local_shared(0, this->shrink_size_); + this->free_block_head_->next_block = this->block_head_; + this->free_block_tail_ = this->free_block_head_; + this->block_head_ = this->free_block_head_; + } + // Case 2: Free blocks exist but not at the start + else if (this->free_block_head_->address != this->shrink_size_) { + auto new_free_block = boost::make_local_shared(0, this->shrink_size_); + new_free_block->next_block = this->block_head_; + new_free_block->next_free = this->free_block_head_; + this->free_block_head_->prev_free = new_free_block; + this->free_block_head_ = new_free_block; + this->block_head_ = this->free_block_head_; + } + // Case 3: There is a free block at the start and we just need to modify its attributes + else { + this->free_block_head_->address = 0; + this->free_block_head_->size += this->shrink_size_; + } + this->max_size_bytes_ += this->shrink_size_; + this->shrink_size_ = 0; +} + } // namespace allocator } // namespace tt_metal diff --git a/tt_metal/impl/allocator/algorithms/free_list.hpp b/tt_metal/impl/allocator/algorithms/free_list.hpp index 5ea371bd00a..f0756dd7803 100644 --- a/tt_metal/impl/allocator/algorithms/free_list.hpp +++ b/tt_metal/impl/allocator/algorithms/free_list.hpp @@ -35,6 +35,10 @@ class FreeList : public Algorithm { void dump_blocks(std::ofstream &out) const; + void shrink_size(DeviceAddr shrink_size, bool bottom_up=true); + + void reset_size(); + private: struct Block { Block(DeviceAddr address, DeviceAddr size) : address(address), size(size) {} diff --git a/tt_metal/impl/allocator/allocator.cpp b/tt_metal/impl/allocator/allocator.cpp index 7e760b3bf37..cfc00f31d3b 100644 --- a/tt_metal/impl/allocator/allocator.cpp +++ b/tt_metal/impl/allocator/allocator.cpp @@ -191,8 +191,21 @@ Statistics BankManager::get_statistics() const { } void BankManager::dump_blocks(std::ofstream &out) const { - if (this->allocator_) + if (this->allocator_) { this->allocator_->dump_blocks(out); + } +} + +void BankManager::shrink_size(DeviceAddr shrink_size, bool bottom_up) { + if (this->allocator_) { + this->allocator_->shrink_size(shrink_size, bottom_up); + } +} + +void BankManager::reset_size() { + if (this->allocator_) { + this->allocator_->reset_size(); + } } DeviceAddr get_unreserved_base_address(const Allocator &allocator, const HalMemType &mem_type) { @@ -379,6 +392,30 @@ void verify_safe_allocation(Allocator& allocator) { const std::unordered_set &get_allocated_buffers(const Allocator &allocator) { return allocator.allocated_buffers; } +void shrink_allocator_size( + Allocator &allocator, + const BufferType &buffer_type, + DeviceAddr shrink_size, + bool bottom_up) { + switch (buffer_type) { + case BufferType::DRAM: + allocator.dram_manager.shrink_size(shrink_size, bottom_up); + break; + case BufferType::L1: + allocator.l1_manager.shrink_size(shrink_size, bottom_up); + break; + case BufferType::L1_SMALL: + allocator.l1_small_manager.shrink_size(shrink_size, bottom_up); + break; + case BufferType::TRACE: + allocator.trace_buffer_manager.shrink_size(shrink_size, bottom_up); + break; + default: { + TT_THROW("Unsupported buffer type!"); + } + } +} + DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer) { DeviceAddr address = 0; auto page_size = buffer->page_size(); diff --git a/tt_metal/impl/allocator/allocator.hpp b/tt_metal/impl/allocator/allocator.hpp index 60e4c97f0b9..500dd42c062 100644 --- a/tt_metal/impl/allocator/allocator.hpp +++ b/tt_metal/impl/allocator/allocator.hpp @@ -58,6 +58,9 @@ class BankManager { void dump_blocks(std::ofstream &out) const; + void shrink_size(DeviceAddr shrink_size, bool bottom_up=true); + void reset_size(); + private: void deallocate_buffer_(DeviceAddr address); @@ -105,6 +108,8 @@ std::optional lowest_occupied_l1_address(const Allocator &allocator, DeviceAddr base_alloc(const AllocatorConfig & config, BankManager &bank_manager, DeviceAddr size, DeviceAddr page_size, bool bottom_up, std::optional num_shards); +void shrink_allocator_size(Allocator &allocator, const BufferType &buffer_type, DeviceAddr shrink_size, bool bottom_up=true); + DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer); void mark_allocations_unsafe(Allocator &allocator); diff --git a/tt_metal/impl/allocator/l1_banking_allocator.hpp b/tt_metal/impl/allocator/l1_banking_allocator.hpp index f133160294a..553103a4793 100644 --- a/tt_metal/impl/allocator/l1_banking_allocator.hpp +++ b/tt_metal/impl/allocator/l1_banking_allocator.hpp @@ -23,7 +23,7 @@ uint64_t alloc_at_addr_in_compute_and_storage(const AllocatorConfig &config, Ban } // namespace allocator -// Currently only designed for Grayskull. +// For Grayskull: // There are 108 (9x12) compute and storage cores where each core has one 1 MB bank with top 512 KB (non-exclusively) dedicated to L1 buffer storage. // Circular buffers can grow into L1 buffer storage space but L1 buffers cannot grow past 512 KB. // There are an additional 10 storage cores where each core has two banks of 512 KB dedicated solely to L1 buffer storage.