Skip to content

Commit

Permalink
#13655: Add limited ability to shrink allocator size from the bottom
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-aho committed Nov 1, 2024
1 parent 8ee4146 commit d52a958
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ TEST_F(BasicFixture, TestDirectedSeriesOfAllocDealloc) {
tt::tt_metal::allocator::FreeList::SearchPolicy::FIRST
);

bool allocate_bottom_up = true;
std::optional<uint64_t> addr_0 = free_list_allocator.allocate(32, true);
ASSERT_TRUE(addr_0.has_value());
EXPECT_EQ(addr_0.value(), 0);
Expand Down Expand Up @@ -132,3 +131,104 @@ TEST_F(BasicFixture, TestDirectedSeriesOfAllocDealloc) {
ASSERT_TRUE(addr_20.has_value());
EXPECT_EQ(addr_20.value(), 64);
}

TEST_F(BasicFixture, TestResizeAllocator) {
    constexpr uint32_t max_size_bytes = 1024;
    constexpr uint32_t min_allocation_size_bytes = 32;
    constexpr uint32_t alignment = 32;

    tt::tt_metal::allocator::FreeList free_list_allocator = tt::tt_metal::allocator::FreeList(
        max_size_bytes,
        /*offset*/0,
        min_allocation_size_bytes,
        alignment,
        tt::tt_metal::allocator::FreeList::SearchPolicy::FIRST
    );

    // Top-down allocation starts at the very top of the 1 KB region.
    auto top_alloc = free_list_allocator.allocate(32, false);
    ASSERT_TRUE(top_alloc.has_value());
    EXPECT_EQ(top_alloc.value(), 992);

    // Shrink 64 bytes off the bottom of the region.
    free_list_allocator.shrink_size(64, true);

    // Top-down allocation is unaffected by a bottom shrink.
    auto second_top_alloc = free_list_allocator.allocate(32, false);
    ASSERT_TRUE(second_top_alloc.has_value());
    EXPECT_EQ(second_top_alloc.value(), 960);

    // Bottom-up allocation now starts at the new lower bound (64).
    auto bottom_alloc = free_list_allocator.allocate(32, true);
    ASSERT_TRUE(bottom_alloc.has_value());
    EXPECT_EQ(bottom_alloc.value(), 64);

    // Restore the shrunk range; [0, 64) is usable again.
    free_list_allocator.reset_size();

    auto low_alloc = free_list_allocator.allocate(32, true);
    ASSERT_TRUE(low_alloc.has_value());
    EXPECT_EQ(low_alloc.value(), 0);

    auto next_low_alloc = free_list_allocator.allocate(32, true);
    ASSERT_TRUE(next_low_alloc.has_value());
    EXPECT_EQ(next_low_alloc.value(), 32);

    free_list_allocator.deallocate(0);

    // [0, 32) is free but too small for 64 bytes; [32, 96) is occupied,
    // so the first fit is at 96.
    auto large_alloc = free_list_allocator.allocate(64, true);
    ASSERT_TRUE(large_alloc.has_value());
    EXPECT_EQ(large_alloc.value(), 96);

    // Shrinking by 32 is legal because [0, 32) is currently free.
    free_list_allocator.shrink_size(32, true);

    free_list_allocator.deallocate(32);

    // With [32, 64) freed, the lowest available address is 32 again.
    auto reuse_alloc = free_list_allocator.allocate(32, true);
    ASSERT_TRUE(reuse_alloc.has_value());
    EXPECT_EQ(reuse_alloc.value(), 32);
}

TEST_F(BasicFixture, TestDirectedResizeAllocator) {
    constexpr uint32_t max_size_bytes = 1024;
    constexpr uint32_t min_allocation_size_bytes = 32;
    constexpr uint32_t alignment = 32;

    tt::tt_metal::allocator::FreeList free_list_allocator = tt::tt_metal::allocator::FreeList(
        max_size_bytes,
        /*offset*/0,
        min_allocation_size_bytes,
        alignment,
        tt::tt_metal::allocator::FreeList::SearchPolicy::FIRST
    );

    // Fill [32, 1024) with a single directed allocation, leaving only [0, 32) free.
    auto directed_alloc = free_list_allocator.allocate_at_address(32, 992);
    ASSERT_TRUE(directed_alloc.has_value());
    EXPECT_EQ(directed_alloc.value(), 32);

    // Shrinking by 32 consumes the only free space.
    free_list_allocator.shrink_size(32, true);

    // No free space remains for a top-down allocation.
    auto failed_top_alloc = free_list_allocator.allocate(32, false);
    ASSERT_TRUE(!failed_top_alloc.has_value());

    // Address 0 now lies below the shrunk bottom, so a directed allocation
    // there must fail.
    auto failed_directed_alloc = free_list_allocator.allocate_at_address(0, 32);
    ASSERT_TRUE(!failed_directed_alloc.has_value());

    free_list_allocator.deallocate(32);

    // Bottom-up allocation begins at the shrunk lower bound (32).
    auto bottom_alloc = free_list_allocator.allocate(32, true);
    ASSERT_TRUE(bottom_alloc.has_value());
    EXPECT_EQ(bottom_alloc.value(), 32);

    auto top_alloc = free_list_allocator.allocate(32, false);
    ASSERT_TRUE(top_alloc.has_value());
    EXPECT_EQ(top_alloc.value(), 992);

    // Restoring the shrunk range makes address 0 allocatable again.
    free_list_allocator.reset_size();

    auto zero_alloc = free_list_allocator.allocate(32, true);
    ASSERT_TRUE(zero_alloc.has_value());
    EXPECT_EQ(zero_alloc.value(), 0);

    free_list_allocator.deallocate(32);

    auto reuse_alloc = free_list_allocator.allocate(32, true);
    ASSERT_TRUE(reuse_alloc.has_value());
    EXPECT_EQ(reuse_alloc.value(), 32);
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,12 @@ uint64_t get_alloc_limit(const tt::tt_metal::Device *device) {

} // namespace unit_tests::test_l1_banking_allocator

// TODO: Uplift to DeviceFixture once it does not skip GS
TEST_F(BasicFixture, TestL1BuffersAllocatedTopDown) {
tt::tt_metal::Device *device = tt::tt_metal::CreateDevice(0, 1, 0);
TEST_F(DeviceSingleCardFixture, TestL1BuffersAllocatedTopDown) {

std::vector<uint32_t> alloc_sizes = {32 * 1024, 64 * 1024, 128 * 1024};
size_t total_size_bytes = 0;

uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(device);
uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(this->device_);

std::vector<std::shared_ptr<Buffer>> buffers;
int alloc_size_idx = 0;
Expand All @@ -44,23 +42,19 @@ TEST_F(BasicFixture, TestL1BuffersAllocatedTopDown) {
if (total_buffer_size + buffer_size >= alloc_limit) {
break;
}
auto buffer = tt::tt_metal::Buffer::create(device, buffer_size, buffer_size, tt::tt_metal::BufferType::L1);
auto buffer = tt::tt_metal::Buffer::create(this->device_, buffer_size, buffer_size, tt::tt_metal::BufferType::L1);
buffers.emplace_back(std::move(buffer));
total_buffer_size += buffer_size;
EXPECT_EQ(buffers.back()->address(), device->l1_size_per_core() - total_buffer_size);
EXPECT_EQ(buffers.back()->address(), this->device_->l1_size_per_core() - total_buffer_size);
}
buffers.clear();

tt::tt_metal::CloseDevice(device);
}

// TODO: Uplift to DeviceFixture once it does not skip GS
TEST_F(BasicFixture, TestL1BuffersDoNotGrowBeyondBankSize) {
tt::tt_metal::Device *device = tt::tt_metal::CreateDevice(0, 1, 0);
uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(device);
TEST_F(DeviceSingleCardFixture, TestL1BuffersDoNotGrowBeyondBankSize) {
uint64_t alloc_limit = unit_tests::test_l1_banking_allocator::get_alloc_limit(this->device_);

tt::tt_metal::InterleavedBufferConfig l1_config{
.device=device,
.device=this->device_,
.size = alloc_limit + 64,
.page_size = alloc_limit + 64,
.buffer_type = tt::tt_metal::BufferType::L1
Expand All @@ -69,6 +63,4 @@ TEST_F(BasicFixture, TestL1BuffersDoNotGrowBeyondBankSize) {
EXPECT_ANY_THROW(
auto buffer = tt::tt_metal::CreateBuffer(l1_config);
);

tt::tt_metal::CloseDevice(device);
}
5 changes: 5 additions & 0 deletions tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,16 @@ class Algorithm {

virtual void dump_blocks(std::ofstream &out) const = 0;

virtual void shrink_size(DeviceAddr shrink_size, bool bottom_up=true) = 0;

virtual void reset_size() = 0;

protected:
DeviceAddr max_size_bytes_;
DeviceAddr offset_bytes_;
DeviceAddr min_allocation_size_;
DeviceAddr alignment_;
DeviceAddr shrink_size_ = 0;
std::optional<DeviceAddr> lowest_occupied_address_;
};

Expand Down
77 changes: 77 additions & 0 deletions tt_metal/impl/allocator/algorithms/free_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,83 @@ void FreeList::dump_blocks(std::ofstream &out) const {
out << "\n";
}

// Shrinks the usable region of this allocator by `shrink_size` bytes from the
// bottom. Only bottom-up shrinking is supported; the allocator must not
// already be shrunk, and the removed range [0, shrink_size) must not overlap
// any live allocation. Reversible via reset_size().
void FreeList::shrink_size(DeviceAddr shrink_size, bool bottom_up) {
    if (shrink_size == 0) {
        return;
    }
    TT_FATAL(bottom_up, "Shrinking from the top is currently not supported");
    TT_FATAL(
        shrink_size <= this->max_size_bytes_,
        "Shrink size {} must be smaller than max size {}",
        shrink_size,
        this->max_size_bytes_);
    if (this->lowest_occupied_address_.has_value()) {
        TT_FATAL(
            shrink_size <= *this->lowest_occupied_address_,
            "Shrinking size by {} that would cut into allocated memory at address {} and is not supported",
            shrink_size,
            *this->lowest_occupied_address_);
    }
    // Shrinks do not stack; reset_size() must be called before shrinking again.
    TT_FATAL(this->shrink_size_ == 0, "Can only shrink size if it is not already shrunk");

    // Since we know the lowest occupied addr is greater or equal to shrink size, there should be a free block at start
    // with size of at least shrink size
    TT_ASSERT(this->free_block_head_ != nullptr, "Free block head should not be null");
    if (this->free_block_head_->size > shrink_size) {
        // Case 1: There is a free block at head and its size is greater than shrink size,
        // so we just trim the shrunk range off its front.
        TT_ASSERT(this->free_block_head_->address == 0, "Free block head should start at 0");
        this->free_block_head_->address = shrink_size;
        this->free_block_head_->size -= shrink_size;
    }
    // Case 2: The free block at head is the exact shrink size, so we need to remove it
    else {
        // Free block head is also the block head
        this->block_head_ = this->block_head_->next_block;
        // Guard against shrinking away the entire region: when the removed
        // free block was the only block in the list, the new head is null and
        // must not be dereferenced.
        if (this->block_head_ != nullptr) {
            this->block_head_->prev_block = nullptr;
        }
        // Free block head is also the free block tail when there is only 1 free block
        if (this->free_block_head_->next_free == nullptr) {
            this->free_block_tail_ = nullptr;
            this->free_block_head_ = nullptr;
        } else {
            this->free_block_head_->next_free->prev_free = nullptr;
            this->free_block_head_ = this->free_block_head_->next_free;
        }
    }
    this->max_size_bytes_ -= shrink_size;
    this->shrink_size_ = shrink_size;
}

// Restores capacity previously removed by shrink_size(), re-inserting the
// shrunk range [0, shrink_size_) into the allocator as free memory. No-op if
// the allocator is not currently shrunk.
void FreeList::reset_size() {
    if (shrink_size_ == 0) {
        return;
    }
    // Case 1: No free blocks exist
    // We create a new free block which will be the free head and tail, and will also be our new block head
    if (this->free_block_head_ == nullptr) {
        this->free_block_head_ = boost::make_local_shared<FreeList::Block>(0, this->shrink_size_);
        this->free_block_head_->next_block = this->block_head_;
        // Bug fix: maintain the block list's back-link so backward traversal
        // (e.g. coalescing on deallocate) still works after the prepend.
        if (this->block_head_ != nullptr) {
            this->block_head_->prev_block = this->free_block_head_;
        }
        this->free_block_tail_ = this->free_block_head_;
        this->block_head_ = this->free_block_head_;
    }
    // Case 2: Free blocks exist but not at the start
    else if (this->free_block_head_->address != this->shrink_size_) {
        auto new_free_block = boost::make_local_shared<FreeList::Block>(0, this->shrink_size_);
        new_free_block->next_block = this->block_head_;
        // Bug fix: link the previous head back to the newly prepended block.
        this->block_head_->prev_block = new_free_block;
        new_free_block->next_free = this->free_block_head_;
        this->free_block_head_->prev_free = new_free_block;
        this->free_block_head_ = new_free_block;
        this->block_head_ = this->free_block_head_;
    }
    // Case 3: There is a free block at the start and we just need to extend it
    // downward to cover the restored range.
    else {
        this->free_block_head_->address = 0;
        this->free_block_head_->size += this->shrink_size_;
    }
    this->max_size_bytes_ += this->shrink_size_;
    this->shrink_size_ = 0;
}

} // namespace allocator

} // namespace tt_metal
Expand Down
4 changes: 4 additions & 0 deletions tt_metal/impl/allocator/algorithms/free_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class FreeList : public Algorithm {

void dump_blocks(std::ofstream &out) const;

void shrink_size(DeviceAddr shrink_size, bool bottom_up=true);

void reset_size();

private:
struct Block {
Block(DeviceAddr address, DeviceAddr size) : address(address), size(size) {}
Expand Down
39 changes: 38 additions & 1 deletion tt_metal/impl/allocator/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,21 @@ Statistics BankManager::get_statistics() const {
}

// Dumps the backing allocator's block list to `out`; no-op when this bank
// manager has no allocator.
void BankManager::dump_blocks(std::ofstream &out) const {
    if (!this->allocator_) {
        return;
    }
    this->allocator_->dump_blocks(out);
}

// Forwards a shrink request to the backing allocator; no-op when this bank
// manager has no allocator.
void BankManager::shrink_size(DeviceAddr shrink_size, bool bottom_up) {
    if (!this->allocator_) {
        return;
    }
    this->allocator_->shrink_size(shrink_size, bottom_up);
}

// Forwards a size-reset request to the backing allocator; no-op when this
// bank manager has no allocator.
void BankManager::reset_size() {
    if (!this->allocator_) {
        return;
    }
    this->allocator_->reset_size();
}

DeviceAddr get_unreserved_base_address(const Allocator &allocator, const HalMemType &mem_type) {
Expand Down Expand Up @@ -379,6 +392,30 @@ void verify_safe_allocation(Allocator& allocator) {

const std::unordered_set<Buffer *> &get_allocated_buffers(const Allocator &allocator) { return allocator.allocated_buffers; }

// Shrinks the bank manager backing `buffer_type` by `shrink_size` bytes.
// Throws for buffer types that have no associated bank manager.
void shrink_allocator_size(
    Allocator &allocator,
    const BufferType &buffer_type,
    DeviceAddr shrink_size,
    bool bottom_up) {
    // Resolve the target bank manager first so the shrink call appears once.
    BankManager *bank_manager = nullptr;
    switch (buffer_type) {
        case BufferType::DRAM: bank_manager = &allocator.dram_manager; break;
        case BufferType::L1: bank_manager = &allocator.l1_manager; break;
        case BufferType::L1_SMALL: bank_manager = &allocator.l1_small_manager; break;
        case BufferType::TRACE: bank_manager = &allocator.trace_buffer_manager; break;
        default: {
            TT_THROW("Unsupported buffer type!");
        }
    }
    bank_manager->shrink_size(shrink_size, bottom_up);
}

DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer) {
DeviceAddr address = 0;
auto page_size = buffer->page_size();
Expand Down
5 changes: 5 additions & 0 deletions tt_metal/impl/allocator/allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class BankManager {

void dump_blocks(std::ofstream &out) const;

void shrink_size(DeviceAddr shrink_size, bool bottom_up=true);
void reset_size();

private:
void deallocate_buffer_(DeviceAddr address);

Expand Down Expand Up @@ -105,6 +108,8 @@ std::optional<DeviceAddr> lowest_occupied_l1_address(const Allocator &allocator,

DeviceAddr base_alloc(const AllocatorConfig & config, BankManager &bank_manager, DeviceAddr size, DeviceAddr page_size, bool bottom_up, std::optional<uint32_t> num_shards);

void shrink_allocator_size(Allocator &allocator, const BufferType &buffer_type, DeviceAddr shrink_size, bool bottom_up=true);

DeviceAddr allocate_buffer(Allocator &allocator, DeviceAddr size, Buffer *buffer);

void mark_allocations_unsafe(Allocator &allocator);
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/allocator/l1_banking_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ uint64_t alloc_at_addr_in_compute_and_storage(const AllocatorConfig &config, Ban
} // namespace allocator


// Currently only designed for Grayskull.
// For Grayskull:
// There are 108 (9x12) compute and storage cores where each core has one 1 MB bank with top 512 KB (non-exclusively) dedicated to L1 buffer storage.
// Circular buffers can grow into L1 buffer storage space but L1 buffers cannot grow past 512 KB.
// There are an additional 10 storage cores where each core has two banks of 512 KB dedicated solely to L1 buffer storage.
Expand Down

0 comments on commit d52a958

Please sign in to comment.