diff --git a/include/tvm/tir/usmp/algo/greedy.h b/include/tvm/tir/usmp/algo/greedy.h
new file mode 100644
index 000000000000..f8219b64a954
--- /dev/null
+++ b/include/tvm/tir/usmp/algo/greedy.h
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file include/tvm/tir/usmp/algo/greedy.h
+ * \brief This header file contains helper methods used in greedy algorithms
+ * for planning memory for USMP
+ */
+#pragma once
+#include <tvm/arith/analyzer.h>
+#include <tvm/runtime/device_api.h>
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/function.h>
+#include <tvm/tir/stmt_functor.h>
+#include <tvm/tir/usmp/utils.h>
+
+#include <unordered_map>
+#include <vector>
+
+namespace tvm {
+namespace tir {
+namespace usmp {
+namespace algo {
+
+/*!
+ * \brief This is the base class for greedy algorithms, where the sorting
+ * is specialized in the extended classes based on the greedy criteria.
+ */
+class GreedyBase {
+ public:
+  GreedyBase() {}
+  /*!
+   * \brief This function should be implemented by the extended classes to sort the BufferInfo
+   * objects based on a criterion and then call PostSortAllocation.
+   */
+  virtual Map<BufferInfo, PoolAllocation> PlanMemory(const Array<BufferInfo>& buffer_info_arr) = 0;
+
+ protected:
+  /*!
+   * \brief Rounds up the offset to satisfy the alignment requirement
+   */
+  size_t round_up_to_byte_alignment(const size_t& non_aligned_byte_offset,
+                                    const int& byte_alignment) {
+    return ((non_aligned_byte_offset + byte_alignment - 1) / byte_alignment) * byte_alignment;
+  }
+
+  /*!
+   * \brief A helper function to check whether an offset is valid given the constraints
+   */
+  bool IsValidPlacement(const PoolInfo& candidate_pool, const size_t& next_offset,
+                        const size_t& size_bytes) {
+    if (candidate_pool->size_hint_bytes == -1) {
+      // this means the pool is not bounded
+      return true;
+    }
+    auto pool_size = static_cast<size_t>(candidate_pool->size_hint_bytes->value);
+    auto max_address = next_offset + size_bytes;
+    if (max_address <= pool_size) {
+      return true;
+    }
+    return false;
+  }
+
+  /*!
+   * \brief Selects a pool for placement in the given set of ordered pool candidates
+   */
+  PoolInfo SelectPlacementPool(
+      const BufferInfo& buf_info,
+      const std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual>& pool_offsets) {
+    // Here the pool candidates are ordered when consumed by the algorithm.
+    // This could be the order the user has specified. However, schedulers are
+    // welcome to change the order for performance reasons.
+    for (const auto& pool_info : buf_info->pool_candidates) {
+      if (pool_offsets.count(pool_info)) {
+        return pool_info;
+      }
+    }
+    CHECK(false) << "TVM USMP Error: the space available in the provided pools was exceeded when "
+                    "trying to allocate the buffer : "
+                 << buf_info << "\n. Please increase the size_hints for memory pools.";
+    return PoolInfo();
+  }
+
+  /*!
+   * \brief This is the base allocation function that works on sorted BufferInfo objects based
+   * on the greedy heuristic. The sorting algorithm has to be called before calling this.
+   */
+  Map<BufferInfo, PoolAllocation> PostSortAllocation(
+      const std::vector<BufferInfo>& buffer_info_vec) {
+    Map<BufferInfo, PoolAllocation> pool_allocations;
+    for (const auto& buf_info : buffer_info_vec) {
+      std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> pool_offset_candidates;
+      for (const auto& pool_info : buf_info->pool_candidates) {
+        // Mark pool candidates that satisfy the size constraints.
+        if (IsValidPlacement(pool_info, 0, buf_info->size_bytes->value)) {
+          pool_offset_candidates[pool_info] = 0;
+        }
+      }
+
+      for (const auto& conflict_buf_info_obj : buf_info->conflicts) {
+        auto conflict_buf_info = Downcast<BufferInfo>(conflict_buf_info_obj);
+        size_t next_offset = 0;
+        // We only look at already allocated BufferInfo in terms of conflicts.
+        if (pool_allocations.count(conflict_buf_info)) {
+          auto pool_allocation = pool_allocations[conflict_buf_info];
+          next_offset = pool_allocation->byte_offset + conflict_buf_info->size_bytes;
+          next_offset =
+              round_up_to_byte_alignment(next_offset, conflict_buf_info->alignment->value);
+          // Checks whether the next offset in the same pool as the conflicting BufferInfo is valid.
+          if (IsValidPlacement(pool_allocation->pool_info, next_offset,
+                               buf_info->size_bytes->value)) {
+            // There could be multiple conflicting BufferInfo in the same pool.
+            // Thus, we need to make sure we pick the largest offset of them all.
+            if (next_offset > pool_offset_candidates[pool_allocation->pool_info]) {
+              pool_offset_candidates[pool_allocation->pool_info] = next_offset;
+            }
+          } else {
+            pool_offset_candidates.erase(pool_allocation->pool_info);
+          }
+        }
+      }
+      auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);
+      pool_allocations.Set(
+          buf_info, PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool])));
+    }
+    return pool_allocations;
+  }
+};
+
+}  // namespace algo
+}  // namespace usmp
+}  // namespace tir
+}  // namespace tvm
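For orientation: an extended class is expected to override `PlanMemory`, impose its greedy ordering, and then delegate to `PostSortAllocation`. A minimal sketch of that pattern, assuming the hypothetical class name `GreedyBySizeExample` (the real greedy-by-size planner lives in `src/tir/usmp/algo/greedy.cc`):

```cpp
#include <tvm/tir/usmp/algo/greedy.h>

#include <algorithm>
#include <vector>

namespace tvm {
namespace tir {
namespace usmp {
namespace algo {

// Hypothetical GreedyBase extension: sort buffers largest-first, then let
// the shared PostSortAllocation helper assign pools and byte offsets.
class GreedyBySizeExample : public GreedyBase {
 public:
  Map<BufferInfo, PoolAllocation> PlanMemory(const Array<BufferInfo>& buffer_info_arr) final {
    std::vector<BufferInfo> buffer_info_vec;
    for (const auto& buffer_info : buffer_info_arr) {
      buffer_info_vec.push_back(buffer_info);
    }
    // The greedy criterion: place the largest buffers first.
    std::sort(buffer_info_vec.begin(), buffer_info_vec.end(),
              [](const BufferInfo& a, const BufferInfo& b) {
                return a->size_bytes->value > b->size_bytes->value;
              });
    return PostSortAllocation(buffer_info_vec);
  }
};

}  // namespace algo
}  // namespace usmp
}  // namespace tir
}  // namespace tvm
```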
diff --git a/src/tir/usmp/algo/greedy.cc b/src/tir/usmp/algo/greedy.cc
index 5e1ce5f289c1..70591d6a9ef9 100644
--- a/src/tir/usmp/algo/greedy.cc
+++ b/src/tir/usmp/algo/greedy.cc
@@ -39,6 +39,7 @@
 #include <tvm/tir/builtin.h>
 #include <tvm/tir/function.h>
 #include <tvm/tir/stmt_functor.h>
+#include <tvm/tir/usmp/algo/greedy.h>
 #include <tvm/tir/usmp/utils.h>
 
 namespace tvm {
@@ -46,111 +47,6 @@
 namespace tir {
 namespace usmp {
 namespace algo {
 
-/*!
- * \brief This is the base class for Greedy Algorithms where the sorting
- * is specialized in the extended classes based on the greedy criteria.
- */
-class GreedyBase {
- public:
-  GreedyBase() {}
-  /*!
-   * \brief This function should be implemented by the extended classes to sort the BufferInfo
-   * objects based on a criteria and then calling PostSortAllocation.
-   */
-  virtual Map<BufferInfo, PoolAllocation> PlanMemory(const Array<BufferInfo>& buffer_info_arr) = 0;
-
- protected:
-  /*!
-   * \brief Rounds up the offset to satisfy the alignement requirement
-   */
-  size_t round_up_to_byte_alignment(const size_t& non_aligned_byte_offset,
-                                    const int& byte_alignment) {
-    return ((non_aligned_byte_offset + byte_alignment - 1) / byte_alignment) * byte_alignment;
-  }
-
-  /*!
-   * \brief A helper function check whether a offset is valid given the constraints
-   */
-  bool IsValidPlacement(const PoolInfo& candidate_pool, const size_t& next_offset,
-                        const size_t& size_bytes) {
-    if (candidate_pool->size_hint_bytes == -1) {
-      // this means pool is not bounded
-      return true;
-    }
-    auto pool_size = static_cast<size_t>(candidate_pool->size_hint_bytes->value);
-    auto max_address = next_offset + size_bytes;
-    if (max_address <= pool_size) {
-      return true;
-    }
-    return false;
-  }
-
-  /*!
-   * \brief Selects a pool for placement in the given set of ordered pool candidates
-   */
-  PoolInfo SelectPlacementPool(
-      const BufferInfo& buf_info,
-      const std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual>& pool_offsets) {
-    // Here the pool candidates are ordered when it is consumed by the algorithm.
-    // This could be from order the user has specified. However, schedulers are
-    // welcome to change the order for performance reasons.
-    for (const auto& pool_info : buf_info->pool_candidates) {
-      if (pool_offsets.count(pool_info)) {
-        return pool_info;
-      }
-    }
-    CHECK(false) << "TVM USMP Error: the space available in the provided pools exceeded when "
-                    "trying to allocate the buffer : "
-                 << buf_info << "\n. Please increase the size_hints for memory pools.";
-    return PoolInfo();
-  }
-
-  /*!
-   * \brief This is the base allocation function that works on sorted BufferInfo objects based
-   * on the greedy heuristic. The sorting algorithm has to be called before calling this.
-   */
-  Map<BufferInfo, PoolAllocation> PostSortAllocation(
-      const std::vector<BufferInfo>& buffer_info_vec) {
-    Map<BufferInfo, PoolAllocation> pool_allocations;
-    for (const auto& buf_info : buffer_info_vec) {
-      std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> pool_offset_candidates;
-      for (const auto& pool_info : buf_info->pool_candidates) {
-        // Mark pool candidates that satisfy the size constraints.
-        if (IsValidPlacement(pool_info, 0, buf_info->size_bytes->value)) {
-          pool_offset_candidates[pool_info] = 0;
-        }
-      }
-
-      for (const auto& conflict_buf_info_obj : buf_info->conflicts) {
-        auto conflict_buf_info = Downcast<BufferInfo>(conflict_buf_info_obj);
-        size_t next_offset = 0;
-        // We only look at already allocated BufferInfo in-terms of conflicts.
-        if (pool_allocations.count(conflict_buf_info)) {
-          auto pool_allocation = pool_allocations[conflict_buf_info];
-          next_offset = pool_allocation->byte_offset + conflict_buf_info->size_bytes;
-          next_offset =
-              round_up_to_byte_alignment(next_offset, conflict_buf_info->alignment->value);
-          // Checks whether the next offset in the same pool as the conflicting BufferInfo is valid.
-          if (IsValidPlacement(pool_allocation->pool_info, next_offset,
-                               buf_info->size_bytes->value)) {
-            // There could be multiple conflicting BufferInfo in the same pool.
-            // Thus, we need to make sure we pick the largest offset of them all.
-            if (next_offset > pool_offset_candidates[pool_allocation->pool_info]) {
-              pool_offset_candidates[pool_allocation->pool_info] = next_offset;
-            }
-          } else {
-            pool_offset_candidates.erase(pool_allocation->pool_info);
-          }
-        }
-      }
-      auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);
-      pool_allocations.Set(
-          buf_info, PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool])));
-    }
-    return pool_allocations;
-  }
-};
-
 /*!
  * \brief This class implements Greedy by the size of BufferInfo
  * greedy algorithm. Please refer to main documentation of the file
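The two helpers moved into the new header drive every placement decision: `round_up_to_byte_alignment` bumps a candidate offset up to the next alignment boundary, and `IsValidPlacement` rejects the offset if the buffer would overflow a bounded pool. A self-contained sketch of that arithmetic, with made-up numbers:

```cpp
#include <cstddef>
#include <cstdio>

// Same formula as round_up_to_byte_alignment in greedy.h.
static size_t round_up(size_t offset, int align) {
  return ((offset + align - 1) / align) * align;
}

int main() {
  // A conflicting buffer ends at byte 13; with 8-byte alignment the next
  // candidate offset becomes 16. A 48-byte buffer then occupies [16, 64),
  // which fits a pool hinted at 64 bytes but not one hinted at 63.
  size_t next_offset = round_up(13, 8);
  std::printf("next_offset = %zu\n", next_offset);                 // 16
  std::printf("fits 64-byte pool: %d\n", next_offset + 48 <= 64);  // 1
  std::printf("fits 63-byte pool: %d\n", next_offset + 48 <= 63);  // 0
  return 0;
}
```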
diff --git a/src/tir/usmp/algo/hill_climb.cc b/src/tir/usmp/algo/hill_climb.cc
index 659dff25274b..c642bb964c7f 100644
--- a/src/tir/usmp/algo/hill_climb.cc
+++ b/src/tir/usmp/algo/hill_climb.cc
@@ -18,7 +18,7 @@
  */
 
 /*!
- * \file tir/analysis/usmp/algo/greedy_by_size.cc
+ * \file tir/analysis/usmp/algo/hill_climb.cc
  * \brief Implement greedy by size memory planning algorithm
  */
 
@@ -26,6 +26,7 @@
 #include <tvm/tir/builtin.h>
 #include <tvm/tir/function.h>
 #include <tvm/tir/stmt_functor.h>
+#include <tvm/tir/usmp/algo/greedy.h>
 #include <tvm/tir/usmp/utils.h>
 
 #include <algorithm>
@@ -36,313 +37,296 @@
 namespace tvm {
 namespace tir {
 namespace usmp {
 namespace algo {
 
-/*!
- * \brief Rounds up the offset to satisfy the alignement requirement
- */
-static size_t round_up_to_byte_alignment(const size_t& non_aligned_byte_offset,
-                                         const int& byte_alignment) {
-  return ((non_aligned_byte_offset + byte_alignment - 1) / byte_alignment) * byte_alignment;
-}
-
-/*!
- * \brief A helper function check whether a offset is valid given the constraints
- */
-static bool IsValidPlacement(const PoolInfo& candidate_pool, const size_t& next_offset,
-                             const size_t& size_bytes) {
-  if (candidate_pool->size_hint_bytes == -1) {
-    // this means pool is not bounded
-    return true;
-  }
-  auto pool_size = static_cast<size_t>(candidate_pool->size_hint_bytes->value);
-  auto max_address = next_offset + size_bytes;
-  if (max_address <= pool_size) {
-    return true;
-  }
-  return false;
-}
-
-/*!
- * \brief Selects a pool for placement in the given set of ordered pool candidates
- */
-static PoolInfo SelectPlacementPool(
-    const BufferInfo& buf_info,
-    const std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual>& pool_offsets) {
-  // Here the pool candidates are ordered when it is consumed by the algorithm.
-  // This could be from order the user has specified. However, schedulers are
-  // welcome to change the order for performance reasons.
-  for (const auto& pool_info : buf_info->pool_candidates) {
-    if (pool_offsets.count(pool_info)) {
-      return pool_info;
-    }
-  }
-  CHECK(false) << "TVM USMP Error: the space available in the provided pools exceeded when "
-                  "trying to allocate the buffer : "
-               << buf_info << "\n. Please increase the size_hints for memory pools.";
-  return PoolInfo();
-}
-
-struct _ptr_hash {
-  template <typename T>
-  size_t operator()(const T& a) const {
-    return std::hash<T>()(a);
-  }
-};
-
-using alloc_map_t = std::unordered_map<const BufferInfoNode*, PoolAllocation, _ptr_hash>;
-
-static void sort_vector(std::vector<BufferInfo>* buffer_info_vec) {
-  std::sort(buffer_info_vec->begin(), buffer_info_vec->end(),
-            [](const BufferInfo& a, const BufferInfo& b) {
-              if (a->size_bytes->value == b->size_bytes->value) {
-                if (a->conflicts.size() == b->conflicts.size()) {
-                  auto a_name_hash = std::hash<std::string>{}(a->name_hint->data);
-                  auto b_name_hash = std::hash<std::string>{}(b->name_hint->data);
-                  return a_name_hash > b_name_hash;
-                } else {
-                  return a->conflicts.size() > b->conflicts.size();
-                }
-              }
-              return a->size_bytes->value > b->size_bytes->value;
-            });
-}
-
-/*
- * Modified version of greedy allocation from greedy_by_size.cc
- */
-static void greedy(std::vector<BufferInfo>* buffer_info_vec, alloc_map_t* pool_allocations) {
-  for (const auto& buf_info : *buffer_info_vec) {
-    std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> pool_offset_candidates;
-    for (const auto& pool_info : buf_info->pool_candidates) {
-      if (algo::IsValidPlacement(pool_info, 0, buf_info->size_bytes->value)) {
-        pool_offset_candidates[pool_info] = 0;
-      }
-    }
-
-    std::vector<const BufferInfoNode*> buf_conf;
-    for (const auto& conflict_buf_info_obj : buf_info->conflicts) {
-      const BufferInfoNode* conflict_buf_info = conflict_buf_info_obj.as<BufferInfoNode>();
-      if (pool_allocations->end() != pool_allocations->find(conflict_buf_info)) {
-        buf_conf.push_back(conflict_buf_info);
-      }
-    }
-
-    // extra sorting for pool offsets
-    std::sort(buf_conf.begin(), buf_conf.end(), [&pool_allocations](const auto* a, const auto* b) {
-      return pool_allocations->operator[](a)->byte_offset->value <
-             pool_allocations->operator[](b)->byte_offset->value;
-    });
-
-    for (const auto* conflict_buf_info : buf_conf) {
-      size_t next_offset = 0;
-      auto pool_allocation = pool_allocations->operator[](conflict_buf_info);
-      next_offset = pool_allocation->byte_offset + conflict_buf_info->size_bytes;
-      next_offset = round_up_to_byte_alignment(next_offset, conflict_buf_info->alignment->value);
-      if (!pool_offset_candidates.count(pool_allocation->pool_info)) {
-        continue;
-      }
-      if (IsValidPlacement(pool_allocation->pool_info, next_offset, buf_info->size_bytes->value)) {
-        if (next_offset > pool_offset_candidates[pool_allocation->pool_info] &&
-            pool_offset_candidates[pool_allocation->pool_info] +
-                    static_cast<size_t>(buf_info->size_bytes) >
-                static_cast<size_t>(pool_allocation->byte_offset)) {
-          pool_offset_candidates[pool_allocation->pool_info] = next_offset;
-        }
-      } else {
-        pool_offset_candidates.erase(pool_allocation->pool_info);
-      }
-    }
-    auto selected_pool = algo::SelectPlacementPool(buf_info, pool_offset_candidates);
-    pool_allocations->operator[](buf_info.as<BufferInfoNode>()) =
-        PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool]));
-  }
-}
-
-/*
- * Finds highes allocated memory address for each pool
- */
-static std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> find_highest(
-    alloc_map_t* pool_allocations) {
-  std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> max_pool_size;
-  for (const auto it : *pool_allocations) {
-    const BufferInfoNode* buf = it.first;
-    const PoolAllocation& pa = it.second;
-    size_t high_sz = pa->byte_offset + buf->size_bytes;
-    if (max_pool_size[pa->pool_info] <= high_sz) {
-      max_pool_size[pa->pool_info] = high_sz;
-    }
-  }
-  return max_pool_size;
-}
-
-/*
- * Simulated annealing / Hill climb
- *
- * Works by continiously invoking modified 'greedy-by-size' allocation
- * assessing the result and introduce permutations which hopefully
- * will led to more 'compact' memory allocation.
- */
-Map<BufferInfo, PoolAllocation> HillClimb(const Array<BufferInfo>& buffer_info_arr,
-                                          const Integer& desired_bytes) {
-// rand_r does not exist on Windows platform
-#if defined(__linux__) || defined(__ANDROID__)
-  unsigned int _seedp = 0;
-#define rnd_func() rand_r(&_seedp)
-#else
-#define rnd_func() rand()
-#endif
-  std::vector<BufferInfo> buffer_info_vec;
-  for (const auto& buffer_info : buffer_info_arr) {
-    ICHECK(buffer_info->pool_candidates.size())
-        << "Cannot process buffer \"" << buffer_info->name_hint << "\" with no pool candidates";
-    buffer_info_vec.push_back(std::move(buffer_info));
-  }
-
-  sort_vector(&buffer_info_vec);
-
-  // populate positional index map
-  std::unordered_map<const BufferInfoNode*, int> _pos_map;
-  for (size_t index = 0; index < buffer_info_vec.size(); ++index) {
-    _pos_map[buffer_info_vec[index].as<BufferInfoNode>()] = index;
-  }
-
-  // size_t first_attempt_size = 0;
-  size_t total_size = 0;
-  int attempts = 0;
-  // int successful_iteration = 0;
-
-  int swap_i1 = -1;
-  int swap_i2 = -1;
-  size_t desired_bytes_ = desired_bytes;
-  constexpr auto _max_attempts = 500;
-  alloc_map_t rollback_pool_allocations;
-  alloc_map_t result_pool_allocations;
-  alloc_map_t pool_allocations;
-
-  auto swap_buffers = [&buffer_info_vec, &_pos_map](int i1, int i2) {
-    if (i1 == i2) return;
-    auto b1 = buffer_info_vec[i1];
-    auto b2 = buffer_info_vec[i2];
-    buffer_info_vec[i1] = b2;
-    buffer_info_vec[i2] = b1;
-
-    _pos_map[b1.as<BufferInfoNode>()] = i2;
-    _pos_map[b2.as<BufferInfoNode>()] = i1;
-  };
-
-  auto _pos = [&_pos_map](const auto* e) {
-    auto it = _pos_map.find(e);
-    if (it != _pos_map.end()) {
-      return it->second;
-    }
-    LOG(FATAL) << "not indexed";
-    return -1;
-  };
-
-  for (; attempts < _max_attempts; ++attempts) {
-    rollback_pool_allocations = std::move(pool_allocations);
-    greedy(&buffer_info_vec, &pool_allocations);
-
-    // estimate result buffers
-    auto max_pool_size = find_highest(&pool_allocations);
-
-    // calculate summary
-    size_t total = 0;
-    for (const auto& el : max_pool_size) {
-      total += el.second;
-    }
-    // accept/reject result heuristic
-    if (!total_size ||
-        (total_size > total ||
-         rnd_func() % 100 < static_cast<int>(300 * (total - total_size) / total / attempts))) {
-      // remember winning combination
-      result_pool_allocations = pool_allocations;
-      total_size = total;
-
-      // reached desired size
-      if (total_size <= desired_bytes_) {
-        break;
-      }
-
-    } else {
-      // rollback
-      swap_buffers(swap_i2, swap_i1);
-      pool_allocations = std::move(rollback_pool_allocations);
-      max_pool_size = find_highest(&pool_allocations);
-    }
-
-    std::vector<const BufferInfoNode*> max_pool_buf;
-
-    for (const auto& it : pool_allocations) {
-      const auto* buf = it.first;
-      const auto pa = it.second;
-      size_t high_sz = pa->byte_offset + buf->size_bytes;
-      if (max_pool_size[pa->pool_info] == high_sz) {
-        max_pool_buf.push_back(buf);
-      }
-    }
-
-    // pick highest
-    const BufferInfoNode* suspect = max_pool_buf[rand() % max_pool_buf.size()];
-    PoolAllocation suspect_pa = pool_allocations[suspect];
-
-    std::unordered_map<int, const BufferInfoNode*> first_level_set;
-    std::unordered_map<int, const BufferInfoNode*> second_level_set;
-
-    auto suspect_pos = _pos(suspect);
-    for (const auto& c1 : suspect->conflicts) {
-      const auto* c1_buf = c1.as<BufferInfoNode>();
-      int c1_pos = _pos(c1_buf);
-      if (suspect_pos > c1_pos) {
-        first_level_set[c1_pos] = c1_buf;
-      }
-      int c2_pos = -1;
-      for (const auto& c2 : c1_buf->conflicts) {
-        const auto c2_buf = c2.as<BufferInfoNode>();
-        if (c1_pos > (c2_pos = _pos(c2_buf))) {
-          second_level_set[c2_pos] = c2_buf;
-        }
-      }
-    }
-
-    std::vector<const BufferInfoNode*> first_level;
-    for (const auto& i : first_level_set) {
-      first_level.push_back(i.second);
-    }
-    std::vector<const BufferInfoNode*> second_level;
-    for (const auto& i : second_level_set) {
-      second_level.push_back(i.second);
-    }
-
-    if (!(first_level.size() + second_level.size())) {
-      continue;
-    }
-
-    // pick the buffers
-    const BufferInfoNode* swap_buf2 =
-        second_level.size() && (!first_level.size() || (rnd_func() % 100 > 30))
-            ? second_level[rand() % second_level.size()]
-            : first_level[rand() % first_level.size()];
-    const BufferInfoNode* swap_buf1 =
-        second_level.size() && (!first_level.size() || (rnd_func() % 100 > 30))
-            ? second_level[rand() % second_level.size()]
-            : first_level[rand() % first_level.size()];
-
-    if (swap_buf1 == swap_buf2) {
-      continue;
-    }
-
-    swap_i1 = _pos(swap_buf1);
-    swap_i2 = _pos(swap_buf2);
-    // do swap
-    swap_buffers(swap_i1, swap_i2);
-  }
-
-  Map<BufferInfo, PoolAllocation> result;
-  for (auto it : pool_allocations) {
-    result.Set(GetRef<BufferInfo>(it.first), it.second);
-  }
-  return result;
-}
+/*
+ * Simulated annealing / Hill climb
+ *
+ * Works by continuously invoking 'greedy-by-size' allocation,
+ * assessing the result, and introducing permutations to the allocation
+ * order which hopefully will lead to a more 'compact' memory allocation.
+ */
+class HillClimbAllocator : public GreedyBase {
+ private:
+  size_t memory_pressure_ = 0;
+
+ public:
+  explicit HillClimbAllocator(size_t memory_pressure)
+      : GreedyBase(), memory_pressure_(memory_pressure) {}
+
+ protected:
+  using alloc_map_t = std::unordered_map<const BufferInfoNode*, PoolAllocation>;
+
+  /*
+   * Initial sorting routine
+   */
+  void sort_vector(std::vector<BufferInfo>* buffer_info_vec) {
+    std::sort(buffer_info_vec->begin(), buffer_info_vec->end(),
+              [](const BufferInfo& a, const BufferInfo& b) {
+                if (a->size_bytes->value == b->size_bytes->value) {
+                  if (a->conflicts.size() == b->conflicts.size()) {
+                    auto a_name_hash = std::hash<std::string>{}(a->name_hint->data);
+                    auto b_name_hash = std::hash<std::string>{}(b->name_hint->data);
+                    return a_name_hash > b_name_hash;
+                  } else {
+                    return a->conflicts.size() > b->conflicts.size();
+                  }
+                }
+                return a->size_bytes->value > b->size_bytes->value;
+              });
+  }
+
+  /*
+   * HillClimb's version of greedy allocation
+   * \param buffer_info_vec - buffers in a specific order for allocation
+   */
+  alloc_map_t greedy(const std::vector<BufferInfo>& buffer_info_vec) {
+    alloc_map_t pool_allocations(buffer_info_vec.size());
+    for (const auto& buf_info : buffer_info_vec) {
+      std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> pool_offset_candidates;
+      for (const auto& pool_info : buf_info->pool_candidates) {
+        if (IsValidPlacement(pool_info, 0, buf_info->size_bytes->value)) {
+          pool_offset_candidates[pool_info] = 0;
+        }
+      }
+
+      std::vector<const BufferInfoNode*> buf_conf;
+      for (const auto& conflict_buf_info_obj : buf_info->conflicts) {
+        const BufferInfoNode* conflict_buf_info = conflict_buf_info_obj.as<BufferInfoNode>();
+        if (pool_allocations.end() != pool_allocations.find(conflict_buf_info)) {
+          buf_conf.push_back(conflict_buf_info);
+        }
+      }
+
+      // extra sorting for pool offsets
+      std::sort(buf_conf.begin(), buf_conf.end(),
+                [&pool_allocations](const auto* a, const auto* b) {
+                  return pool_allocations[a]->byte_offset->value <
+                         pool_allocations[b]->byte_offset->value;
+                });
+
+      for (const auto* conflict_buf_info : buf_conf) {
+        size_t next_offset = 0;
+        auto pool_allocation = pool_allocations[conflict_buf_info];
+        next_offset = pool_allocation->byte_offset + conflict_buf_info->size_bytes;
+        next_offset = round_up_to_byte_alignment(next_offset, conflict_buf_info->alignment->value);
+        if (!pool_offset_candidates.count(pool_allocation->pool_info)) {
+          continue;
+        }
+        if (IsValidPlacement(pool_allocation->pool_info, next_offset,
+                             buf_info->size_bytes->value)) {
+          if (next_offset > pool_offset_candidates[pool_allocation->pool_info] &&
+              pool_offset_candidates[pool_allocation->pool_info] +
+                      static_cast<size_t>(buf_info->size_bytes) >
+                  static_cast<size_t>(pool_allocation->byte_offset)) {
+            pool_offset_candidates[pool_allocation->pool_info] = next_offset;
+          }
+        } else {
+          pool_offset_candidates.erase(pool_allocation->pool_info);
+        }
+      }
+      auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);
+      pool_allocations[buf_info.as<BufferInfoNode>()] =
+          PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool]));
+    }
+    return pool_allocations;
+  }
+
+  /*
+   * Finds the highest allocated memory address for each pool
+   */
+  std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> find_highest(
+      alloc_map_t* pool_allocations) {
+    std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> pool_sizes;
+    for (const auto& it : *pool_allocations) {
+      const BufferInfoNode* buf = it.first;
+      const PoolAllocation& pa = it.second;
+      size_t high_sz = pa->byte_offset + buf->size_bytes;
+      if (pool_sizes[pa->pool_info] <= high_sz) {
+        pool_sizes[pa->pool_info] = high_sz;
+      }
+    }
+    return pool_sizes;
+  }
+
+  /*
+   * Collects lists of first and second level neighbors for the provided buf.
+   * First level are the immediate neighbors of the buf and
+   * second level are the immediate neighbors of the first level nodes.
+   */
+  template <typename TPos>
+  void collect_neighbor_lists(const BufferInfoNode* buf,
+                              std::vector<const BufferInfoNode*>* first_level,
+                              std::vector<const BufferInfoNode*>* second_level, const TPos& _pos) {
+    std::unordered_map<int, const BufferInfoNode*> first_level_set;
+    std::unordered_map<int, const BufferInfoNode*> second_level_set;
+
+    auto buf_pos = _pos(buf);
+    for (const auto& c1 : buf->conflicts) {
+      const auto* c1_buf = c1.as<BufferInfoNode>();
+      int c1_pos = _pos(c1_buf);
+      if (buf_pos > c1_pos) {
+        first_level_set[c1_pos] = c1_buf;
+      }
+      int c2_pos = -1;
+      for (const auto& c2 : c1_buf->conflicts) {
+        const auto c2_buf = c2.as<BufferInfoNode>();
+        if (c1_pos > (c2_pos = _pos(c2_buf))) {
+          second_level_set[c2_pos] = c2_buf;
+        }
+      }
+    }
+
+    for (const auto& i : first_level_set) {
+      first_level->push_back(i.second);
+    }
+    for (const auto& i : second_level_set) {
+      second_level->push_back(i.second);
+    }
+  }
+
+ public:
+  Map<BufferInfo, PoolAllocation> PlanMemory(const Array<BufferInfo>& buffer_info_arr) {
+// rand_r does not exist on Windows platform
+#if defined(__linux__) || defined(__ANDROID__)
+    unsigned int _seedp = 0;
+#define rnd_func() rand_r(&_seedp)
+#else
+#define rnd_func() rand()
+#endif
+    std::vector<BufferInfo> buffer_info_vec;
+    for (const auto& buffer_info : buffer_info_arr) {
+      ICHECK(buffer_info->pool_candidates.size())
+          << "Cannot process buffer \"" << buffer_info->name_hint << "\" with no pool candidates";
+      buffer_info_vec.push_back(buffer_info);
+    }
+
+    sort_vector(&buffer_info_vec);
+
+    // populate positional index map
+    std::unordered_map<const BufferInfoNode*, int> _pos_map;
+    for (size_t index = 0; index < buffer_info_vec.size(); ++index) {
+      _pos_map[buffer_info_vec[index].as<BufferInfoNode>()] = index;
+    }
+
+    size_t total_size = 0;
+    int attempts = 0;
+
+    int swap_i1 = -1;
+    int swap_i2 = -1;
+    size_t desired_bytes_ = memory_pressure_;
+    constexpr auto _max_attempts = 500;
+    alloc_map_t rollback_pool_allocations;
+    alloc_map_t result_pool_allocations;
+    alloc_map_t pool_allocations;
+
+    auto swap_buffers = [&buffer_info_vec, &_pos_map](int i1, int i2) {
+      if (i1 == i2) return;
+      auto b1 = buffer_info_vec[i1];
+      auto b2 = buffer_info_vec[i2];
+      buffer_info_vec[i1] = b2;
+      buffer_info_vec[i2] = b1;
+
+      _pos_map[b1.as<BufferInfoNode>()] = i2;
+      _pos_map[b2.as<BufferInfoNode>()] = i1;
+    };
+
+    auto _pos = [&_pos_map](const auto* e) {
+      auto it = _pos_map.find(e);
+      if (it != _pos_map.end()) {
+        return it->second;
+      }
+      LOG(FATAL) << "node is not indexed in the _pos_map";
+      return -1;
+    };
+
+    for (; attempts < _max_attempts; ++attempts) {
+      rollback_pool_allocations = std::move(pool_allocations);
+      pool_allocations = greedy(buffer_info_vec);
+
+      // estimate result buffers
+      std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual> pool_sizes =
+          find_highest(&pool_allocations);
+      // calculate summary
+      size_t total = 0;
+      for (const auto& el : pool_sizes) {
+        total += el.second;
+      }
+      // accept/reject result heuristic
+      if (!total_size || /* first run */
+          (total_size > total || /* always accept if better or with some probability */
+           rnd_func() % 100 < static_cast<int>(50 * (total - total_size) / total / attempts))) {
+        // remember winning combination
+        result_pool_allocations = pool_allocations;
+        total_size = total;
+
+        // reached desired size
+        if (total_size <= desired_bytes_) {
+          break;
+        }
+
+      } else {
+        // rollback
+        swap_buffers(swap_i2, swap_i1);
+        pool_allocations = std::move(rollback_pool_allocations);
+        pool_sizes = find_highest(&pool_allocations);
+      }
+
+      std::vector<const BufferInfoNode*> max_pool_buf;
+
+      for (const auto& it : pool_allocations) {
+        const auto* buf = it.first;
+        const auto pa = it.second;
+        size_t high_sz = pa->byte_offset + buf->size_bytes;
+        if (pool_sizes[pa->pool_info] == high_sz) {
+          max_pool_buf.push_back(buf);
+        }
+      }
+
+      // pick one of the highest
+      const BufferInfoNode* node = max_pool_buf[rnd_func() % max_pool_buf.size()];
+      std::vector<const BufferInfoNode*> first_level;
+      std::vector<const BufferInfoNode*> second_level;
+      collect_neighbor_lists(node, &first_level, &second_level, _pos);
+
+      // retry if no first level neighbors were collected
+      if (!first_level.size()) {
+        continue;
+      }
+
+      // pick the buffers
+      const BufferInfoNode* swap_buf1 = first_level[rnd_func() % first_level.size()];
+      const BufferInfoNode* swap_buf2 = swap_buf1;
+      while (swap_buf2 == swap_buf1) {
+        swap_buf2 = second_level.size() && (!first_level.size() || (rnd_func() % 100 > 25))
+                        ? second_level[rnd_func() % second_level.size()]
+                        : first_level[rnd_func() % first_level.size()];
+
+        if (second_level.size() < 2 && first_level.size() < 2) break;
+      }
+      if (swap_buf1 == swap_buf2) {
+        continue;
+      }
+
+      swap_i1 = _pos(swap_buf1);
+      swap_i2 = _pos(swap_buf2);
+      // do swap
+      swap_buffers(swap_i1, swap_i2);
+    }
+
+    Map<BufferInfo, PoolAllocation> result;
+    // return winning combination
+    for (auto it : result_pool_allocations) {
+      result.Set(GetRef<BufferInfo>(it.first), it.second);
+    }
+    return result;
+  }
+};
+
+Map<BufferInfo, PoolAllocation> HillClimb(const Array<BufferInfo>& buffer_info_arr,
+                                          const Integer& memory_pressure) {
+  return HillClimbAllocator(memory_pressure).PlanMemory(buffer_info_arr);
+}
 
 TVM_REGISTER_GLOBAL("tir.usmp.algo.hill_climb")
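The loop above is a hill climb with an annealing-style twist: a strictly better allocation order is always kept, while a worse one is still kept occasionally, with a probability that decays as the attempt count grows, so the search can escape local minima before settling. Its accept/reject decision, distilled into a standalone skeleton (a restatement for clarity, not the in-tree code):

```cpp
#include <cstddef>
#include <cstdlib>

// Mirrors the accept/reject heuristic in HillClimbAllocator::PlanMemory:
// accept on the first run, on any improvement, or with a probability that
// shrinks as `attempt` grows.
bool accept_result(size_t best_total, size_t candidate_total, int attempt) {
  if (best_total == 0 || candidate_total < best_total) {
    return true;  // first run, or strictly better
  }
  int chance = static_cast<int>(50 * (candidate_total - best_total) / candidate_total / attempt);
  return std::rand() % 100 < chance;  // occasionally keep a regression
}
```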
diff --git a/tests/python/unittest/test_tir_usmp_algo_hill_climb.py b/tests/python/unittest/test_tir_usmp_algo_hill_climb.py
index e5473023fc55..a5f1158a90c1 100644
--- a/tests/python/unittest/test_tir_usmp_algo_hill_climb.py
+++ b/tests/python/unittest/test_tir_usmp_algo_hill_climb.py
@@ -14,6 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import sys
 import pytest
 import random
 import tvm
@@ -21,6 +22,7 @@
 
 
 def _check_max_workspace_size(buffer_pool_allocations, pool_info, size):
+    """Helper to check maximum allocated memory size"""
    max_workspace_size = 0
     for buffer_info, pool_allocation in buffer_pool_allocations.items():
         if pool_allocation.pool_info == pool_info:
@@ -28,15 +30,16 @@
             if size_candidate > max_workspace_size:
                 max_workspace_size = size_candidate
     _diff = max_workspace_size.value - size
-    assert (
-        max_workspace_size.value == size
-    ), "'{}': expected {} got {}, diff {:0.2f}% ({} bytes)".format(
-        pool_info.pool_name, size, max_workspace_size, 100 * _diff / size, _diff
+    return (
+        (max_workspace_size.value == size),
+        "'{}': expected {} got {}, diff {:0.2f}% ({} bytes)".format(
+            pool_info.pool_name, size, max_workspace_size, 100 * _diff / size, _diff
+        ),
     )
 
 
 def _verify_conflicts(buffer_info, pool_allocation, buffer_info_map):
-    """helper to check expected liveness conflicts"""
+    """Helper to check expected liveness conflicts"""
     for conflict in buffer_info.conflicts:
         conflict_pool_allocation = buffer_info_map[conflict]
 
@@ -57,12 +60,13 @@
 
 
 def _verify_all_conflicts(buffer_pool_allocations):
+    """Helper to verify liveness conflicts"""
     for buffer_info, pool_allocation in buffer_pool_allocations.items():
-        # print( "Processing ", name )
         _verify_conflicts(buffer_info, pool_allocation, buffer_pool_allocations)
 
 
 def test_bounded(random_len=150, pools=[PoolInfo("default", {}, 65535), PoolInfo("slow", {})]):
+    """Tests two pools, one bounded and one unbounded"""
     random.seed(0)
     mem_range = [BufferInfo(str(i), random.randrange(1, 65535), pools) for i in range(random_len)]
     for mr in mem_range:
@@ -79,6 +83,7 @@
 
 
 def __test_data_alloc_max():
+    """Test data"""
     intervals = [
         (0, 159, 2048),
         (0, 13, 7904),
@@ -90,6 +95,7 @@
 
 
 def __test_data_deep_speech():
+    """Test data"""
     intervals = [
         (0, 159, 2048),
         (0, 151, 2048),
@@ -280,6 +286,7 @@
 
 
 def __test_data_five():
+    """Test data"""
     return [
         (4, 5, 95),
         (1, 4, 52135),
@@ -290,6 +297,7 @@
 
 
 def __test_data_simple():
+    """Test data"""
     return [
         (0, 23, 131072),  # 0
         (4, 5, 65568),  # 1
@@ -304,8 +312,8 @@
     ]
 
 
-def maximumFromIntervals(intervals):
-    # expected list of intervals of (start, end, size)
+def find_maximum_from_intervals(intervals):
+    """Computes the expected peak memory from a list of (start, end, size) intervals"""
     sorted_list = sorted(intervals, key=lambda _: _[0])
     max_mem = 0
     for t in range(sorted_list[0][0], sorted_list[-1][1] + 1):
@@ -320,11 +328,14 @@
     [__test_data_alloc_max(), __test_data_simple(), __test_data_deep_speech(), __test_data_five()],
 )
 def test_intervals(intervals):
+    """Tests supplied intervals"""
+    random.seed(0)
     result = run_intervals(intervals)
     assert result["tir.usmp.algo.hill_climb"] == True, f" {result}"
 
 
 def generate_range(sz, max_segment_sz=65535):
+    """Helper to generate sz random intervals with sizes up to max_segment_sz"""
     for i in range(0, sz):
         start = random.randrange(i, sz)
         stop = random.randrange(start + 1, start + 2 + ((sz - start) // 2))
@@ -332,37 +343,16 @@
         yield (start, stop, random.randrange(1, max_segment_sz))
 
 
-@pytest.mark.skip()
-def test_10_random_intervals():
-    __test_n_random_intervals(10)
-
-
-@pytest.mark.skip()
-def test_100_random_intervals():
-    __test_n_random_intervals(100)
-
-
-def __test_n_random_intervals(n=1):
-    result = {}
-    for i in range(n):
-        result["total_runs"] = i + 1
-        r = test_random_intervals(100)
-        for k, v in r.items():
-            if k in result.keys():
-                result[k] += int(v)
-            else:
-                result[k] = int(v)
-
-    print(result)
-
-
 def test_random_intervals(interval_len=16):
+    """Tests a randomly generated set of interval_len intervals"""
+    random.seed(0)
     intervals = list(generate_range(interval_len))
     return run_intervals(intervals)
 
 
 def run_intervals(intervals):
-    expected_mem = maximumFromIntervals(intervals)
+    """Helper to run the planning algorithms on the given intervals"""
+    expected_mem = find_maximum_from_intervals(intervals)
     pools = [PoolInfo("default", {})]
     buffers = []
     # populate
@@ -396,14 +386,12 @@
         print()
 
     _verify_all_conflicts(buffer_info_arr)
-    try:
-        _check_max_workspace_size(buffer_info_arr, pools[0], expected_mem)
-        result[alg] = True
-    except AssertionError as e:
-        print(alg, e)
-        result[alg] = False
+    result[alg], msg = _check_max_workspace_size(buffer_info_arr, pools[0], expected_mem)
+    if not result[alg]:
+        print(alg, msg)
+
     return result
 
 
 if __name__ == "__main__":
-    pytest.main([__file__])
+    sys.exit(pytest.main([__file__] + sys.argv[1:]))
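`find_maximum_from_intervals` supplies the reference value the checks compare against: sweep each time step, sum the sizes of all intervals live at that step, and keep the maximum. The same idea restated as a self-contained C++ sketch (a hypothetical helper, kept in C++ to match the rest of the patch; assumes a non-empty interval list):

```cpp
#include <algorithm>
#include <cstddef>
#include <tuple>
#include <vector>

// Peak memory of a set of (start, end, size) liveness intervals: the largest
// sum of sizes over all intervals alive at any single time step.
size_t max_from_intervals(const std::vector<std::tuple<int, int, size_t>>& intervals) {
  int lo = std::get<0>(intervals.front());
  int hi = std::get<1>(intervals.front());
  for (const auto& iv : intervals) {
    lo = std::min(lo, std::get<0>(iv));
    hi = std::max(hi, std::get<1>(iv));
  }
  size_t max_mem = 0;
  for (int t = lo; t <= hi; ++t) {
    size_t mem = 0;
    for (const auto& iv : intervals) {
      if (std::get<0>(iv) <= t && t <= std::get<1>(iv)) mem += std::get<2>(iv);
    }
    max_mem = std::max(max_mem, mem);
  }
  return max_mem;
}
```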