diff --git a/src/v/cluster/BUILD b/src/v/cluster/BUILD index 2ef8fa4e32038..8cc31b7e9d62e 100644 --- a/src/v/cluster/BUILD +++ b/src/v/cluster/BUILD @@ -550,7 +550,6 @@ redpanda_cc_library( "scheduling/constraints.h", "scheduling/leader_balancer.h", "scheduling/leader_balancer_constraints.h", - "scheduling/leader_balancer_greedy.h", "scheduling/leader_balancer_probe.h", "scheduling/leader_balancer_random.h", "scheduling/leader_balancer_strategy.h", diff --git a/src/v/cluster/scheduling/leader_balancer.cc b/src/v/cluster/scheduling/leader_balancer.cc index cb7edafb0c1c7..94cacb29dbc15 100644 --- a/src/v/cluster/scheduling/leader_balancer.cc +++ b/src/v/cluster/scheduling/leader_balancer.cc @@ -17,7 +17,6 @@ #include "cluster/logger.h" #include "cluster/members_table.h" #include "cluster/partition_leaders_table.h" -#include "cluster/scheduling/leader_balancer_greedy.h" #include "cluster/scheduling/leader_balancer_random.h" #include "cluster/scheduling/leader_balancer_strategy.h" #include "cluster/scheduling/leader_balancer_types.h" @@ -578,30 +577,12 @@ ss::future leader_balancer::balance() { auto muted_nodes = collect_muted_nodes(health_report.value()); - auto mode = config::shard_local_cfg().leader_balancer_mode(); - std::unique_ptr strategy; - - switch (mode) { - case model::leader_balancer_mode::random_hill_climbing: { - vlog(clusterlog.debug, "using random_hill_climbing"); - - strategy = std::make_unique< - leader_balancer_types::random_hill_climbing_strategy>( - std::move(index), - std::move(group_id_to_topic), - leader_balancer_types::muted_index{std::move(muted_nodes), {}}, - std::move(preference_index)); - break; - } - case model::leader_balancer_mode::greedy_balanced_shards: - vlog(clusterlog.debug, "using greedy_balanced_shards"); - strategy = std::make_unique( - std::move(index), std::move(muted_nodes)); - break; - default: - vlog(clusterlog.error, "unexpected mode value: {}", mode); - co_return ss::stop_iteration::no; - } + std::unique_ptr strategy + = std::make_unique( + std::move(index), + std::move(group_id_to_topic), + leader_balancer_types::muted_index{std::move(muted_nodes), {}}, + std::move(preference_index)); auto cores = strategy->stats(); diff --git a/src/v/cluster/scheduling/leader_balancer_constraints.cc b/src/v/cluster/scheduling/leader_balancer_constraints.cc index afc73cdbb8f04..b6b45b5b29e23 100644 --- a/src/v/cluster/scheduling/leader_balancer_constraints.cc +++ b/src/v/cluster/scheduling/leader_balancer_constraints.cc @@ -46,15 +46,6 @@ void even_topic_distribution_constraint::update_index(const reassignment& r) { _topic_shard_index.at(topic_id).at(r.to) += 1; } -std::optional -even_topic_distribution_constraint::recommended_reassignment() { - // This method is deprecated and is ony used in `leader_balancer_greedy` - // which doesn't use the `even_topic_distributon_constraint`. Hence there is - // no need to implement it here. Once the greedy balancer has been removed - // this should be removed as well. - vassert(false, "not implemented"); -} - void even_topic_distribution_constraint::rebuild_indexes() { _topic_shard_index.clear(); _topic_replica_index.clear(); @@ -153,64 +144,6 @@ double even_topic_distribution_constraint::adjusted_error( return current_error; } -std::optional -even_shard_load_constraint::recommended_reassignment() { - auto [load, load_map] = build_load_indexes(); - const auto curr_error = error(); - - // Consider each group from high load core, and record the reassignment - // involving the lowest load "to" core. - for (const auto& from : boost::adaptors::reverse(load)) { - if (mi().muted_nodes().contains(from->first.node_id)) { - continue; - } - - constexpr size_t load_unset = std::numeric_limits::max(); - size_t lowest_load = load_unset; - reassignment lowest_reassign{}; - - // Consider each group from high load core, and record the - // reassignment involving the lowest load "to" core. - for (const auto& group : from->second) { - if (mi().muted_groups().contains( - static_cast(group.first))) { - continue; - } - - // iterate over all the replicas and look for the lowest load - // shard in the replica list - for (const auto& to_shard : group.second) { - auto load = load_map.at(to_shard); - if (likely(load >= lowest_load)) { - // there is no point in evaluating this move, it is - // worse than the best one we've found so far. - continue; - } - - if (mi().muted_nodes().contains(to_shard.node_id)) { - continue; - } - - lowest_load = load; - lowest_reassign = {group.first, from->first, to_shard}; - } - } - - if (lowest_load != load_unset) { - // We found a possible reassignment while looking at the current - // "from" shard, and while it is the best possible reassignment - // found it may not improve the error - auto new_error = adjusted_error( - curr_error, lowest_reassign.from, lowest_reassign.to); - if (new_error + error_jitter < curr_error) { - return lowest_reassign; - } - } - } - - return std::nullopt; -} - double even_shard_load_constraint::adjusted_error( double current_error, const model::broker_shard& from, @@ -320,12 +253,4 @@ double pinning_constraint::evaluate_internal(const reassignment& r) { return diff; } -std::optional pinning_constraint::recommended_reassignment() { - // This method is deprecated and is ony used in `leader_balancer_greedy` - // which doesn't use the `even_topic_distributon_constraint`. Hence there is - // no need to implement it here. Once the greedy balancer has been removed - // this should be removed as well. - vassert(false, "not implemented"); -} - } // namespace cluster::leader_balancer_types diff --git a/src/v/cluster/scheduling/leader_balancer_constraints.h b/src/v/cluster/scheduling/leader_balancer_constraints.h index eeb578c632257..f53f3809acb62 100644 --- a/src/v/cluster/scheduling/leader_balancer_constraints.h +++ b/src/v/cluster/scheduling/leader_balancer_constraints.h @@ -133,20 +133,6 @@ class even_topic_distribution_constraint final void update_index(const reassignment& r) override; - /* - * Uses a greedy algorithm to generate a recommended reassignment - * that will reduce error for this constraint. - * - * The gist is that it searches topics in order of how skewed they are. - * Then for each topic it finds a partiton on the node with the most - * leadership and tries to move it to one of its replica nodes. If this - * reassignment reduces error its returned. - * - * If no reassignment can reduce error for this constraint std::nullopt is - * returned. - */ - std::optional recommended_reassignment() override; - private: std::reference_wrapper _si; std::reference_wrapper _mi; @@ -233,23 +219,6 @@ class even_shard_load_constraint final _error = adjusted_error(_error, r.from, r.to); } - /* - * Find a group reassignment that improves overall error. The general - * approach is to select a group from the highest loaded shard and move - * leadership for that group to the least loaded shard that the group is - * compatible with. - * - * Clearly this is a costly method in terms of runtime complexity. - * Measurements for clusters with several thousand partitions indicate a - * real time execution cost of at most a couple hundred micros. Other - * strategies are sure to improve this as we tackle larger configurations. - * - * Muted nodes are nodes that should be treated as if they have no available - * capacity. So do not move leadership to a muted node, but any leaders on a - * muted node should not be touched in case the mute is temporary. - */ - std::optional recommended_reassignment() override; - std::vector stats() const; double calc_target_load() const; @@ -314,8 +283,6 @@ class pinning_constraint final : public soft_constraint { private: double evaluate_internal(const reassignment& r) override; - std::optional recommended_reassignment() override; - private: std::reference_wrapper _group2topic; preference_index _preference_idx; diff --git a/src/v/cluster/scheduling/leader_balancer_greedy.h b/src/v/cluster/scheduling/leader_balancer_greedy.h deleted file mode 100644 index 29e9ca7bf1503..0000000000000 --- a/src/v/cluster/scheduling/leader_balancer_greedy.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2021 Redpanda Data, Inc. - * - * Use of this software is governed by the Business Source License - * included in the file licenses/BSL.md - * - * As of the Change Date specified in that file, in accordance with - * the Business Source License, use of this software will be governed - * by the Apache License, Version 2.0 - */ -#pragma once - -#include "cluster/scheduling/leader_balancer_constraints.h" -#include "cluster/scheduling/leader_balancer_strategy.h" -#include "cluster/scheduling/leader_balancer_types.h" -#include "model/metadata.h" - -#include -#include - -#include -#include - -/* - * Greedy shard balancer strategy is to move leaders from the most loaded core - * to the least loaded core. The strategy treats all cores equally, ignoring - * node-level balancing. - */ -namespace cluster { - -class greedy_balanced_shards final : public leader_balancer_strategy { -public: - explicit greedy_balanced_shards( - index_type cores, absl::flat_hash_set muted_nodes) - : _mi(std::make_unique( - std::move(muted_nodes), leader_balancer_types::muted_groups_t{})) - , _si(std::make_unique( - std::move(cores))) - , _even_shard_load_c(*_si, *_mi) {} - - double error() const final { return _even_shard_load_c.error(); } - - /* - * Find a group reassignment that improves overall error. The general - * approach is to select a group from the highest loaded shard and move - * leadership for that group to the least loaded shard that the group is - * compatible with. - * - * Clearly this is a costly method in terms of runtime complexity. - * Measurements for clusters with several thousand partitions indicate a - * real time execution cost of at most a couple hundred micros. Other - * strategies are sure to improve this as we tackle larger configurations. - * - * Muted nodes are nodes that should be treated as if they have no available - * capacity. So do not move leadership to a muted node, but any leaders on a - * muted node should not be touched in case the mute is temporary. - */ - std::optional - find_movement(const leader_balancer_types::muted_groups_t& skip) final { - _mi->update_muted_groups(skip); - return _even_shard_load_c.recommended_reassignment(); - } - - void apply_movement(const reassignment& r) final { - _even_shard_load_c.update_index(r); - _si->update_index(r); - } - - std::vector stats() const final { - return _even_shard_load_c.stats(); - } - -private: - std::unique_ptr _mi; - std::unique_ptr _si; - leader_balancer_types::even_shard_load_constraint _even_shard_load_c; -}; - -} // namespace cluster diff --git a/src/v/cluster/scheduling/leader_balancer_types.h b/src/v/cluster/scheduling/leader_balancer_types.h index d472b374ba8fa..a044fee586199 100644 --- a/src/v/cluster/scheduling/leader_balancer_types.h +++ b/src/v/cluster/scheduling/leader_balancer_types.h @@ -97,8 +97,6 @@ class soft_constraint { auto ret = evaluate_internal(r); return ret; } - - virtual std::optional recommended_reassignment() = 0; }; } // namespace cluster::leader_balancer_types diff --git a/src/v/cluster/tests/BUILD b/src/v/cluster/tests/BUILD index 9973715c2b6bb..40299d081c697 100644 --- a/src/v/cluster/tests/BUILD +++ b/src/v/cluster/tests/BUILD @@ -110,6 +110,7 @@ redpanda_cc_btest( "//src/v/cluster", "//src/v/model", "//src/v/test_utils:seastar_boost", + "//src/v/utils:to_string", "@abseil-cpp//absl/container:flat_hash_map", "@abseil-cpp//absl/container:flat_hash_set", "@boost//:test", diff --git a/src/v/cluster/tests/leader_balancer_bench.cc b/src/v/cluster/tests/leader_balancer_bench.cc index 4ff87b5732397..5ce7e61817c2c 100644 --- a/src/v/cluster/tests/leader_balancer_bench.cc +++ b/src/v/cluster/tests/leader_balancer_bench.cc @@ -9,7 +9,6 @@ * by the Apache License, Version 2.0 */ #include "cluster/scheduling/leader_balancer_constraints.h" -#include "cluster/scheduling/leader_balancer_greedy.h" #include "cluster/scheduling/leader_balancer_random.h" #include "cluster/scheduling/leader_balancer_types.h" #include "leader_balancer_test_utils.h" @@ -25,19 +24,6 @@ constexpr int shards_per_node = 16; // i.e., cores per node constexpr int groups_per_shard = 80; // group == partition in this context constexpr int replicas = 3; // number of replicas -cluster::leader_balancer_types::group_id_to_topic_id make_gid_to_topic_index( - const cluster::leader_balancer_types::index_type& index) { - cluster::leader_balancer_types::group_id_to_topic_id ret; - - for (const auto& [bs, leaders] : index) { - for (const auto& [group, replicas] : leaders) { - ret.emplace(group, 0); - } - } - - return ret; -} - /* * Measures the time it takes to randomly generate and evaluate every possible * reassignment for a given cluster. The reassignments are evaluated by the @@ -54,7 +40,8 @@ void random_search_eval_bench(bool measure_all) { cluster::leader_balancer_types::muted_index mi{{}, {}}; cluster::leader_balancer_types::shard_index si( leader_balancer_test_utils::copy_cluster_index(index)); - auto gid_topic = make_gid_to_topic_index(si.shards()); + auto gid_topic = leader_balancer_test_utils::make_gid_to_topic_index( + si.shards()); if (measure_all) { perf_tests::start_measuring_time(); @@ -115,34 +102,8 @@ void random_bench() { perf_tests::stop_measuring_time(); } -void balancer_bench(bool measure_all) { - cluster::leader_balancer_strategy::index_type index - = leader_balancer_test_utils::make_cluster_index( - node_count, shards_per_node, groups_per_shard, replicas); - - if (measure_all) { - perf_tests::start_measuring_time(); - } - - auto greed = cluster::greedy_balanced_shards(std::move(index), {}); - vassert(greed.error() == 0, ""); - vassert(greed.error() == 0, "e > 0"); - - // movement should be _from_ the overloaded shard - if (!measure_all) { - perf_tests::start_measuring_time(); - } - auto movement = greed.find_movement({}); - vassert(!movement, "movemement "); - perf_tests::do_not_optimize(movement); - perf_tests::stop_measuring_time(); -} - } // namespace -PERF_TEST(lb, even_shard_load_movement) { balancer_bench(false); } -PERF_TEST(lb, even_shard_load_all) { balancer_bench(true); } - PERF_TEST(lb, random_eval_movement) { random_search_eval_bench< cluster::leader_balancer_types::random_reassignments>(false); diff --git a/src/v/cluster/tests/leader_balancer_constraints_test.cc b/src/v/cluster/tests/leader_balancer_constraints_test.cc index 467fb5e210e3a..a6ddec80e83d2 100644 --- a/src/v/cluster/tests/leader_balancer_constraints_test.cc +++ b/src/v/cluster/tests/leader_balancer_constraints_test.cc @@ -33,184 +33,15 @@ #include #include -BOOST_AUTO_TEST_CASE(greedy_empty) { - // empty spec, expect no movement - BOOST_REQUIRE(no_movement({})); -} - -BOOST_AUTO_TEST_CASE(greedy_balanced) { - // single node is already balanced, expect no movement - BOOST_REQUIRE(no_movement({ - {{1, 2, 3}, {}}, - })); - BOOST_REQUIRE(no_movement({ - {{1}, {2}}, - {{2}, {1}}, - })); - - // not exactly balanced, but as balanced as possible, - // i.e., lead loaded node has 1 less than the most loaded node - BOOST_REQUIRE(no_movement({ - {{1, 3}, {2}}, - {{2}, {1, 3}}, - })); -} - -BOOST_AUTO_TEST_CASE(greedy_movement) { - // 10 nodes - // 2 cores x node - // 10 partitions per shard - // r=3 (3 replicas) - auto index = leader_balancer_test_utils::make_cluster_index(10, 2, 10, 3); - auto shard_index = lbt::shard_index( - leader_balancer_test_utils::copy_cluster_index(index)); - auto mute_index = lbt::muted_index({}, {}); - - auto greed = lbt::even_shard_load_constraint(shard_index, mute_index); - BOOST_REQUIRE_EQUAL(greed.error(), 0); - - // new groups on shard {2, 0} - auto shard20 = model::broker_shard{model::node_id{2}, 0}; - index[shard20][raft::group_id(20)] = index[shard20][raft::group_id(3)]; - index[shard20][raft::group_id(21)] = index[shard20][raft::group_id(3)]; - index[shard20][raft::group_id(22)] = index[shard20][raft::group_id(3)]; - - shard_index = lbt::shard_index( - leader_balancer_test_utils::copy_cluster_index(index)); - auto greed2 = lbt::even_shard_load_constraint(shard_index, mute_index); - BOOST_REQUIRE_GT(greed2.error(), 0); - - // movement should be _from_ the overloaded shard - auto movement = greed2.recommended_reassignment(); - BOOST_REQUIRE(movement); - check_valid(shard_index.shards(), *movement); - BOOST_REQUIRE_EQUAL(movement->from, shard20); -} - -BOOST_AUTO_TEST_CASE(greedy_obeys_replica) { - // unbalanced, but the overloaded node has no replicas it can - // send its groups to - BOOST_REQUIRE(no_movement({{{1, 2, 3}, {}}, {{4}, {}}, {{}, {4}}})); -} - -BOOST_AUTO_TEST_CASE(greedy_simple_movement) { - // simple balancing from {2, 0} leader counts to {1, 1} - BOOST_REQUIRE_EQUAL( - expect_movement( - { - // clang-format off - {{1, 2}, {}}, - {{}, {1, 2}} - // clang-format on - }, - re(1, 0, 1)), - ""); -} - -BOOST_AUTO_TEST_CASE(greedy_highest_to_lowest) { - // balancing should occur from the most to least loaded node if that is - // possible - BOOST_REQUIRE_EQUAL( - expect_movement( - { - // clang-format off - {{1, 2}, {-1}}, - {{3, 4, 5}, {-1}}, // from 1 - {{6, 7}, {-1}}, - {{}, {1, 2, 3, 4, 5, 6, 7}} // to 3 - - // clang-format on - }, - re(3, 1, 3)), - ""); - - // like the previous case but group 3 is not replicated on the to node, so - // we check that group 4 goes instead - BOOST_REQUIRE_EQUAL( - expect_movement( - { - // clang-format off - {{1, 2}, {-1}}, - {{3, 4, 5}, {-1}}, // from 1 - {{6, 7}, {-1}}, - {{}, {1, 2, 4, 5, 6, 7}} // to 3 - - // clang-format on - }, - re(4, 1, 3)), - ""); -} - -BOOST_AUTO_TEST_CASE(greedy_low_to_lower) { - // balancing can occur even from a shard with less than average load, - // if there is a shard with even lower load and the higher loaded shards - // cannot be rebalanced from because of a lack of replicas for their - // groups on the lower load nodes - BOOST_REQUIRE_EQUAL( - expect_movement( - { - // clang-format off - {{1, 2, 3, 10}, {}}, - {{4, 5, 6, 11}, {}}, - {{7, 8}, {}}, // from 2 - {{}, {8}} // to 3 - - // clang-format on - }, - re(8, 2, 3)), - ""); -} - -BOOST_AUTO_TEST_CASE(greedy_muted) { - // base spec without high, medium and low (zero) load nodes - auto spec = cluster_spec{ - // clang-format off - {{1, 2, 3, 4}, {-1}}, - {{5, 6}, {-1}}, - {{}, {-1}}, - // clang-format on - }; - - // base case, move from high to low - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 0, 2)), ""); - - // mute from the "from" node (high), so moves from mid to low - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(5, 1, 2), {0}), ""); - - // mute from the "to" node (low), so moves from high to mid - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 0, 1), {2}), ""); - - // mute any 2 nodes and there should be no movement - BOOST_REQUIRE(no_movement(spec, {0, 1})); - BOOST_REQUIRE(no_movement(spec, {0, 2})); - BOOST_REQUIRE(no_movement(spec, {1, 2})); -} - -BOOST_AUTO_TEST_CASE(greedy_skip) { - // base spec without high, medium and low load nodes - - auto spec = cluster_spec{ - // clang-format off - {{1, 2, 3, 4}, {-1}}, - {{5, 6}, {-1}}, - {{}, {-1}}, - // clang-format on - }; - - // base case, move from high to low - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 0, 2)), ""); - - // skip group 1, we move group 2 instead from same node - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(2, 0, 2), {}, {1}), ""); - - // skip all groups on node 0, we move mid to low - BOOST_REQUIRE_EQUAL( - expect_movement(spec, re(5, 1, 2), {}, {1, 2, 3, 4}), ""); - - // mute node 0 and skip all groups on node 1, no movement - absl::flat_hash_set skip{ - raft::group_id(5), raft::group_id(6)}; - BOOST_REQUIRE(no_movement(spec, {0}, skip)); +/** + * @brief Helper to create a reassignment from group, from and to passed as + * integers. + */ +inline reassignment re(unsigned group, int from, int to) { + return { + raft::group_id(group), + model::broker_shard{model::node_id(from)}, + model::broker_shard{model::node_id(to)}}; } BOOST_AUTO_TEST_CASE(even_topic_distribution_empty) { @@ -397,7 +228,6 @@ BOOST_AUTO_TEST_CASE(even_topic_no_error_even_shard_error) { std::move(g_id_to_t_id), shard_index, muted_index); BOOST_REQUIRE(even_shard_con.error() > 0); - BOOST_REQUIRE(even_shard_con.recommended_reassignment().has_value()); } #include "cluster/scheduling/leader_balancer_random.h" diff --git a/src/v/cluster/tests/leader_balancer_constraints_utils.h b/src/v/cluster/tests/leader_balancer_constraints_utils.h index ca233e3e20a52..37c5ee42890cb 100644 --- a/src/v/cluster/tests/leader_balancer_constraints_utils.h +++ b/src/v/cluster/tests/leader_balancer_constraints_utils.h @@ -14,7 +14,6 @@ #include "base/vassert.h" #include "cluster/scheduling/leader_balancer.h" #include "cluster/scheduling/leader_balancer_constraints.h" -#include "cluster/scheduling/leader_balancer_greedy.h" #include "cluster/scheduling/leader_balancer_types.h" #include "model/metadata.h" #include "raft/fundamental.h" @@ -41,7 +40,7 @@ using reassignment = lbt::reassignment; /** * @brief Create a greedy balancer from a cluster_spec and set of muted nodes. */ -static auto from_spec( +inline auto from_spec( const cluster_spec& spec, const absl::flat_hash_set& muted = {}) { index_type index; @@ -124,93 +123,3 @@ inline auto group_to_topic_from_spec(const group_to_ntp_spec& spec) { return group_to_topic; } - -/** - * @brief Basic validity checks on a reassignment. - */ -static void check_valid(const index_type& index, const reassignment& movement) { - // from == to is not valid (error should not decrease) - vassert(movement.from != movement.to, ""); - - auto& from_groups = index.at(movement.from); - // check that the from shard was the leader - vassert(from_groups.contains(movement.group), ""); - - // check that the to shard is in the replica set - auto& replicas = from_groups.at(movement.group); - vassert(std::count(replicas.begin(), replicas.end(), movement.to) == 1, ""); -} - -/** - * @brief returns true iff the passed spec had no movement on balance - */ -inline bool no_movement( - const cluster_spec& spec, - const absl::flat_hash_set& muted = {}, - const absl::flat_hash_set& skip = {}) { - auto [shard_index, muted_index] = from_spec(spec, muted); - cluster::leader_balancer_types::muted_groups_t skip_typed; - for (auto& s : skip) { - skip_typed.add(static_cast(s)); - } - muted_index.update_muted_groups(skip_typed); - auto balancer = lbt::even_shard_load_constraint(shard_index, muted_index); - - return !balancer.recommended_reassignment(); -} - -static bool operator==(const lbt::reassignment& l, const lbt::reassignment& r) { - return l.group == r.group && l.from == r.from && l.to == r.to; -} - -static ss::sstring to_string(const reassignment& r) { - return fmt::format("{{group: {} from: {} to: {}}}", r.group, r.from, r.to); -} - -/** - * @brief Test helper which checks that the given spec and other parameters - * result in the expected resassignment. - */ -inline ss::sstring expect_movement( - const cluster_spec& spec, - const reassignment& expected_reassignment, - const absl::flat_hash_set& muted = {}, - const absl::flat_hash_set& skip = {}) { - auto [shard_index, mute_index] = from_spec(spec, muted); - - cluster::leader_balancer_types::muted_groups_t skip_typed; - for (auto& s : skip) { - skip_typed.add(static_cast(s)); - } - - mute_index.update_muted_groups(std::move(skip_typed)); - auto balancer = lbt::even_shard_load_constraint(shard_index, mute_index); - - auto reassignment = balancer.recommended_reassignment(); - - if (!reassignment) { - return "no reassignment occurred"; - } - - check_valid(shard_index.shards(), *reassignment); - - if (!(*reassignment == expected_reassignment)) { - return fmt::format( - "Reassignment not as expected.\nExpected: {}\nActual: {}\n", - to_string(expected_reassignment), - to_string(*reassignment)); - } - - return ""; -} - -/** - * @brief Helper to create a reassignment from group, from and to passed as - * integers. - */ -inline reassignment re(unsigned group, int from, int to) { - return { - raft::group_id(group), - model::broker_shard{model::node_id(from)}, - model::broker_shard{model::node_id(to)}}; -} diff --git a/src/v/cluster/tests/leader_balancer_test.cc b/src/v/cluster/tests/leader_balancer_test.cc index 444b1c839cf1e..ea2a7761c28c3 100644 --- a/src/v/cluster/tests/leader_balancer_test.cc +++ b/src/v/cluster/tests/leader_balancer_test.cc @@ -14,9 +14,10 @@ #define BOOST_TEST_MODULE leader_balancer #include "absl/container/flat_hash_map.h" -#include "cluster/scheduling/leader_balancer_greedy.h" +#include "cluster/scheduling/leader_balancer_random.h" #include "leader_balancer_test_utils.h" #include "model/metadata.h" +#include "utils/to_string.h" #include #include @@ -27,7 +28,7 @@ #include using index_type = cluster::leader_balancer_strategy::index_type; -using gbs = cluster::greedy_balanced_shards; +using strategy = cluster::leader_balancer_types::random_hill_climbing_strategy; using reassignment = cluster::leader_balancer_strategy::reassignment; /** @@ -47,36 +48,6 @@ static void check_valid(const index_type& index, const reassignment& movement) { std::count(replicas.begin(), replicas.end(), movement.to), 1); } -BOOST_AUTO_TEST_CASE(greedy_movement) { - // 10 nodes - // 2 cores x node - // 10 partitions per shard - // r=3 (3 replicas) - - auto index = leader_balancer_test_utils::make_cluster_index(10, 2, 10, 3); - - auto greed = cluster::greedy_balanced_shards( - leader_balancer_test_utils::copy_cluster_index(index), {}); - BOOST_REQUIRE_EQUAL(greed.error(), 0); - - // new groups on shard {2, 0} - auto shard20 = model::broker_shard{model::node_id{2}, 0}; - - index[shard20][raft::group_id(20)] = index[shard20][raft::group_id(3)]; - index[shard20][raft::group_id(21)] = index[shard20][raft::group_id(3)]; - index[shard20][raft::group_id(22)] = index[shard20][raft::group_id(3)]; - - greed = cluster::greedy_balanced_shards( - leader_balancer_test_utils::copy_cluster_index(index), {}); - BOOST_REQUIRE_GT(greed.error(), 0); - - // movement should be _from_ the overloaded shard - auto movement = greed.find_movement({}); - check_valid(index, *movement); - BOOST_REQUIRE(movement); - BOOST_REQUIRE_EQUAL(movement->from, shard20); -} - struct node_spec { std::vector groups_led, groups_followed; }; @@ -87,7 +58,7 @@ struct node_spec { using cluster_spec = std::vector; /** - * @brief Create a greedy balancer from a cluster_spec and set of muted nodes. + * @brief Create a balancer strategy from a cluster_spec and set of muted nodes. */ static auto from_spec( const cluster_spec& spec, const absl::flat_hash_set& muted = {}) { @@ -151,8 +122,15 @@ static auto from_spec( [](auto id) { return model::node_id{id}; }); auto index_cp = leader_balancer_test_utils::copy_cluster_index(index); + auto g_to_topic = leader_balancer_test_utils::make_gid_to_topic_index( + index); return std::make_tuple( - std::move(index_cp), gbs{std::move(index), muted_bs}); + std::move(index_cp), + strategy{ + std::move(index), + std::move(g_to_topic), + cluster::leader_balancer_types::muted_index{muted_bs, {}}, + std::nullopt}); }; /** @@ -166,12 +144,12 @@ bool no_movement( return !balancer.find_movement(skip); } -BOOST_AUTO_TEST_CASE(greedy_empty) { +BOOST_AUTO_TEST_CASE(empty) { // empty spec, expect no movement BOOST_REQUIRE(no_movement({})); } -BOOST_AUTO_TEST_CASE(greedy_balanced) { +BOOST_AUTO_TEST_CASE(balanced) { // single node is already balanced, expect no movement BOOST_REQUIRE(no_movement({ {{1, 2, 3}, {}}, @@ -190,18 +168,48 @@ BOOST_AUTO_TEST_CASE(greedy_balanced) { })); } -BOOST_AUTO_TEST_CASE(greedy_obeys_replica) { +BOOST_AUTO_TEST_CASE(obeys_replica) { // unbalanced, but the overloaded node has no replicas it can // send its groups to BOOST_REQUIRE(no_movement({{{1, 2, 3}, {}}, {{4}, {}}, {{}, {4}}})); } -bool operator==(const reassignment& l, const reassignment& r) { - return l.group == r.group && l.from == r.from && l.to == r.to; +struct expected_reassignment { + std::optional from; + std::optional to; + std::optional group; +}; + +/** + * @brief Helper to create a reassignment from from, to and group passed as + * integers. -1 means "any" + */ +static expected_reassignment re(int from, int to, int group = -1) { + expected_reassignment ret; + if (from != -1) { + ret.from = model::node_id(from); + } + if (to != -1) { + ret.to = model::node_id(to); + } + if (group != -1) { + ret.group = raft::group_id(group); + } + return ret; +} + +bool operator==(const reassignment& r, const expected_reassignment& e) { + return (!e.from || e.from == r.from.node_id) + && (!e.to || e.to == r.to.node_id) + && (!e.group || e.group == r.group); } ss::sstring to_string(const reassignment& r) { - return fmt::format("{{group: {} from: {} to: {}}}", r.group, r.from, r.to); + return fmt::format("{{from: {} to: {} group: {}}}", r.from, r.to, r.group); +} + +ss::sstring to_string(const expected_reassignment& r) { + return fmt::format("{{from: {} to: {} group: {}}}", r.from, r.to, r.group); } /** @@ -210,7 +218,7 @@ ss::sstring to_string(const reassignment& r) { */ ss::sstring expect_movement( const cluster_spec& spec, - const reassignment& expected_reassignment, + const expected_reassignment& expected, const absl::flat_hash_set& muted = {}, const absl::flat_hash_set& skip = {}) { auto [index, balancer] = from_spec(spec, muted); @@ -228,28 +236,17 @@ ss::sstring expect_movement( check_valid(index, *reassignment); - if (!(*reassignment == expected_reassignment)) { + if (!(*reassignment == expected)) { return fmt::format( "Reassignment not as expected.\nExpected: {}\nActual: {}\n", - to_string(expected_reassignment), + to_string(expected), to_string(*reassignment)); } return ""; } -/** - * @brief Helper to create a reassignment from group, from and to passed as - * integers. - */ -static reassignment re(unsigned group, int from, int to) { - return { - raft::group_id(group), - model::broker_shard{model::node_id(from)}, - model::broker_shard{model::node_id(to)}}; -} - -BOOST_AUTO_TEST_CASE(greedy_simple_movement) { +BOOST_AUTO_TEST_CASE(simple_movement) { // simple balancing from {2, 0} leader counts to {1, 1} BOOST_REQUIRE_EQUAL( expect_movement( @@ -259,45 +256,26 @@ BOOST_AUTO_TEST_CASE(greedy_simple_movement) { {{}, {1, 2}} // clang-format on }, - re(1, 0, 1)), + re(0, 1)), ""); -} -BOOST_AUTO_TEST_CASE(greedy_highest_to_lowest) { - // balancing should occur from the most to least loaded node if that is - // possible + // balancing should occur from any node to node 3 BOOST_REQUIRE_EQUAL( expect_movement( { // clang-format off {{1, 2}, {-1}}, - {{3, 4, 5}, {-1}}, // from 1 + {{3, 4, 5}, {-1}}, {{6, 7}, {-1}}, {{}, {1, 2, 3, 4, 5, 6, 7}} // to 3 // clang-format on }, - re(3, 1, 3)), - ""); - - // like the previous case but group 3 is not replicated on the to node, so - // we check that group 4 goes instead - BOOST_REQUIRE_EQUAL( - expect_movement( - { - // clang-format off - {{1, 2}, {-1}}, - {{3, 4, 5}, {-1}}, // from 1 - {{6, 7}, {-1}}, - {{}, {1, 2, 4, 5, 6, 7}} // to 3 - - // clang-format on - }, - re(4, 1, 3)), + re(-1, 3)), ""); } -BOOST_AUTO_TEST_CASE(greedy_low_to_lower) { +BOOST_AUTO_TEST_CASE(low_to_lower) { // balancing can occur even from a shard with less than average load, // if there is a shard with even lower load and the higher loaded shards // cannot be rebalanced from because of a lack of replicas for their @@ -313,11 +291,11 @@ BOOST_AUTO_TEST_CASE(greedy_low_to_lower) { // clang-format on }, - re(8, 2, 3)), + re(2, 3, 8)), ""); } -BOOST_AUTO_TEST_CASE(greedy_muted) { +BOOST_AUTO_TEST_CASE(muted) { // base spec without high, medium and low (zero) load nodes auto spec = cluster_spec{ // clang-format off @@ -327,14 +305,14 @@ BOOST_AUTO_TEST_CASE(greedy_muted) { // clang-format on }; - // base case, move from high to low - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 0, 2)), ""); + // base case, some move expected + BOOST_REQUIRE_EQUAL(expect_movement(spec, re(-1, -1)), ""); - // mute from the "from" node (high), so moves from mid to low - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(5, 1, 2), {0}), ""); + // mute from the "high" node, so moves from mid to low + BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 2), {0}), ""); - // mute from the "to" node (low), so moves from high to mid - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 0, 1), {2}), ""); + // mute from the "low" node, so moves from high to mid + BOOST_REQUIRE_EQUAL(expect_movement(spec, re(0, 1), {2}), ""); // mute any 2 nodes and there should be no movement BOOST_REQUIRE(no_movement(spec, {0, 1})); @@ -342,7 +320,7 @@ BOOST_AUTO_TEST_CASE(greedy_muted) { BOOST_REQUIRE(no_movement(spec, {1, 2})); } -BOOST_AUTO_TEST_CASE(greedy_skip) { +BOOST_AUTO_TEST_CASE(skip) { // base spec without high, medium and low load nodes auto spec = cluster_spec{ @@ -353,15 +331,8 @@ BOOST_AUTO_TEST_CASE(greedy_skip) { // clang-format on }; - // base case, move from high to low - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 0, 2)), ""); - - // skip group 1, we move group 2 instead from same node - BOOST_REQUIRE_EQUAL(expect_movement(spec, re(2, 0, 2), {}, {1}), ""); - // skip all groups on node 0, we move mid to low - BOOST_REQUIRE_EQUAL( - expect_movement(spec, re(5, 1, 2), {}, {1, 2, 3, 4}), ""); + BOOST_REQUIRE_EQUAL(expect_movement(spec, re(1, 2), {}, {1, 2, 3, 4}), ""); // mute node 0 and skip all groups on node 1, no movement cluster::leader_balancer_types::muted_groups_t skip{5, 6}; diff --git a/src/v/cluster/tests/leader_balancer_test_utils.h b/src/v/cluster/tests/leader_balancer_test_utils.h index bd573724b8491..d17729ed9943e 100644 --- a/src/v/cluster/tests/leader_balancer_test_utils.h +++ b/src/v/cluster/tests/leader_balancer_test_utils.h @@ -9,7 +9,7 @@ * by the Apache License, Version 2.0 */ -#include "cluster/scheduling/leader_balancer_greedy.h" +#include "cluster/scheduling/leader_balancer_strategy.h" #include "cluster/scheduling/leader_balancer_types.h" namespace leader_balancer_test_utils { @@ -64,4 +64,18 @@ inline cluster::leader_balancer_strategy::index_type copy_cluster_index( return index; } +inline cluster::leader_balancer_types::group_id_to_topic_id +make_gid_to_topic_index( + const cluster::leader_balancer_types::index_type& index) { + cluster::leader_balancer_types::group_id_to_topic_id ret; + + for (const auto& [bs, leaders] : index) { + for (const auto& [group, replicas] : leaders) { + ret.emplace(group, 0); + } + } + + return ret; +} + } // namespace leader_balancer_test_utils diff --git a/src/v/config/configuration.cc b/src/v/config/configuration.cc index f8d3c2df38afe..029aa65e8a14c 100644 --- a/src/v/config/configuration.cc +++ b/src/v/config/configuration.cc @@ -3052,29 +3052,10 @@ configuration::configuration() , enable_leader_balancer( *this, "enable_leader_balancer", - "Enable automatic leadership rebalancing. Mode is set by " - "`leader_balancer_mode`.", + "Enable automatic leadership rebalancing.", {.needs_restart = needs_restart::no, .visibility = visibility::user}, true) - , leader_balancer_mode( - *this, - "leader_balancer_mode", - "Mode of the leader balancer for optimizing movements of leadership " - "between shards (logical CPU cores). Enabled by " - "`enable_leader_balancer.` Accepted Values: * `random_hill_climbing`: a " - "shard is randomly chosen and leadership is moved to it if the load on " - "the original shard is reduced. * `greedy_balanced_shards`: leadership " - "movement is based on a greedy heuristic of moving leaders from the most " - "loaded shard to the least loaded shard.", - {.needs_restart = needs_restart::no, - .example = model::leader_balancer_mode_to_string( - model::leader_balancer_mode::random_hill_climbing), - .visibility = visibility::user}, - model::leader_balancer_mode::random_hill_climbing, - { - model::leader_balancer_mode::greedy_balanced_shards, - model::leader_balancer_mode::random_hill_climbing, - }) + , leader_balancer_mode(*this, "leader_balancer_mode") , leader_balancer_idle_timeout( *this, "leader_balancer_idle_timeout", diff --git a/src/v/config/configuration.h b/src/v/config/configuration.h index 1a6d40d373bda..4b97ec6661ca0 100644 --- a/src/v/config/configuration.h +++ b/src/v/config/configuration.h @@ -574,7 +574,7 @@ struct configuration final : public config_store { property partition_autobalancing_topic_aware; property enable_leader_balancer; - enum_property leader_balancer_mode; + deprecated_property leader_balancer_mode; property leader_balancer_idle_timeout; property leader_balancer_mute_timeout; property leader_balancer_node_mute_timeout; diff --git a/src/v/config/convert.h b/src/v/config/convert.h index 4e1fb949e774a..3509d9f2f4602 100644 --- a/src/v/config/convert.h +++ b/src/v/config/convert.h @@ -403,39 +403,6 @@ struct convert { } }; -template<> -struct convert { - using type = model::leader_balancer_mode; - - static constexpr auto acceptable_values = std::to_array( - {model::leader_balancer_mode_to_string(type::random_hill_climbing), - model::leader_balancer_mode_to_string(type::greedy_balanced_shards)}); - - static Node encode(const type& rhs) { return Node(fmt::format("{}", rhs)); } - - static bool decode(const Node& node, type& rhs) { - auto value = node.as(); - - if ( - std::find(acceptable_values.begin(), acceptable_values.end(), value) - == acceptable_values.end()) { - return false; - } - - rhs = string_switch(std::string_view{value}) - .match( - model::leader_balancer_mode_to_string( - type::random_hill_climbing), - type::random_hill_climbing) - .match( - model::leader_balancer_mode_to_string( - type::greedy_balanced_shards), - type::greedy_balanced_shards); - - return true; - } -}; - template<> struct convert { using type = std::filesystem::path; diff --git a/src/v/config/property.h b/src/v/config/property.h index c7662b0129ba8..1a5c516fbbe81 100644 --- a/src/v/config/property.h +++ b/src/v/config/property.h @@ -634,8 +634,6 @@ consteval std::string_view property_type_name() { return "string"; } else if constexpr (std::is_same_v) { return "string"; - } else if constexpr (std::is_same_v) { - return "string"; } else if constexpr (std::is_same_v) { return "string"; } else if constexpr (std::is_same_v< diff --git a/src/v/config/rjson_serialization.cc b/src/v/config/rjson_serialization.cc index 099da4bc1bc69..52ce9c2cce69b 100644 --- a/src/v/config/rjson_serialization.cc +++ b/src/v/config/rjson_serialization.cc @@ -177,11 +177,6 @@ void rjson_serialize( stringize(w, v); } -void rjson_serialize( - json::Writer& w, const model::leader_balancer_mode& v) { - stringize(w, v); -} - void rjson_serialize( json::Writer& w, const model::fetch_read_strategy& v) { stringize(w, v); diff --git a/src/v/config/rjson_serialization.h b/src/v/config/rjson_serialization.h index f77606ef40c79..1b253d08212f9 100644 --- a/src/v/config/rjson_serialization.h +++ b/src/v/config/rjson_serialization.h @@ -95,9 +95,6 @@ void rjson_serialize( void rjson_serialize( json::Writer& w, const model::cloud_storage_backend& v); -void rjson_serialize( - json::Writer& w, const model::leader_balancer_mode& v); - void rjson_serialize( json::Writer& w, const model::fetch_read_strategy& v); diff --git a/src/v/model/metadata.h b/src/v/model/metadata.h index 45ece905c619d..8a99a06ede1fd 100644 --- a/src/v/model/metadata.h +++ b/src/v/model/metadata.h @@ -494,26 +494,6 @@ inline std::ostream& operator<<(std::ostream& os, cloud_storage_backend csb) { } } -enum class leader_balancer_mode : uint8_t { - greedy_balanced_shards = 0, - random_hill_climbing = 1, -}; - -constexpr const char* -leader_balancer_mode_to_string(leader_balancer_mode mode) { - switch (mode) { - case leader_balancer_mode::greedy_balanced_shards: - return "greedy_balanced_shards"; - case leader_balancer_mode::random_hill_climbing: - return "random_hill_climbing"; - default: - throw std::invalid_argument("unknown leader_balancer_mode"); - } -} - -std::ostream& operator<<(std::ostream&, leader_balancer_mode); -std::istream& operator>>(std::istream&, leader_balancer_mode&); - enum class cloud_storage_chunk_eviction_strategy { eager = 0, capped = 1, diff --git a/src/v/model/model.cc b/src/v/model/model.cc index 8951df74d662a..3550f082482ee 100644 --- a/src/v/model/model.cc +++ b/src/v/model/model.cc @@ -462,26 +462,6 @@ std::ostream& operator<<(std::ostream& o, const shadow_indexing_mode& si) { return o; } -std::ostream& operator<<(std::ostream& o, leader_balancer_mode lbt) { - o << leader_balancer_mode_to_string(lbt); - return o; -} - -std::istream& operator>>(std::istream& i, leader_balancer_mode& lbt) { - ss::sstring s; - i >> s; - lbt = string_switch(s) - .match( - leader_balancer_mode_to_string( - leader_balancer_mode::random_hill_climbing), - leader_balancer_mode::random_hill_climbing) - .match( - leader_balancer_mode_to_string( - leader_balancer_mode::greedy_balanced_shards), - leader_balancer_mode::greedy_balanced_shards); - return i; -} - std::ostream& operator<<(std::ostream& o, const control_record_type& crt) { switch (crt) { case control_record_type::tx_abort: