
Commit ef579f2
Merge pull request #23780 from ztlpn/deprecate-leader-balancer-mode
Deprecate leader balancer mode
mmaslankaprv authored Oct 15, 2024
2 parents 117a5e4 + 202bf2f commit ef579f2
Showing 20 changed files with 104 additions and 729 deletions.
src/v/cluster/BUILD (1 change: 0 additions & 1 deletion)
@@ -550,7 +550,6 @@ redpanda_cc_library(
"scheduling/constraints.h",
"scheduling/leader_balancer.h",
"scheduling/leader_balancer_constraints.h",
"scheduling/leader_balancer_greedy.h",
"scheduling/leader_balancer_probe.h",
"scheduling/leader_balancer_random.h",
"scheduling/leader_balancer_strategy.h",
src/v/cluster/scheduling/leader_balancer.cc (31 changes: 6 additions & 25 deletions)
@@ -17,7 +17,6 @@
#include "cluster/logger.h"
#include "cluster/members_table.h"
#include "cluster/partition_leaders_table.h"
#include "cluster/scheduling/leader_balancer_greedy.h"
#include "cluster/scheduling/leader_balancer_random.h"
#include "cluster/scheduling/leader_balancer_strategy.h"
#include "cluster/scheduling/leader_balancer_types.h"
@@ -578,30 +577,12 @@ ss::future<ss::stop_iteration> leader_balancer::balance() {

auto muted_nodes = collect_muted_nodes(health_report.value());

auto mode = config::shard_local_cfg().leader_balancer_mode();
std::unique_ptr<leader_balancer_strategy> strategy;

switch (mode) {
case model::leader_balancer_mode::random_hill_climbing: {
vlog(clusterlog.debug, "using random_hill_climbing");

strategy = std::make_unique<
leader_balancer_types::random_hill_climbing_strategy>(
std::move(index),
std::move(group_id_to_topic),
leader_balancer_types::muted_index{std::move(muted_nodes), {}},
std::move(preference_index));
break;
}
case model::leader_balancer_mode::greedy_balanced_shards:
vlog(clusterlog.debug, "using greedy_balanced_shards");
strategy = std::make_unique<greedy_balanced_shards>(
std::move(index), std::move(muted_nodes));
break;
default:
vlog(clusterlog.error, "unexpected mode value: {}", mode);
co_return ss::stop_iteration::no;
}
std::unique_ptr<leader_balancer_strategy> strategy
= std::make_unique<leader_balancer_types::random_hill_climbing_strategy>(
std::move(index),
std::move(group_id_to_topic),
leader_balancer_types::muted_index{std::move(muted_nodes), {}},
std::move(preference_index));

auto cores = strategy->stats();

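After this change, `balance()` no longer consults the `leader_balancer_mode` cluster config: it unconditionally builds the random hill-climbing strategy. The sketch below illustrates the strategy interface as it is exercised in this diff (`error()`, `find_movement()`, `stats()`); the types and signatures are simplified assumptions, not Redpanda's actual headers.

```cpp
// Minimal sketch, not the real Redpanda interface: types and signatures are
// simplified assumptions modelled on the calls visible in this diff.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

namespace sketch {

struct shard { int node = 0; int core = 0; };
struct reassignment { std::uint64_t group = 0; shard from; shard to; };
struct shard_load { shard s; std::size_t leaders = 0; };

class leader_balancer_strategy {
public:
    virtual ~leader_balancer_strategy() = default;
    // Distance of the current leadership placement from the optimum.
    virtual double error() const = 0;
    // One leadership movement that would reduce the error, if any exists.
    virtual std::optional<reassignment> find_movement() = 0;
    // Per-shard leader counts, used for metrics.
    virtual std::vector<shard_load> stats() const = 0;
};

// One balancer tick after this commit: a single strategy is constructed
// unconditionally and asked for the next improving movement.
std::optional<reassignment> next_move(leader_balancer_strategy& strategy) {
    return strategy.find_movement();
}

} // namespace sketch
```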
src/v/cluster/scheduling/leader_balancer_constraints.cc (75 changes: 0 additions & 75 deletions)
@@ -46,15 +46,6 @@ void even_topic_distribution_constraint::update_index(const reassignment& r) {
_topic_shard_index.at(topic_id).at(r.to) += 1;
}

std::optional<reassignment>
even_topic_distribution_constraint::recommended_reassignment() {
// This method is deprecated and is ony used in `leader_balancer_greedy`
// which doesn't use the `even_topic_distributon_constraint`. Hence there is
// no need to implement it here. Once the greedy balancer has been removed
// this should be removed as well.
vassert(false, "not implemented");
}

void even_topic_distribution_constraint::rebuild_indexes() {
_topic_shard_index.clear();
_topic_replica_index.clear();
@@ -153,64 +144,6 @@ double even_topic_distribution_constraint::adjusted_error(
return current_error;
}

std::optional<reassignment>
even_shard_load_constraint::recommended_reassignment() {
auto [load, load_map] = build_load_indexes();
const auto curr_error = error();

// Consider each group from high load core, and record the reassignment
// involving the lowest load "to" core.
for (const auto& from : boost::adaptors::reverse(load)) {
if (mi().muted_nodes().contains(from->first.node_id)) {
continue;
}

constexpr size_t load_unset = std::numeric_limits<size_t>::max();
size_t lowest_load = load_unset;
reassignment lowest_reassign{};

// Consider each group from high load core, and record the
// reassignment involving the lowest load "to" core.
for (const auto& group : from->second) {
if (mi().muted_groups().contains(
static_cast<uint64_t>(group.first))) {
continue;
}

// iterate over all the replicas and look for the lowest load
// shard in the replica list
for (const auto& to_shard : group.second) {
auto load = load_map.at(to_shard);
if (likely(load >= lowest_load)) {
// there is no point in evaluating this move, it is
// worse than the best one we've found so far.
continue;
}

if (mi().muted_nodes().contains(to_shard.node_id)) {
continue;
}

lowest_load = load;
lowest_reassign = {group.first, from->first, to_shard};
}
}

if (lowest_load != load_unset) {
// We found a possible reassignment while looking at the current
// "from" shard, and while it is the best possible reassignment
// found it may not improve the error
auto new_error = adjusted_error(
curr_error, lowest_reassign.from, lowest_reassign.to);
if (new_error + error_jitter < curr_error) {
return lowest_reassign;
}
}
}

return std::nullopt;
}

double even_shard_load_constraint::adjusted_error(
double current_error,
const model::broker_shard& from,
@@ -320,12 +253,4 @@ double pinning_constraint::evaluate_internal(const reassignment& r) {
return diff;
}

std::optional<reassignment> pinning_constraint::recommended_reassignment() {
// This method is deprecated and is ony used in `leader_balancer_greedy`
// which doesn't use the `even_topic_distributon_constraint`. Hence there is
// no need to implement it here. Once the greedy balancer has been removed
// this should be removed as well.
vassert(false, "not implemented");
}

} // namespace cluster::leader_balancer_types
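The deleted `even_shard_load_constraint::recommended_reassignment()` was the heart of the greedy balancer: visit shards from most to least loaded and, for each group led on a loaded shard, pick the least-loaded replica shard as the target, accepting the move only if it reduces the constraint's error. A simplified, self-contained sketch of that search follows; the container types are hypothetical stand-ins, and the muted-node/group filtering and error-jitter test of the real code are reduced to a plain load comparison.

```cpp
// Simplified sketch of the greedy search removed in this commit. Types are
// hypothetical stand-ins, not Redpanda's shard_index/muted_index machinery.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <map>
#include <optional>
#include <vector>

namespace sketch {

using shard_id = int;
using group_id = std::uint64_t;

struct reassignment { group_id group; shard_id from; shard_id to; };

// leaders[s]  = groups whose leader currently sits on shard s
// replicas[g] = shards that hold a replica of group g
std::optional<reassignment> greedy_move(
  const std::map<shard_id, std::vector<group_id>>& leaders,
  const std::map<group_id, std::vector<shard_id>>& replicas) {
    // Visit source shards from most to least loaded.
    std::vector<shard_id> by_load;
    by_load.reserve(leaders.size());
    for (const auto& entry : leaders) { by_load.push_back(entry.first); }
    std::sort(by_load.begin(), by_load.end(), [&](shard_id a, shard_id b) {
        return leaders.at(a).size() > leaders.at(b).size();
    });

    for (shard_id from : by_load) {
        std::size_t lowest = std::numeric_limits<std::size_t>::max();
        std::optional<reassignment> best;
        for (group_id g : leaders.at(from)) {
            // Candidate targets are the group's other replica shards.
            for (shard_id to : replicas.at(g)) {
                if (to == from) { continue; }
                auto it = leaders.find(to);
                std::size_t load = it == leaders.end() ? 0 : it->second.size();
                if (load >= lowest) { continue; } // cannot beat the best so far
                lowest = load;
                best = reassignment{g, from, to};
            }
        }
        // Accept only if the move leaves the target less loaded than the
        // source, i.e. it actually reduces the imbalance (the real code
        // compares an adjusted error value against a small jitter threshold).
        if (best && lowest + 1 < leaders.at(from).size()) {
            return best;
        }
    }
    return std::nullopt;
}

} // namespace sketch
```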
src/v/cluster/scheduling/leader_balancer_constraints.h (33 changes: 0 additions & 33 deletions)
@@ -133,20 +133,6 @@ class even_topic_distribution_constraint final

void update_index(const reassignment& r) override;

/*
* Uses a greedy algorithm to generate a recommended reassignment
* that will reduce error for this constraint.
*
* The gist is that it searches topics in order of how skewed they are.
* Then for each topic it finds a partiton on the node with the most
* leadership and tries to move it to one of its replica nodes. If this
* reassignment reduces error its returned.
*
* If no reassignment can reduce error for this constraint std::nullopt is
* returned.
*/
std::optional<reassignment> recommended_reassignment() override;

private:
std::reference_wrapper<const shard_index> _si;
std::reference_wrapper<const muted_index> _mi;
@@ -233,23 +219,6 @@ class even_shard_load_constraint final
_error = adjusted_error(_error, r.from, r.to);
}

/*
* Find a group reassignment that improves overall error. The general
* approach is to select a group from the highest loaded shard and move
* leadership for that group to the least loaded shard that the group is
* compatible with.
*
* Clearly this is a costly method in terms of runtime complexity.
* Measurements for clusters with several thousand partitions indicate a
* real time execution cost of at most a couple hundred micros. Other
* strategies are sure to improve this as we tackle larger configurations.
*
* Muted nodes are nodes that should be treated as if they have no available
* capacity. So do not move leadership to a muted node, but any leaders on a
* muted node should not be touched in case the mute is temporary.
*/
std::optional<reassignment> recommended_reassignment() override;

std::vector<shard_load> stats() const;

double calc_target_load() const;
@@ -314,8 +283,6 @@ class pinning_constraint final : public soft_constraint {
private:
double evaluate_internal(const reassignment& r) override;

std::optional<reassignment> recommended_reassignment() override;

private:
std::reference_wrapper<const group_id_to_topic_id> _group2topic;
preference_index _preference_idx;
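The removed header comments also describe the topic-distribution variant of the greedy search: order topics by how skewed their leadership is, then try to move a partition of the most-skewed topic off the node leading most of it onto one of its replica nodes. A hypothetical sketch of the skew-ordering step is below; the per-topic move search follows the same pattern as the shard-load sketch above, and the skew metric here is one plausible reading, since the removed comment does not spell it out.

```cpp
// Hypothetical sketch of ordering topics by leadership skew; types and the
// skew definition are stand-ins, not Redpanda's real indexes.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <map>
#include <vector>

namespace sketch {

using topic_id = std::uint64_t;
using node_id = int;

// leaders[t][n] = number of leaders of topic t currently on node n
using topic_leader_index = std::map<topic_id, std::map<node_id, std::size_t>>;

// Skew of a topic: leaders on its busiest node minus leaders on its least
// busy node.
inline std::size_t skew(const std::map<node_id, std::size_t>& per_node) {
    if (per_node.empty()) {
        return 0;
    }
    std::size_t lo = std::numeric_limits<std::size_t>::max();
    std::size_t hi = 0;
    for (const auto& entry : per_node) {
        lo = std::min(lo, entry.second);
        hi = std::max(hi, entry.second);
    }
    return hi - lo;
}

// Topics are visited from most to least skewed before trying a move.
inline std::vector<topic_id> topics_by_skew(const topic_leader_index& index) {
    std::vector<topic_id> order;
    order.reserve(index.size());
    for (const auto& entry : index) {
        order.push_back(entry.first);
    }
    std::sort(order.begin(), order.end(), [&](topic_id a, topic_id b) {
        return skew(index.at(a)) > skew(index.at(b));
    });
    return order;
}

} // namespace sketch
```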
src/v/cluster/scheduling/leader_balancer_greedy.h (79 changes: 0 additions & 79 deletions)

This file was deleted.

src/v/cluster/scheduling/leader_balancer_types.h (2 changes: 0 additions & 2 deletions)
@@ -97,8 +97,6 @@ class soft_constraint {
auto ret = evaluate_internal(r);
return ret;
}

virtual std::optional<reassignment> recommended_reassignment() = 0;
};

} // namespace cluster::leader_balancer_types
src/v/cluster/tests/BUILD (1 change: 1 addition & 0 deletions)
@@ -110,6 +110,7 @@ redpanda_cc_btest(
"//src/v/cluster",
"//src/v/model",
"//src/v/test_utils:seastar_boost",
"//src/v/utils:to_string",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/container:flat_hash_set",
"@boost//:test",
src/v/cluster/tests/leader_balancer_bench.cc (43 changes: 2 additions & 41 deletions)
@@ -9,7 +9,6 @@
* by the Apache License, Version 2.0
*/
#include "cluster/scheduling/leader_balancer_constraints.h"
#include "cluster/scheduling/leader_balancer_greedy.h"
#include "cluster/scheduling/leader_balancer_random.h"
#include "cluster/scheduling/leader_balancer_types.h"
#include "leader_balancer_test_utils.h"
@@ -25,19 +24,6 @@ constexpr int shards_per_node = 16; // i.e., cores per node
constexpr int groups_per_shard = 80; // group == partition in this context
constexpr int replicas = 3; // number of replicas

cluster::leader_balancer_types::group_id_to_topic_id make_gid_to_topic_index(
const cluster::leader_balancer_types::index_type& index) {
cluster::leader_balancer_types::group_id_to_topic_id ret;

for (const auto& [bs, leaders] : index) {
for (const auto& [group, replicas] : leaders) {
ret.emplace(group, 0);
}
}

return ret;
}

/*
* Measures the time it takes to randomly generate and evaluate every possible
* reassignment for a given cluster. The reassignments are evaluated by the
@@ -54,7 +40,8 @@ void random_search_eval_bench(bool measure_all) {
cluster::leader_balancer_types::muted_index mi{{}, {}};
cluster::leader_balancer_types::shard_index si(
leader_balancer_test_utils::copy_cluster_index(index));
auto gid_topic = make_gid_to_topic_index(si.shards());
auto gid_topic = leader_balancer_test_utils::make_gid_to_topic_index(
si.shards());

if (measure_all) {
perf_tests::start_measuring_time();
@@ -115,34 +102,8 @@ void random_bench() {
perf_tests::stop_measuring_time();
}

void balancer_bench(bool measure_all) {
cluster::leader_balancer_strategy::index_type index
= leader_balancer_test_utils::make_cluster_index(
node_count, shards_per_node, groups_per_shard, replicas);

if (measure_all) {
perf_tests::start_measuring_time();
}

auto greed = cluster::greedy_balanced_shards(std::move(index), {});
vassert(greed.error() == 0, "");
vassert(greed.error() == 0, "e > 0");

// movement should be _from_ the overloaded shard
if (!measure_all) {
perf_tests::start_measuring_time();
}
auto movement = greed.find_movement({});
vassert(!movement, "movemement ");
perf_tests::do_not_optimize(movement);
perf_tests::stop_measuring_time();
}

} // namespace

PERF_TEST(lb, even_shard_load_movement) { balancer_bench(false); }
PERF_TEST(lb, even_shard_load_all) { balancer_bench(true); }

PERF_TEST(lb, random_eval_movement) {
random_search_eval_bench<
cluster::leader_balancer_types::random_reassignments>(false);
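The benchmark's local `make_gid_to_topic_index` helper and the greedy-specific `balancer_bench` cases are removed; the remaining benchmarks now call `leader_balancer_test_utils::make_gid_to_topic_index`. Presumably that shared helper mirrors the deleted local one; a hypothetical sketch with simplified index types:

```cpp
// Hypothetical sketch of the shared test helper the benchmark now calls. It
// mirrors the deleted local helper above; the index/map types are simplified
// stand-ins for the real index_type and group_id_to_topic_id.
#include <cstdint>
#include <map>
#include <vector>

namespace leader_balancer_test_utils_sketch {

using group_id = std::uint64_t;
using topic_id = std::uint64_t;

struct shard { int node = 0; int core = 0; };

// broker shard -> (raft group -> replica shards)
using index_type = std::map<int, std::map<group_id, std::vector<shard>>>;
using group_id_to_topic_id = std::map<group_id, topic_id>;

group_id_to_topic_id make_gid_to_topic_index(const index_type& index) {
    group_id_to_topic_id ret;
    for (const auto& [bs, leaders] : index) {
        for (const auto& [group, replicas] : leaders) {
            // The benchmark does not need real topic ids, so every group is
            // mapped to topic 0, exactly as the deleted local helper did.
            ret.emplace(group, topic_id{0});
        }
    }
    return ret;
}

} // namespace leader_balancer_test_utils_sketch
```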
(Diffs for the remaining 12 changed files are not shown.)
