
Commit ef579f2
Merge pull request #23780 from ztlpn/deprecate-leader-balancer-mode
Deprecate leader balancer mode
mmaslankaprv authored Oct 15, 2024
2 parents 117a5e4 + 202bf2f commit ef579f2
Showing 20 changed files with 104 additions and 729 deletions.
src/v/cluster/BUILD (1 change: 0 additions & 1 deletion)
@@ -550,7 +550,6 @@ redpanda_cc_library(
"scheduling/constraints.h",
"scheduling/leader_balancer.h",
"scheduling/leader_balancer_constraints.h",
"scheduling/leader_balancer_greedy.h",
"scheduling/leader_balancer_probe.h",
"scheduling/leader_balancer_random.h",
"scheduling/leader_balancer_strategy.h",
src/v/cluster/scheduling/leader_balancer.cc (31 changes: 6 additions & 25 deletions)
@@ -17,7 +17,6 @@
#include "cluster/logger.h"
#include "cluster/members_table.h"
#include "cluster/partition_leaders_table.h"
#include "cluster/scheduling/leader_balancer_greedy.h"
#include "cluster/scheduling/leader_balancer_random.h"
#include "cluster/scheduling/leader_balancer_strategy.h"
#include "cluster/scheduling/leader_balancer_types.h"
@@ -578,30 +577,12 @@ ss::future<ss::stop_iteration> leader_balancer::balance() {

auto muted_nodes = collect_muted_nodes(health_report.value());

auto mode = config::shard_local_cfg().leader_balancer_mode();
std::unique_ptr<leader_balancer_strategy> strategy;

switch (mode) {
case model::leader_balancer_mode::random_hill_climbing: {
vlog(clusterlog.debug, "using random_hill_climbing");

strategy = std::make_unique<
leader_balancer_types::random_hill_climbing_strategy>(
std::move(index),
std::move(group_id_to_topic),
leader_balancer_types::muted_index{std::move(muted_nodes), {}},
std::move(preference_index));
break;
}
case model::leader_balancer_mode::greedy_balanced_shards:
vlog(clusterlog.debug, "using greedy_balanced_shards");
strategy = std::make_unique<greedy_balanced_shards>(
std::move(index), std::move(muted_nodes));
break;
default:
vlog(clusterlog.error, "unexpected mode value: {}", mode);
co_return ss::stop_iteration::no;
}
std::unique_ptr<leader_balancer_strategy> strategy
= std::make_unique<leader_balancer_types::random_hill_climbing_strategy>(
std::move(index),
std::move(group_id_to_topic),
leader_balancer_types::muted_index{std::move(muted_nodes), {}},
std::move(preference_index));

auto cores = strategy->stats();

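After this change, `balance()` no longer consults the `leader_balancer_mode` cluster config: it unconditionally builds the random hill-climbing strategy. The sketch below illustrates the strategy interface as it is exercised in this diff (`error()`, `find_movement()`, `stats()`); the types and signatures are simplified assumptions, not Redpanda's actual headers.

```cpp
// Minimal sketch, not the real Redpanda interface: types and signatures are
// simplified assumptions modelled on the calls visible in this diff.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

namespace sketch {

struct shard { int node = 0; int core = 0; };
struct reassignment { std::uint64_t group = 0; shard from; shard to; };
struct shard_load { shard s; std::size_t leaders = 0; };

class leader_balancer_strategy {
public:
    virtual ~leader_balancer_strategy() = default;
    // Distance of the current leadership placement from the optimum.
    virtual double error() const = 0;
    // One leadership movement that would reduce the error, if any exists.
    virtual std::optional<reassignment> find_movement() = 0;
    // Per-shard leader counts, used for metrics.
    virtual std::vector<shard_load> stats() const = 0;
};

// One balancer tick after this commit: a single strategy is constructed
// unconditionally and asked for the next improving movement.
std::optional<reassignment> next_move(leader_balancer_strategy& strategy) {
    return strategy.find_movement();
}

} // namespace sketch
```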
src/v/cluster/scheduling/leader_balancer_constraints.cc (75 changes: 0 additions & 75 deletions)
@@ -46,15 +46,6 @@ void even_topic_distribution_constraint::update_index(const reassignment& r) {
_topic_shard_index.at(topic_id).at(r.to) += 1;
}

std::optional<reassignment>
even_topic_distribution_constraint::recommended_reassignment() {
// This method is deprecated and is ony used in `leader_balancer_greedy`
// which doesn't use the `even_topic_distributon_constraint`. Hence there is
// no need to implement it here. Once the greedy balancer has been removed
// this should be removed as well.
vassert(false, "not implemented");
}

void even_topic_distribution_constraint::rebuild_indexes() {
_topic_shard_index.clear();
_topic_replica_index.clear();
@@ -153,64 +144,6 @@ double even_topic_distribution_constraint::adjusted_error(
return current_error;
}

std::optional<reassignment>
even_shard_load_constraint::recommended_reassignment() {
auto [load, load_map] = build_load_indexes();
const auto curr_error = error();

// Consider each group from high load core, and record the reassignment
// involving the lowest load "to" core.
for (const auto& from : boost::adaptors::reverse(load)) {
if (mi().muted_nodes().contains(from->first.node_id)) {
continue;
}

constexpr size_t load_unset = std::numeric_limits<size_t>::max();
size_t lowest_load = load_unset;
reassignment lowest_reassign{};

// Consider each group from high load core, and record the
// reassignment involving the lowest load "to" core.
for (const auto& group : from->second) {
if (mi().muted_groups().contains(
static_cast<uint64_t>(group.first))) {
continue;
}

// iterate over all the replicas and look for the lowest load
// shard in the replica list
for (const auto& to_shard : group.second) {
auto load = load_map.at(to_shard);
if (likely(load >= lowest_load)) {
// there is no point in evaluating this move, it is
// worse than the best one we've found so far.
continue;
}

if (mi().muted_nodes().contains(to_shard.node_id)) {
continue;
}

lowest_load = load;
lowest_reassign = {group.first, from->first, to_shard};
}
}

if (lowest_load != load_unset) {
// We found a possible reassignment while looking at the current
// "from" shard, and while it is the best possible reassignment
// found it may not improve the error
auto new_error = adjusted_error(
curr_error, lowest_reassign.from, lowest_reassign.to);
if (new_error + error_jitter < curr_error) {
return lowest_reassign;
}
}
}

return std::nullopt;
}

double even_shard_load_constraint::adjusted_error(
double current_error,
const model::broker_shard& from,
@@ -320,12 +253,4 @@ double pinning_constraint::evaluate_internal(const reassignment& r) {
return diff;
}

std::optional<reassignment> pinning_constraint::recommended_reassignment() {
// This method is deprecated and is ony used in `leader_balancer_greedy`
// which doesn't use the `even_topic_distributon_constraint`. Hence there is
// no need to implement it here. Once the greedy balancer has been removed
// this should be removed as well.
vassert(false, "not implemented");
}

} // namespace cluster::leader_balancer_types
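The deleted `even_shard_load_constraint::recommended_reassignment()` was the heart of the greedy balancer: visit shards from most to least loaded and, for each group led on a loaded shard, pick the least-loaded replica shard as the target, accepting the move only if it reduces the constraint's error. A simplified, self-contained sketch of that search follows; the container types are hypothetical stand-ins, and the muted-node/group filtering and error-jitter test of the real code are reduced to a plain load comparison.

```cpp
// Simplified sketch of the greedy search removed in this commit. Types are
// hypothetical stand-ins, not Redpanda's shard_index/muted_index machinery.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <map>
#include <optional>
#include <vector>

namespace sketch {

using shard_id = int;
using group_id = std::uint64_t;

struct reassignment { group_id group; shard_id from; shard_id to; };

// leaders[s]  = groups whose leader currently sits on shard s
// replicas[g] = shards that hold a replica of group g
std::optional<reassignment> greedy_move(
  const std::map<shard_id, std::vector<group_id>>& leaders,
  const std::map<group_id, std::vector<shard_id>>& replicas) {
    // Visit source shards from most to least loaded.
    std::vector<shard_id> by_load;
    by_load.reserve(leaders.size());
    for (const auto& entry : leaders) { by_load.push_back(entry.first); }
    std::sort(by_load.begin(), by_load.end(), [&](shard_id a, shard_id b) {
        return leaders.at(a).size() > leaders.at(b).size();
    });

    for (shard_id from : by_load) {
        std::size_t lowest = std::numeric_limits<std::size_t>::max();
        std::optional<reassignment> best;
        for (group_id g : leaders.at(from)) {
            // Candidate targets are the group's other replica shards.
            for (shard_id to : replicas.at(g)) {
                if (to == from) { continue; }
                auto it = leaders.find(to);
                std::size_t load = it == leaders.end() ? 0 : it->second.size();
                if (load >= lowest) { continue; } // cannot beat the best so far
                lowest = load;
                best = reassignment{g, from, to};
            }
        }
        // Accept only if the move leaves the target less loaded than the
        // source, i.e. it actually reduces the imbalance (the real code
        // compares an adjusted error value against a small jitter threshold).
        if (best && lowest + 1 < leaders.at(from).size()) {
            return best;
        }
    }
    return std::nullopt;
}

} // namespace sketch
```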
src/v/cluster/scheduling/leader_balancer_constraints.h (33 changes: 0 additions & 33 deletions)
@@ -133,20 +133,6 @@ class even_topic_distribution_constraint final

void update_index(const reassignment& r) override;

/*
* Uses a greedy algorithm to generate a recommended reassignment
* that will reduce error for this constraint.
*
* The gist is that it searches topics in order of how skewed they are.
* Then for each topic it finds a partiton on the node with the most
* leadership and tries to move it to one of its replica nodes. If this
* reassignment reduces error its returned.
*
* If no reassignment can reduce error for this constraint std::nullopt is
* returned.
*/
std::optional<reassignment> recommended_reassignment() override;

private:
std::reference_wrapper<const shard_index> _si;
std::reference_wrapper<const muted_index> _mi;
@@ -233,23 +219,6 @@ class even_shard_load_constraint final
_error = adjusted_error(_error, r.from, r.to);
}

/*
* Find a group reassignment that improves overall error. The general
* approach is to select a group from the highest loaded shard and move
* leadership for that group to the least loaded shard that the group is
* compatible with.
*
* Clearly this is a costly method in terms of runtime complexity.
* Measurements for clusters with several thousand partitions indicate a
* real time execution cost of at most a couple hundred micros. Other
* strategies are sure to improve this as we tackle larger configurations.
*
* Muted nodes are nodes that should be treated as if they have no available
* capacity. So do not move leadership to a muted node, but any leaders on a
* muted node should not be touched in case the mute is temporary.
*/
std::optional<reassignment> recommended_reassignment() override;

std::vector<shard_load> stats() const;

double calc_target_load() const;
@@ -314,8 +283,6 @@ class pinning_constraint final : public soft_constraint {
private:
double evaluate_internal(const reassignment& r) override;

std::optional<reassignment> recommended_reassignment() override;

private:
std::reference_wrapper<const group_id_to_topic_id> _group2topic;
preference_index _preference_idx;
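The removed header comments also describe the topic-distribution variant of the greedy search: order topics by how skewed their leadership is, then try to move a partition of the most-skewed topic off the node leading most of it onto one of its replica nodes. A hypothetical sketch of the skew-ordering step is below; the per-topic move search follows the same pattern as the shard-load sketch above, and the skew metric here is one plausible reading, since the removed comment does not spell it out.

```cpp
// Hypothetical sketch of ordering topics by leadership skew; types and the
// skew definition are stand-ins, not Redpanda's real indexes.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <map>
#include <vector>

namespace sketch {

using topic_id = std::uint64_t;
using node_id = int;

// leaders[t][n] = number of leaders of topic t currently on node n
using topic_leader_index = std::map<topic_id, std::map<node_id, std::size_t>>;

// Skew of a topic: leaders on its busiest node minus leaders on its least
// busy node.
inline std::size_t skew(const std::map<node_id, std::size_t>& per_node) {
    if (per_node.empty()) {
        return 0;
    }
    std::size_t lo = std::numeric_limits<std::size_t>::max();
    std::size_t hi = 0;
    for (const auto& entry : per_node) {
        lo = std::min(lo, entry.second);
        hi = std::max(hi, entry.second);
    }
    return hi - lo;
}

// Topics are visited from most to least skewed before trying a move.
inline std::vector<topic_id> topics_by_skew(const topic_leader_index& index) {
    std::vector<topic_id> order;
    order.reserve(index.size());
    for (const auto& entry : index) {
        order.push_back(entry.first);
    }
    std::sort(order.begin(), order.end(), [&](topic_id a, topic_id b) {
        return skew(index.at(a)) > skew(index.at(b));
    });
    return order;
}

} // namespace sketch
```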
src/v/cluster/scheduling/leader_balancer_greedy.h (79 changes: 0 additions & 79 deletions)

This file was deleted.

src/v/cluster/scheduling/leader_balancer_types.h (2 changes: 0 additions & 2 deletions)
@@ -97,8 +97,6 @@ class soft_constraint {
auto ret = evaluate_internal(r);
return ret;
}

virtual std::optional<reassignment> recommended_reassignment() = 0;
};

} // namespace cluster::leader_balancer_types
src/v/cluster/tests/BUILD (1 change: 1 addition & 0 deletions)
@@ -110,6 +110,7 @@ redpanda_cc_btest(
"//src/v/cluster",
"//src/v/model",
"//src/v/test_utils:seastar_boost",
"//src/v/utils:to_string",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/container:flat_hash_set",
"@boost//:test",
src/v/cluster/tests/leader_balancer_bench.cc (43 changes: 2 additions & 41 deletions)
@@ -9,7 +9,6 @@
* by the Apache License, Version 2.0
*/
#include "cluster/scheduling/leader_balancer_constraints.h"
#include "cluster/scheduling/leader_balancer_greedy.h"
#include "cluster/scheduling/leader_balancer_random.h"
#include "cluster/scheduling/leader_balancer_types.h"
#include "leader_balancer_test_utils.h"
@@ -25,19 +24,6 @@ constexpr int shards_per_node = 16; // i.e., cores per node
constexpr int groups_per_shard = 80; // group == partition in this context
constexpr int replicas = 3; // number of replicas

cluster::leader_balancer_types::group_id_to_topic_id make_gid_to_topic_index(
const cluster::leader_balancer_types::index_type& index) {
cluster::leader_balancer_types::group_id_to_topic_id ret;

for (const auto& [bs, leaders] : index) {
for (const auto& [group, replicas] : leaders) {
ret.emplace(group, 0);
}
}

return ret;
}

/*
* Measures the time it takes to randomly generate and evaluate every possible
* reassignment for a given cluster. The reassignments are evaluated by the
@@ -54,7 +40,8 @@ void random_search_eval_bench(bool measure_all) {
cluster::leader_balancer_types::muted_index mi{{}, {}};
cluster::leader_balancer_types::shard_index si(
leader_balancer_test_utils::copy_cluster_index(index));
auto gid_topic = make_gid_to_topic_index(si.shards());
auto gid_topic = leader_balancer_test_utils::make_gid_to_topic_index(
si.shards());

if (measure_all) {
perf_tests::start_measuring_time();
@@ -115,34 +102,8 @@ void random_bench() {
perf_tests::stop_measuring_time();
}

void balancer_bench(bool measure_all) {
cluster::leader_balancer_strategy::index_type index
= leader_balancer_test_utils::make_cluster_index(
node_count, shards_per_node, groups_per_shard, replicas);

if (measure_all) {
perf_tests::start_measuring_time();
}

auto greed = cluster::greedy_balanced_shards(std::move(index), {});
vassert(greed.error() == 0, "");
vassert(greed.error() == 0, "e > 0");

// movement should be _from_ the overloaded shard
if (!measure_all) {
perf_tests::start_measuring_time();
}
auto movement = greed.find_movement({});
vassert(!movement, "movemement ");
perf_tests::do_not_optimize(movement);
perf_tests::stop_measuring_time();
}

} // namespace

PERF_TEST(lb, even_shard_load_movement) { balancer_bench(false); }
PERF_TEST(lb, even_shard_load_all) { balancer_bench(true); }

PERF_TEST(lb, random_eval_movement) {
random_search_eval_bench<
cluster::leader_balancer_types::random_reassignments>(false);
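The benchmark's local `make_gid_to_topic_index` helper and the greedy-specific `balancer_bench` cases are removed; the remaining benchmarks now call `leader_balancer_test_utils::make_gid_to_topic_index`. Presumably that shared helper mirrors the deleted local one; a hypothetical sketch with simplified index types:

```cpp
// Hypothetical sketch of the shared test helper the benchmark now calls. It
// mirrors the deleted local helper above; the index/map types are simplified
// stand-ins for the real index_type and group_id_to_topic_id.
#include <cstdint>
#include <map>
#include <vector>

namespace leader_balancer_test_utils_sketch {

using group_id = std::uint64_t;
using topic_id = std::uint64_t;

struct shard { int node = 0; int core = 0; };

// broker shard -> (raft group -> replica shards)
using index_type = std::map<int, std::map<group_id, std::vector<shard>>>;
using group_id_to_topic_id = std::map<group_id, topic_id>;

group_id_to_topic_id make_gid_to_topic_index(const index_type& index) {
    group_id_to_topic_id ret;
    for (const auto& [bs, leaders] : index) {
        for (const auto& [group, replicas] : leaders) {
            // The benchmark does not need real topic ids, so every group is
            // mapped to topic 0, exactly as the deleted local helper did.
            ret.emplace(group, topic_id{0});
        }
    }
    return ret;
}

} // namespace leader_balancer_test_utils_sketch
```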
(Diffs for the remaining 12 changed files are not shown.)
