Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate leader balancer mode #23780

Merged
merged 7 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/v/cluster/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,6 @@ redpanda_cc_library(
"scheduling/constraints.h",
"scheduling/leader_balancer.h",
"scheduling/leader_balancer_constraints.h",
"scheduling/leader_balancer_greedy.h",
"scheduling/leader_balancer_probe.h",
"scheduling/leader_balancer_random.h",
"scheduling/leader_balancer_strategy.h",
Expand Down
31 changes: 6 additions & 25 deletions src/v/cluster/scheduling/leader_balancer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "cluster/logger.h"
#include "cluster/members_table.h"
#include "cluster/partition_leaders_table.h"
#include "cluster/scheduling/leader_balancer_greedy.h"
#include "cluster/scheduling/leader_balancer_random.h"
#include "cluster/scheduling/leader_balancer_strategy.h"
#include "cluster/scheduling/leader_balancer_types.h"
Expand Down Expand Up @@ -578,30 +577,12 @@ ss::future<ss::stop_iteration> leader_balancer::balance() {

auto muted_nodes = collect_muted_nodes(health_report.value());

auto mode = config::shard_local_cfg().leader_balancer_mode();
std::unique_ptr<leader_balancer_strategy> strategy;

switch (mode) {
case model::leader_balancer_mode::random_hill_climbing: {
vlog(clusterlog.debug, "using random_hill_climbing");

strategy = std::make_unique<
leader_balancer_types::random_hill_climbing_strategy>(
std::move(index),
std::move(group_id_to_topic),
leader_balancer_types::muted_index{std::move(muted_nodes), {}},
std::move(preference_index));
break;
}
case model::leader_balancer_mode::greedy_balanced_shards:
vlog(clusterlog.debug, "using greedy_balanced_shards");
strategy = std::make_unique<greedy_balanced_shards>(
std::move(index), std::move(muted_nodes));
break;
default:
vlog(clusterlog.error, "unexpected mode value: {}", mode);
co_return ss::stop_iteration::no;
}
std::unique_ptr<leader_balancer_strategy> strategy
= std::make_unique<leader_balancer_types::random_hill_climbing_strategy>(
std::move(index),
std::move(group_id_to_topic),
leader_balancer_types::muted_index{std::move(muted_nodes), {}},
std::move(preference_index));

auto cores = strategy->stats();

Expand Down
75 changes: 0 additions & 75 deletions src/v/cluster/scheduling/leader_balancer_constraints.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,6 @@ void even_topic_distribution_constraint::update_index(const reassignment& r) {
_topic_shard_index.at(topic_id).at(r.to) += 1;
}

std::optional<reassignment>
even_topic_distribution_constraint::recommended_reassignment() {
    // This method is deprecated and is only used in `leader_balancer_greedy`,
    // which doesn't use the `even_topic_distribution_constraint`. Hence there
    // is no need to implement it here. Once the greedy balancer has been
    // removed this should be removed as well.
    vassert(false, "not implemented");
}

void even_topic_distribution_constraint::rebuild_indexes() {
_topic_shard_index.clear();
_topic_replica_index.clear();
Expand Down Expand Up @@ -153,64 +144,6 @@ double even_topic_distribution_constraint::adjusted_error(
return current_error;
}

std::optional<reassignment>
even_shard_load_constraint::recommended_reassignment() {
    // `load` is the per-shard load ordering and `load_map` maps a
    // broker_shard to its current leader count (see build_load_indexes()).
    auto [load, load_map] = build_load_indexes();
    const auto curr_error = error();

    // Walk shards from the most loaded "from" core downwards; for each one,
    // record the candidate reassignment that lands on the lowest-load "to"
    // core.
    for (const auto& from : boost::adaptors::reverse(load)) {
        // Leaders on a muted node are left untouched in case the mute is
        // temporary.
        if (mi().muted_nodes().contains(from->first.node_id)) {
            continue;
        }

        constexpr size_t load_unset = std::numeric_limits<size_t>::max();
        size_t lowest_load = load_unset;
        reassignment lowest_reassign{};

        // Consider each group led from this high-load core, and record the
        // reassignment involving the lowest load "to" core.
        for (const auto& group : from->second) {
            if (mi().muted_groups().contains(
                  static_cast<uint64_t>(group.first))) {
                continue;
            }

            // iterate over all the replicas and look for the lowest load
            // shard in the replica list
            for (const auto& to_shard : group.second) {
                // NOTE: this `load` intentionally shadows the outer index;
                // it is the candidate destination shard's leader count.
                auto load = load_map.at(to_shard);
                if (likely(load >= lowest_load)) {
                    // there is no point in evaluating this move, it is
                    // worse than the best one we've found so far.
                    continue;
                }

                // Never move leadership onto a muted node.
                if (mi().muted_nodes().contains(to_shard.node_id)) {
                    continue;
                }

                lowest_load = load;
                lowest_reassign = {group.first, from->first, to_shard};
            }
        }

        if (lowest_load != load_unset) {
            // We found a possible reassignment while looking at the current
            // "from" shard, and while it is the best possible reassignment
            // found it may not improve the error
            auto new_error = adjusted_error(
              curr_error, lowest_reassign.from, lowest_reassign.to);
            // Require improvement beyond error_jitter to avoid oscillating
            // on negligible gains.
            if (new_error + error_jitter < curr_error) {
                return lowest_reassign;
            }
        }
    }

    // No reassignment improves the error for this constraint.
    return std::nullopt;
}

double even_shard_load_constraint::adjusted_error(
double current_error,
const model::broker_shard& from,
Expand Down Expand Up @@ -320,12 +253,4 @@ double pinning_constraint::evaluate_internal(const reassignment& r) {
return diff;
}

std::optional<reassignment> pinning_constraint::recommended_reassignment() {
    // This method is deprecated and is only used in `leader_balancer_greedy`,
    // which doesn't use the `pinning_constraint`. Hence there is no need to
    // implement it here. Once the greedy balancer has been removed this
    // should be removed as well.
    vassert(false, "not implemented");
}

} // namespace cluster::leader_balancer_types
33 changes: 0 additions & 33 deletions src/v/cluster/scheduling/leader_balancer_constraints.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,20 +133,6 @@ class even_topic_distribution_constraint final

void update_index(const reassignment& r) override;

/*
* Uses a greedy algorithm to generate a recommended reassignment
* that will reduce error for this constraint.
*
* The gist is that it searches topics in order of how skewed they are.
* Then for each topic it finds a partiton on the node with the most
* leadership and tries to move it to one of its replica nodes. If this
* reassignment reduces error its returned.
*
* If no reassignment can reduce error for this constraint std::nullopt is
* returned.
*/
std::optional<reassignment> recommended_reassignment() override;

private:
std::reference_wrapper<const shard_index> _si;
std::reference_wrapper<const muted_index> _mi;
Expand Down Expand Up @@ -233,23 +219,6 @@ class even_shard_load_constraint final
_error = adjusted_error(_error, r.from, r.to);
}

/*
* Find a group reassignment that improves overall error. The general
* approach is to select a group from the highest loaded shard and move
* leadership for that group to the least loaded shard that the group is
* compatible with.
*
* Clearly this is a costly method in terms of runtime complexity.
* Measurements for clusters with several thousand partitions indicate a
* real time execution cost of at most a couple hundred micros. Other
* strategies are sure to improve this as we tackle larger configurations.
*
* Muted nodes are nodes that should be treated as if they have no available
* capacity. So do not move leadership to a muted node, but any leaders on a
* muted node should not be touched in case the mute is temporary.
*/
std::optional<reassignment> recommended_reassignment() override;

std::vector<shard_load> stats() const;

double calc_target_load() const;
Expand Down Expand Up @@ -314,8 +283,6 @@ class pinning_constraint final : public soft_constraint {
private:
double evaluate_internal(const reassignment& r) override;

std::optional<reassignment> recommended_reassignment() override;

private:
std::reference_wrapper<const group_id_to_topic_id> _group2topic;
preference_index _preference_idx;
Expand Down
79 changes: 0 additions & 79 deletions src/v/cluster/scheduling/leader_balancer_greedy.h

This file was deleted.

2 changes: 0 additions & 2 deletions src/v/cluster/scheduling/leader_balancer_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ class soft_constraint {
auto ret = evaluate_internal(r);
return ret;
}

virtual std::optional<reassignment> recommended_reassignment() = 0;
};

} // namespace cluster::leader_balancer_types
1 change: 1 addition & 0 deletions src/v/cluster/tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ redpanda_cc_btest(
"//src/v/cluster",
"//src/v/model",
"//src/v/test_utils:seastar_boost",
"//src/v/utils:to_string",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/container:flat_hash_set",
"@boost//:test",
Expand Down
43 changes: 2 additions & 41 deletions src/v/cluster/tests/leader_balancer_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
* by the Apache License, Version 2.0
*/
#include "cluster/scheduling/leader_balancer_constraints.h"
#include "cluster/scheduling/leader_balancer_greedy.h"
#include "cluster/scheduling/leader_balancer_random.h"
#include "cluster/scheduling/leader_balancer_types.h"
#include "leader_balancer_test_utils.h"
Expand All @@ -25,19 +24,6 @@ constexpr int shards_per_node = 16; // i.e., cores per node
constexpr int groups_per_shard = 80; // group == partition in this context
constexpr int replicas = 3; // number of replicas

// Build an index mapping every raft group that appears in `index` to a
// placeholder topic id of 0. The benchmarks do not exercise per-topic
// behavior, so a single shared topic id is sufficient.
cluster::leader_balancer_types::group_id_to_topic_id make_gid_to_topic_index(
  const cluster::leader_balancer_types::index_type& index) {
    cluster::leader_balancer_types::group_id_to_topic_id mapping;
    for (const auto& shard_entry : index) {
        for (const auto& group_entry : shard_entry.second) {
            mapping.emplace(group_entry.first, 0);
        }
    }
    return mapping;
}

/*
* Measures the time it takes to randomly generate and evaluate every possible
* reassignment for a given cluster. The reassignments are evaluated by the
Expand All @@ -54,7 +40,8 @@ void random_search_eval_bench(bool measure_all) {
cluster::leader_balancer_types::muted_index mi{{}, {}};
cluster::leader_balancer_types::shard_index si(
leader_balancer_test_utils::copy_cluster_index(index));
auto gid_topic = make_gid_to_topic_index(si.shards());
auto gid_topic = leader_balancer_test_utils::make_gid_to_topic_index(
si.shards());

if (measure_all) {
perf_tests::start_measuring_time();
Expand Down Expand Up @@ -115,34 +102,8 @@ void random_bench() {
perf_tests::stop_measuring_time();
}

void balancer_bench(bool measure_all) {
cluster::leader_balancer_strategy::index_type index
= leader_balancer_test_utils::make_cluster_index(
node_count, shards_per_node, groups_per_shard, replicas);

if (measure_all) {
perf_tests::start_measuring_time();
}

auto greed = cluster::greedy_balanced_shards(std::move(index), {});
vassert(greed.error() == 0, "");
vassert(greed.error() == 0, "e > 0");

// movement should be _from_ the overloaded shard
if (!measure_all) {
perf_tests::start_measuring_time();
}
auto movement = greed.find_movement({});
vassert(!movement, "movemement ");
perf_tests::do_not_optimize(movement);
perf_tests::stop_measuring_time();
}

} // namespace

// Greedy-balancer benchmarks: `*_movement` times only the find_movement()
// call, while `*_all` also includes strategy construction (see
// balancer_bench's `measure_all` flag).
PERF_TEST(lb, even_shard_load_movement) { balancer_bench(false); }
PERF_TEST(lb, even_shard_load_all) { balancer_bench(true); }

PERF_TEST(lb, random_eval_movement) {
random_search_eval_bench<
cluster::leader_balancer_types::random_reassignments>(false);
Expand Down
Loading