Skip to content

Commit

Permalink
c/leader_balancer: replaced muted group index with roaring bitmap
Browse files Browse the repository at this point in the history
The muted group index was using a `absl::flat_has_set` the use of flat
data structure lead to oversized allocation when the number of
muted/skipped raft group was large. Replaced a hash set with roaring
bitmap. The roaring bitmap is efficient and compressed leading to better
performance and smaller allocation.

Fixes: redpanda-data#12005

Signed-off-by: Michal Maslanka <michal@redpanda.com>
(cherry picked from commit 745c324)
  • Loading branch information
mmaslankaprv authored and vbotbuildovich committed Mar 7, 2024
1 parent 3805bc4 commit 9ce5a47
Show file tree
Hide file tree
Showing 11 changed files with 44 additions and 35 deletions.
8 changes: 4 additions & 4 deletions src/v/cluster/scheduling/leader_balancer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -578,11 +578,11 @@ absl::flat_hash_set<model::node_id> leader_balancer::muted_nodes() const {
return nodes;
}

absl::flat_hash_set<raft::group_id> leader_balancer::muted_groups() const {
absl::flat_hash_set<raft::group_id> res;
res.reserve(_muted.size());
leader_balancer_types::muted_groups_t leader_balancer::muted_groups() const {
leader_balancer_types::muted_groups_t res;

for (const auto& e : _muted) {
res.insert(e.first);
res.add(static_cast<uint64_t>(e.first));
}
return res;
}
Expand Down
2 changes: 1 addition & 1 deletion src/v/cluster/scheduling/leader_balancer.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class leader_balancer {
leader_balancer_types::group_id_to_topic_revision_t
build_group_id_to_topic_rev() const;
index_type build_index();
absl::flat_hash_set<raft::group_id> muted_groups() const;
leader_balancer_types::muted_groups_t muted_groups() const;
absl::flat_hash_set<model::node_id> muted_nodes() const;

ss::future<bool> do_transfer(reassignment);
Expand Down
6 changes: 4 additions & 2 deletions src/v/cluster/scheduling/leader_balancer_constraints.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ even_topic_distributon_constraint::recommended_reassignment() {
const auto& replicas = g_info.replicas;

// Don't try moving any groups that are currently muted.
if (mi().muted_groups().contains(group)) {
if (mi().muted_groups().contains(
static_cast<uint64_t>(group))) {
continue;
}

Expand Down Expand Up @@ -269,7 +270,8 @@ even_shard_load_constraint::recommended_reassignment() {
// Consider each group from high load core, and record the
// reassignment involving the lowest load "to" core.
for (const auto& group : from->second) {
if (mi().muted_groups().contains(group.first)) {
if (mi().muted_groups().contains(
static_cast<uint64_t>(group.first))) {
continue;
}

Expand Down
10 changes: 5 additions & 5 deletions src/v/cluster/scheduling/leader_balancer_constraints.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class muted_index final : public index {
public:
muted_index(
absl::flat_hash_set<model::node_id> muted_nodes,
absl::flat_hash_set<raft::group_id> muted_groups)
leader_balancer_types::muted_groups_t muted_groups)
: _muted_nodes(std::move(muted_nodes))
, _muted_groups(std::move(muted_groups)) {}

Expand All @@ -80,25 +80,25 @@ class muted_index final : public index {
return _muted_nodes;
}

const absl::flat_hash_set<raft::group_id>& muted_groups() const {
const leader_balancer_types::muted_groups_t& muted_groups() const {
return _muted_groups;
}

absl::flat_hash_set<raft::group_id>& muted_groups() {
leader_balancer_types::muted_groups_t& muted_groups() {
return _muted_groups;
}

void update_index(const reassignment&) override {
// nothing to do here.
}

void update_muted_groups(absl::flat_hash_set<raft::group_id> new_groups) {
void update_muted_groups(leader_balancer_types::muted_groups_t new_groups) {
_muted_groups = std::move(new_groups);
}

private:
absl::flat_hash_set<model::node_id> _muted_nodes;
absl::flat_hash_set<raft::group_id> _muted_groups;
leader_balancer_types::muted_groups_t _muted_groups;
};

/*
Expand Down
4 changes: 2 additions & 2 deletions src/v/cluster/scheduling/leader_balancer_greedy.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class greedy_balanced_shards final : public leader_balancer_strategy {
explicit greedy_balanced_shards(
index_type cores, absl::flat_hash_set<model::node_id> muted_nodes)
: _mi(std::make_unique<leader_balancer_types::muted_index>(
std::move(muted_nodes), absl::flat_hash_set<raft::group_id>{}))
std::move(muted_nodes), leader_balancer_types::muted_groups_t{}))
, _even_shard_load_c(
leader_balancer_types::shard_index{std::move(cores)}, *_mi) {}

Expand All @@ -55,7 +55,7 @@ class greedy_balanced_shards final : public leader_balancer_strategy {
* muted node should not be touched in case the mute is temporary.
*/
std::optional<reassignment>
find_movement(const absl::flat_hash_set<raft::group_id>& skip) final {
find_movement(const leader_balancer_types::muted_groups_t& skip) final {
_mi->update_muted_groups(skip);
return _even_shard_load_c.recommended_reassignment();
}
Expand Down
5 changes: 3 additions & 2 deletions src/v/cluster/scheduling/leader_balancer_random.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <functional>
#include <memory>
Expand Down Expand Up @@ -119,7 +120,7 @@ class random_hill_climbing_strategy final : public leader_balancer_strategy {
* Find a group reassignment that reduces total error.
*/
std::optional<reassignment>
find_movement(const absl::flat_hash_set<raft::group_id>& skip) override {
find_movement(const leader_balancer_types::muted_groups_t& skip) override {
for (;;) {
auto reassignment_opt = _reassignments.generate_reassignment();

Expand All @@ -129,7 +130,7 @@ class random_hill_climbing_strategy final : public leader_balancer_strategy {

auto reassignment = *reassignment_opt;
if (
skip.contains(reassignment.group)
skip.contains(static_cast<uint64_t>(reassignment.group))
|| _mi->muted_nodes().contains(reassignment.from.node_id)
|| _mi->muted_nodes().contains(reassignment.to.node_id)) {
continue;
Expand Down
2 changes: 1 addition & 1 deletion src/v/cluster/scheduling/leader_balancer_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class leader_balancer_strategy {
* Find a group reassignment that reduces total error.
*/
virtual std::optional<reassignment>
find_movement(const absl::flat_hash_set<raft::group_id>& skip) = 0;
find_movement(const leader_balancer_types::muted_groups_t& skip) = 0;

virtual void apply_movement(const reassignment& reassignment) = 0;

Expand Down
3 changes: 3 additions & 0 deletions src/v/cluster/scheduling/leader_balancer_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <absl/container/btree_map.h>
#include <absl/container/flat_hash_map.h>
#include <absl/container/flat_hash_set.h>
#include <absl/container/node_hash_map.h>
#include <roaring/roaring64map.hh>

namespace cluster::leader_balancer_types {

Expand All @@ -43,6 +45,7 @@ using index_type = absl::node_hash_map<
using group_id_to_topic_revision_t
= absl::btree_map<raft::group_id, model::revision_id>;

using muted_groups_t = roaring::Roaring64Map;
/*
* Leaders per shard.
*/
Expand Down
5 changes: 3 additions & 2 deletions src/v/cluster/tests/leader_balancer_constraints_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* the Business Source License, use of this software will be governed
* by the Apache License, Version 2.0
*/
#include <cstdint>
#define BOOST_TEST_MODULE leader_balancer_constraints

#include "cluster/scheduling/leader_balancer_constraints.h"
Expand Down Expand Up @@ -510,7 +511,7 @@ BOOST_AUTO_TEST_CASE(topic_skew_error) {
auto rhc = lbt::random_hill_climbing_strategy(
shard_index.shards(), g_id_to_t_id, lbt::muted_index{{}, {}});

absl::flat_hash_set<raft::group_id> muted_groups{};
cluster::leader_balancer_types::muted_groups_t muted_groups{};

auto pre_topic_error = even_topic_con.error();
auto pre_shard_error = even_shard_con.error();
Expand All @@ -524,7 +525,7 @@ BOOST_AUTO_TEST_CASE(topic_skew_error) {
rhc.apply_movement(*movement_opt);
even_shard_con.update_index(*movement_opt);
even_topic_con.update_index(*movement_opt);
muted_groups.insert(movement_opt->group);
muted_groups.add(static_cast<uint64_t>(movement_opt->group));

auto new_error = rhc.error();
BOOST_REQUIRE(new_error <= current_error);
Expand Down
16 changes: 9 additions & 7 deletions src/v/cluster/tests/leader_balancer_constraints_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,11 @@ static bool no_movement(
const absl::flat_hash_set<int>& muted = {},
const absl::flat_hash_set<raft::group_id>& skip = {}) {
auto [shard_index, muted_index] = from_spec(spec, muted);
muted_index.update_muted_groups(skip);
cluster::leader_balancer_types::muted_groups_t skip_typed;
for (auto& s : skip) {
skip_typed.add(static_cast<uint64_t>(s));
}
muted_index.update_muted_groups(skip_typed);
auto balancer = lbt::even_shard_load_constraint(shard_index, muted_index);

return !balancer.recommended_reassignment();
Expand All @@ -175,12 +179,10 @@ static ss::sstring expect_movement(
const absl::flat_hash_set<int>& skip = {}) {
auto [shard_index, mute_index] = from_spec(spec, muted);

absl::flat_hash_set<raft::group_id> skip_typed;
std::transform(
skip.begin(),
skip.end(),
std::inserter(skip_typed, skip_typed.begin()),
[](auto groupid) { return raft::group_id{groupid}; });
cluster::leader_balancer_types::muted_groups_t skip_typed;
for (auto& s : skip) {
skip_typed.add(static_cast<uint64_t>(s));
}

mute_index.update_muted_groups(std::move(skip_typed));
auto balancer = lbt::even_shard_load_constraint(shard_index, mute_index);
Expand Down
18 changes: 9 additions & 9 deletions src/v/cluster/tests/leader_balancer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
* the Business Source License, use of this software will be governed
* by the Apache License, Version 2.0
*/
#include "cluster/scheduling/leader_balancer_types.h"

#include <cstdint>
#define BOOST_TEST_MODULE leader_balancer

#include "absl/container/flat_hash_map.h"
Expand Down Expand Up @@ -152,7 +155,7 @@ static auto from_spec(
bool no_movement(
const cluster_spec& spec,
const absl::flat_hash_set<int>& muted = {},
const absl::flat_hash_set<raft::group_id>& skip = {}) {
const cluster::leader_balancer_types::muted_groups_t& skip = {}) {
auto [_, balancer] = from_spec(spec, muted);
return !balancer.find_movement(skip);
}
Expand Down Expand Up @@ -206,12 +209,10 @@ ss::sstring expect_movement(
const absl::flat_hash_set<int>& skip = {}) {
auto [index, balancer] = from_spec(spec, muted);

absl::flat_hash_set<raft::group_id> skip_typed;
std::transform(
skip.begin(),
skip.end(),
std::inserter(skip_typed, skip_typed.begin()),
[](auto groupid) { return raft::group_id{groupid}; });
cluster::leader_balancer_types::muted_groups_t skip_typed;
for (auto s : skip) {
skip_typed.add(static_cast<uint64_t>(s));
}

auto reassignment = balancer.find_movement(skip_typed);

Expand Down Expand Up @@ -357,7 +358,6 @@ BOOST_AUTO_TEST_CASE(greedy_skip) {
expect_movement(spec, re(5, 1, 2), {}, {1, 2, 3, 4}), "");

// mute node 0 and skip all groups on node 1, no movement
absl::flat_hash_set<raft::group_id> skip{
raft::group_id(5), raft::group_id(6)};
cluster::leader_balancer_types::muted_groups_t skip{5, 6};
BOOST_REQUIRE(no_movement(spec, {0}, skip));
}

0 comments on commit 9ce5a47

Please sign in to comment.