Skip to content

Commit

Permalink
tests: added retries to raft force reconfiguration test
Browse files Browse the repository at this point in the history
Raft force reconfiguration test is designed to skip one of the nodes
when force updating configuration. In this scenario it may sometimes
happen that the node which wasn't updated with the configuration
override will become a leader. In this situation controller backend in
Redpanda will retry. Added retries to the test to make sure the test
will retry reconfiguration if it was reverted by node which was missing
update.

Fixes: redpanda-data#19938

Signed-off-by: Michał Maślanka <michal@redpanda.com>
  • Loading branch information
mmaslankaprv committed Jul 3, 2024
1 parent 62222c7 commit c23f599
Showing 1 changed file with 59 additions and 26 deletions.
85 changes: 59 additions & 26 deletions src/v/raft/tests/raft_reconfiguration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -497,11 +497,67 @@ TEST_F_CORO(raft_fixture, test_force_reconfiguration) {

std::vector<vnode> base_replica_set = all_vnodes();
size_t reconfiguration_count = 0;
model::revision_id next_rev{1};

auto current_replicas = base_replica_set;

vlog(logger().info, "initial replicas: {}", current_replicas);

auto reconfigure_until_success =
[&](model::revision_id rev, raft::vnode to_skip) {
auto deadline = model::timeout_clock::now() + 30s;
return ss::repeat([this, rev, deadline, to_skip, &current_replicas] {
vassert(
model::timeout_clock::now() < deadline,
"Timeout waiting for reconfiguration");
return ss::parallel_for_each(
nodes().begin(),
nodes().end(),
[&current_replicas, to_skip, rev](
const raft_nodes_t::value_type& pair) {
auto raft = pair.second->raft();
if (pair.second->get_vnode() == to_skip) {
return ss::now();
}
return raft
->force_replace_configuration_locally(
current_replicas, {}, rev)
.discard_result();
})
.then([&current_replicas, this, rev] {
return wait_for_leader(10s)
.then([this](model::node_id id) {
vlog(
logger().info,
"new leader {} elected in term: {}",
id,
nodes()[id]->raft()->term());
})
.then([&, this, rev] {
for (auto& r : current_replicas) {
auto replica_rev
= node(r.id()).raft()->config().revision_id();
if (replica_rev < rev) {
vlog(
logger().warn,
"retrying reconfiguration to {}, requested "
"revision: {}, node {} config revision: {}",
current_replicas,
rev,
r.id(),
replica_rev);
return ss::stop_iteration::no;
}
}
vlog(
logger().info,
"successfully reconfigured to {} with revision: {}",
current_replicas,
rev);
return ss::stop_iteration::yes;
});
});
});
};
auto reconfigure_all = [&, this]() {
/**
* Switch between all 5 replicas and randomly selected 3 of them
Expand All @@ -519,22 +575,8 @@ TEST_F_CORO(raft_fixture, test_force_reconfiguration) {

vlog(logger().info, "reconfiguring group to: {}", current_replicas);
auto to_skip = random_generators::random_choice(base_replica_set);

return ss::parallel_for_each(
nodes().begin(),
nodes().end(),
[&, to_skip](const raft_nodes_t::value_type& pair) {
auto raft = pair.second->raft();
model::revision_id rev = ++raft->config().revision_id();
if (
pair.second->get_vnode() == to_skip
&& !pair.second->raft()->is_leader()) {
return ss::now();
}
return raft
->force_replace_configuration_locally(current_replicas, {}, rev)
.discard_result();
});
auto revision = next_rev++;
return reconfigure_until_success(revision, to_skip);
};

auto reconfigure_fiber = ss::do_until(
Expand All @@ -551,15 +593,6 @@ TEST_F_CORO(raft_fixture, test_force_reconfiguration) {
})
.handle_exception([](const std::exception_ptr&) {
// ignore exception
})
.then([this] {
return wait_for_leader(10s).then([this](model::node_id id) {
vlog(
logger().info,
"new leader {}, term: {}",
id,
nodes()[id]->raft()->term());
});
});
});

Expand Down

0 comments on commit c23f599

Please sign in to comment.