From 5576959996aac89012e461fa2854564ba07b1744 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 23 Apr 2024 12:52:19 +0200 Subject: [PATCH] c/controller_backend: try to force-abort reconfiguration only on leaders Previously, when force-aborting a reconfiguration, we appended an aborting configuration on all replicas. This can lead to log inconsistencies, because on followers the configuration will be duplicated (one copy from the follower's own append, one replicated by the leader). Although these inconsistencies are expected for force-abort, if the leader is alive we can minimize the chance of their appearance by having followers wait for the aborting config to be replicated from the leader instead of appending it themselves. Fixes https://github.com/redpanda-data/redpanda/issues/17847 (cherry picked from commit 8e221d36888652b0d79b81925473e4bb80da2351) --- src/v/cluster/controller_backend.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/v/cluster/controller_backend.cc b/src/v/cluster/controller_backend.cc index 5d7ece49baeb..c7ed3d4652f3 100644 --- a/src/v/cluster/controller_backend.cc +++ b/src/v/cluster/controller_backend.cc @@ -1614,6 +1614,14 @@ controller_backend::force_abort_replica_set_update( } co_return errc::waiting_for_recovery; } else { + auto leader_id = partition->get_leader_id(); + if (leader_id && leader_id != _self) { + // The leader is alive and we are a follower. Wait for the leader to + // replicate the aborting configuration, but don't append it + // ourselves to minimize the chance of log inconsistency. + co_return errc::not_leader; + } + vlog( clusterlog.debug, "[{}] force-aborting reconfiguration",