redpanda-data · ztlpn · May 16, 2024 · May 15, 2024 · May 14, 2024 · May 15, 2024
diff --git a/src/v/cluster/partition_balancer_planner.cc b/src/v/cluster/partition_balancer_planner.cc
@@ -1275,11 +1275,17 @@ void partition_balancer_planner::reassignable_partition::revert(
       _reallocated->partition.is_original(move.previous()->node_id),
       "ntp {}: move {}->{} should have been from original node",
       _ntp,
-      move.current(),
-      move.previous());
+      move.previous(),
+      move.current());
 
     auto err = _reallocated->partition.try_revert(move);
     vassert(err == errc::success, "ntp {}: revert error: {}", _ntp, err);
+    vlog(
+      clusterlog.info,
+      "ntp {}: reverted previously scheduled move {} -> {}",
+      _ntp,
+      move.previous()->node_id,
+      move.current().node_id);
 
     {
         // adjust topic node counts
@@ -1947,6 +1953,10 @@ ss::future<> partition_balancer_planner::get_counts_rebalancing_actions(
                     }
                 }
             },
+            [&](immutable_partition& p) {
+                p.report_failure(change_reason::partition_count_rebalancing);
+                should_stop = false;
+            },
             [](auto&) {});
 
           return ss::stop_iteration::no;
@@ -1956,7 +1966,7 @@ ss::future<> partition_balancer_planner::get_counts_rebalancing_actions(
         double cur_objective = calc_objective(domain);
         vlog(
           clusterlog.info,
-          "counts rebalancing objective in domain {}: {:6} -> {:6}",
+          "counts rebalancing objective in domain {}: {:.6} -> {:.6}",
           domain,
           orig_objective,
           cur_objective);

diff --git a/tests/rptest/tests/end_to_end.py b/tests/rptest/tests/end_to_end.py
@@ -79,6 +79,7 @@ def __init__(self,
 
     def start_redpanda(self,
                        num_nodes=1,
+                       num_started_nodes=None,
                        extra_rp_conf=None,
                        si_settings=None,
                        environment=None,
@@ -122,7 +123,11 @@ def start_redpanda(self,
             self.redpanda._installer.install(self.redpanda.nodes,
                                              version_to_install)
 
-        self.redpanda.start(auto_assign_node_id=new_bootstrap,
+        started_nodes = None
+        if num_started_nodes is not None:
+            started_nodes = self.redpanda.nodes[:num_started_nodes]
+        self.redpanda.start(nodes=started_nodes,
+                            auto_assign_node_id=new_bootstrap,
                             omit_seeds_on_idx_one=not new_bootstrap)
         if version_to_install and install_opts.num_to_upgrade > 0:
             # Perform the upgrade rather than starting each node on the

diff --git a/tests/rptest/tests/partition_balancer_test.py b/tests/rptest/tests/partition_balancer_test.py
@@ -1142,3 +1142,67 @@ def test_transfer_controller_leadership(self):
                                          target_id=transfer_to_idx)
 
             self.wait_until_ready()
+
+    @skip_debug_mode
+    @cluster(num_nodes=7, log_allow_list=CHAOS_LOG_ALLOW_LIST)
+    def test_recovery_mode_rebalance_finish(self):
+        """
+        Test that rebalancing on node add correctly finishes
+        if some (but not all) nodes were in recovery mode.
+
+        Scenario:
+          1. Start 3 of the 5 redpanda nodes and create a 50-partition topic.
+          2. Restart those 3 nodes with recovery_mode_enabled=True.
+          3. Start the remaining 2 nodes and transfer controller leadership
+             to the first of them.
+          4. Wait for the balancer status to become "stalled".
+          5. Restart the first 3 nodes without the recovery mode override
+             and wait until the balancer is ready.
+          6. Assert that every node's partition count is within 5% of the
+             mean, then run the produce/consume validation.
+        """
+
+        # start first 3 nodes and create some partitions on them
+        # (num_started_nodes=3 makes start_redpanda start only
+        # self.redpanda.nodes[:3]; the other 2 nodes stay down for now)
+        self.start_redpanda(num_nodes=5,
+                            num_started_nodes=3,
+                            new_bootstrap=True)
+        self.topic = TopicSpec(partition_count=50)
+        self.client().create_topic(self.topic)
+
+        # NOTE(review): presumably the producer and consumer occupy the
+        # remaining 2 of the 7 nodes requested via @cluster -- confirm
+        self.start_producer(1)
+        self.start_consumer(1)
+        self.await_startup()
+
+        # restart seed nodes in recovery mode
+        # (seed_nodes are exactly the 3 nodes started above)
+        seed_nodes = self.redpanda.nodes[:3]
+        self.redpanda.restart_nodes(
+            seed_nodes,
+            auto_assign_node_id=True,
+            omit_seeds_on_idx_one=False,
+            override_cfg_params={"recovery_mode_enabled": True})
+
+        # add 2 more nodes and make sure the balancer runs on one of them
+        # (it can't run on seed nodes because of recovery mode)
+        joiner_nodes = self.redpanda.nodes[3:]
+        for node in joiner_nodes:
+            self.redpanda.start_node(node,
+                                     auto_assign_node_id=True,
+                                     omit_seeds_on_idx_one=False)
+        self.redpanda.wait_for_membership(first_start=False)
+
+        admin = Admin(self.redpanda)
+
+        # NOTE(review): presumably the balancer runs on the controller
+        # leader, hence leadership of redpanda/controller/0 is moved to the
+        # first joiner node -- confirm
+        admin.transfer_leadership_to(namespace='redpanda',
+                                     topic='controller',
+                                     partition=0,
+                                     target_id=self.redpanda.node_id(
+                                         joiner_nodes[0]))
+
+        # the balancer will stall because not all partitions are moveable
+        self.wait_until_status(lambda s: s["status"] == "stalled")
+
+        # restart seed nodes in normal mode
+        # (no override_cfg_params this time, so recovery_mode_enabled is not
+        # forced to True)
+        # NOTE(review): unlike the restart above, omit_seeds_on_idx_one is
+        # not passed here -- confirm the default is intended
+        self.redpanda.restart_nodes(seed_nodes, auto_assign_node_id=True)
+        self.redpanda.wait_for_membership(first_start=False)
+
+        self.wait_until_ready()
+
+        # check that partition counts are balanced
+        # (one count per redpanda node, in self.redpanda.nodes order)
+        partition_counts = [
+            len(admin.get_partitions(node=n)) for n in self.redpanda.nodes
+        ]
+        self.logger.info(f"partition counts: {partition_counts}")
+        # "balanced" here means: each node's count deviates from the mean
+        # by less than 5% of the mean
+        avg = sum(partition_counts) / len(partition_counts)
+        assert all(abs(c - avg) / avg < 0.05 for c in partition_counts), \
+            "partition counts not balanced"
+
+        self.run_validation(consumer_timeout_sec=CONSUMER_TIMEOUT)