From e62d561baf31098af0675ccbaa567021d897cc38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 2 Jul 2024 09:12:23 +0000 Subject: [PATCH 1/3] tests: wait for the leadership change metric to be updated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of asserting right away, wait for the leadership change metric to be updated. Fixes: #20574 Signed-off-by: Michał Maślanka --- tests/rptest/tests/raft_availability_test.py | 34 +++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/tests/rptest/tests/raft_availability_test.py b/tests/rptest/tests/raft_availability_test.py index 31a150fc16d6..f05171bb0838 100644 --- a/tests/rptest/tests/raft_availability_test.py +++ b/tests/rptest/tests/raft_availability_test.py @@ -394,18 +394,28 @@ def test_leadership_transfer(self): f"New leader is {new_leader_id} {new_leader_node.account.hostname}" ) - for [id, metric_check] in metric_checks.items(): - # the metric should be updated only on the node that was elected as a leader - if id == new_leader_id: - metric_check.expect([ - ("vectorized_raft_leadership_changes_total", - lambda initial, current: current == initial + 1), - ]) - else: - metric_check.expect([ - ("vectorized_raft_leadership_changes_total", - lambda initial, current: current == initial), - ]) + def metrics_updated(): + results = [] + for [id, metric_check] in metric_checks.items(): + # the metric should be updated only on the node that was elected as a leader + if id == new_leader_id: + results.append( + metric_check.evaluate([ + ("vectorized_raft_leadership_changes_total", + lambda initial, current: current == initial + 1), + ])) + else: + results.append( + metric_check.evaluate([ + ("vectorized_raft_leadership_changes_total", + lambda initial, current: current == initial), + ])) + + return all(results) + + wait_until( + metrics_updated, 30, 1, + "Leadership changes metric should be updated only on 
the leader") @cluster(num_nodes=4) @parametrize(acks=1) From 877276383c8badbaaaec8af759ecb91c7d72b8d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 2 Jul 2024 12:23:39 +0000 Subject: [PATCH 2/3] t/metrics_check: add node host name and id to log line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Maślanka --- tests/rptest/services/metrics_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/rptest/services/metrics_check.py b/tests/rptest/services/metrics_check.py index a93f94d3c59b..c7c96a159069 100644 --- a/tests/rptest/services/metrics_check.py +++ b/tests/rptest/services/metrics_check.py @@ -94,7 +94,9 @@ def _capture(self, check_metrics): samples[sample.name] = sample.value for k, v in samples.items(): - self.logger.info(f" Captured {k}={v}") + self.logger.info( + f" Captured {k}={v} from {self.node.account.hostname}(node_id = {self.redpanda.node_id(self.node)})" + ) if len(samples) == 0: # Announce From 2c009b1771e91422e42a401dde29e8a0f8ccdc6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Ma=C5=9Blanka?= Date: Tue, 2 Jul 2024 12:24:11 +0000 Subject: [PATCH 3/3] tests: await stable leader in transfer test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Maślanka --- tests/rptest/tests/raft_availability_test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/rptest/tests/raft_availability_test.py b/tests/rptest/tests/raft_availability_test.py index f05171bb0838..59380b82cc8c 100644 --- a/tests/rptest/tests/raft_availability_test.py +++ b/tests/rptest/tests/raft_availability_test.py @@ -386,8 +386,12 @@ def test_leadership_transfer(self): partition=0, target_id=None, leader_id=initial_leader_id) - new_leader_id, _ = self._wait_for_leader( - lambda l: l is not None and l != initial_leader_id) + hosts = [n.account.hostname for n in 
self.redpanda.nodes] + new_leader_id = admin.await_stable_leader( + topic=self.topic, + partition=0, + hosts=hosts, + check=lambda l: l is not None and l != initial_leader_id) new_leader_node = self.redpanda.get_node_by_id(new_leader_id) assert new_leader_node is not None self.logger.info(