From f01b65c5e5426c48c53f27a13f8c8a8cb86dd1b2 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Thu, 21 Sep 2023 23:04:44 +0200
Subject: [PATCH] admin: add raft follower_recovery_state to debug partition json

---
 src/v/cluster/cluster_utils.cc          | 10 +++++++++
 src/v/cluster/types.h                   | 27 +++++++++++++++++++++++--
 src/v/raft/consensus.h                  |  5 +++++
 src/v/redpanda/admin/api-doc/debug.json | 18 +++++++++++++++++
 src/v/redpanda/admin_server.cc          |  6 ++++++
 5 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/src/v/cluster/cluster_utils.cc b/src/v/cluster/cluster_utils.cc
index 158eb07ccf648..67f536fbdc3c4 100644
--- a/src/v/cluster/cluster_utils.cc
+++ b/src/v/cluster/cluster_utils.cc
@@ -350,6 +350,16 @@ partition_raft_state get_partition_raft_state(consensus_ptr ptr) {
         raft_state.followers = std::move(followers);
     }
     raft_state.stms = get_partition_stm_state(ptr);
+
+    const auto& frs = ptr->get_follower_recovery_state();
+    if (frs) {
+        raft_state.recovery_state
+          = partition_raft_state::follower_recovery_state{
+            .is_active = frs->is_active(),
+            .pending_offset_count = frs->pending_offset_count(),
+          };
+    }
+
     return raft_state;
 }
 
diff --git a/src/v/cluster/types.h b/src/v/cluster/types.h
index 833ca521b8352..1bb55f3efd82f 100644
--- a/src/v/cluster/types.h
+++ b/src/v/cluster/types.h
@@ -3557,7 +3557,7 @@ struct partition_stm_state
 struct partition_raft_state
   : serde::envelope<
       partition_raft_state,
-      serde::version<1>,
+      serde::version<2>,
       serde::compat_version<0>> {
     using rpc_adl_exempt = std::true_type;
 
@@ -3619,10 +3619,32 @@ struct partition_raft_state
               suppress_heartbeats,
               is_recovering);
         }
+
+        friend bool operator==(const follower_state&, const follower_state&)
+          = default;
+    };
+
+    struct follower_recovery_state
+      : serde::envelope<
+          follower_recovery_state,
+          serde::version<0>,
+          serde::compat_version<0>> {
+        bool is_active = false;
+        int64_t pending_offset_count = 0;
+
+        auto serde_fields() {
+            return std::tie(is_active, pending_offset_count);
+        }
+
+        friend bool operator==(
+          const follower_recovery_state&, const follower_recovery_state&)
+          = default;
     };
 
     // Set only on leaders.
     std::optional<std::vector<follower_state>> followers;
+    // Set only on recovering followers.
+    std::optional<follower_recovery_state> recovery_state;
 
     auto serde_fields() {
         return std::tie(
@@ -3644,7 +3666,8 @@ struct partition_raft_state
           is_leader,
           is_elected_leader,
           followers,
-          stms);
+          stms,
+          recovery_state);
     }
 
     friend bool
diff --git a/src/v/raft/consensus.h b/src/v/raft/consensus.h
index 20d98bc5f6c38..23616164ddd79 100644
--- a/src/v/raft/consensus.h
+++ b/src/v/raft/consensus.h
@@ -496,6 +496,11 @@ class consensus {
 
     void reset_last_sent_protocol_meta(const vnode&);
 
+    const std::optional<follower_recovery_state>&
+    get_follower_recovery_state() const {
+        return _follower_recovery_state;
+    }
+
 private:
     friend replicate_entries_stm;
     friend vote_stm;
diff --git a/src/v/redpanda/admin/api-doc/debug.json b/src/v/redpanda/admin/api-doc/debug.json
index b4183f70868ad..45c329571bf01 100644
--- a/src/v/redpanda/admin/api-doc/debug.json
+++ b/src/v/redpanda/admin/api-doc/debug.json
@@ -913,6 +913,20 @@
                 }
             }
         },
+        "follower_recovery_state": {
+            "id": "follower_recovery_state",
+            "description": "Follower-side Raft recovery state",
+            "properties": {
+                "is_active": {
+                    "type": "boolean",
+                    "description": "True if recovery is currently allowed by the scheduler"
+                },
+                "pending_offset_count": {
+                    "type": "long",
+                    "description": "Difference between leader and our last offsets"
+                }
+            }
+        },
         "raft_replica_state": {
             "id": "raft_replica_state",
             "description": "Raft level state for a single replica of a partition",
@@ -998,6 +1012,10 @@
                         "type": "stm_state"
                     },
                     "description": "All snapshottable stms attached to this replica"
+                },
+                "follower_recovery_state": {
+                    "type": "follower_recovery_state",
+                    "description": "Raft recovery state if this replica is a follower in recovery"
                 }
             }
         },
diff --git a/src/v/redpanda/admin_server.cc b/src/v/redpanda/admin_server.cc
index 8fdae7d1c7fd1..89b96519909d2 100644
--- a/src/v/redpanda/admin_server.cc
+++ b/src/v/redpanda/admin_server.cc
@@ -4107,6 +4107,12 @@ void fill_raft_state(
         state.max_collectible_offset = stm.last_applied_offset;
         raft_state.stms.push(std::move(state));
     }
+    if (src.recovery_state) {
+        ss::httpd::debug_json::follower_recovery_state frs;
+        frs.is_active = src.recovery_state->is_active;
+        frs.pending_offset_count = src.recovery_state->pending_offset_count;
+        raft_state.follower_recovery_state = std::move(frs);
+    }
     replica.raft_state = std::move(raft_state);
 }
 ss::future>>