Skip to content

Commit

Permalink
Merge pull request #2021 from RolandSherwin/bad_node_metric
Browse files Browse the repository at this point in the history
feat(metrics): track the bad node count
  • Loading branch information
RolandSherwin authored Aug 5, 2024
2 parents 5e61746 + 6dc63b4 commit b4da842
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 6 deletions.
7 changes: 6 additions & 1 deletion sn_networking/src/event/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,14 @@ pub enum NetworkEvent {
our_protocol: String,
their_protocol: String,
},
/// The peer is now considered as a bad node, due to the detected bad behaviour
/// A peer from our routing table (RT) is considered as bad due to the included behaviour
PeerConsideredAsBad {
detected_by: PeerId,
bad_peer: PeerId,
bad_behaviour: String,
},
/// We have been flagged as a bad node by a peer.
FlaggedAsBadNode { flagged_by: PeerId },
/// The records bearing these keys are to be fetched from the holder or the network
KeysToFetchForReplication(Vec<(PeerId, RecordKey)>),
/// Started listening on a new address
Expand Down Expand Up @@ -195,6 +197,9 @@ impl Debug for NetworkEvent {
"NetworkEvent::PeerConsideredAsBad({bad_peer:?}, {bad_behaviour:?})"
)
}
NetworkEvent::FlaggedAsBadNode { flagged_by } => {
write!(f, "NetworkEvent::FlaggedAsBadNode({flagged_by:?})")
}
NetworkEvent::KeysToFetchForReplication(list) => {
let keys_len = list.len();
write!(f, "NetworkEvent::KeysForReplication({keys_len:?})")
Expand Down
12 changes: 11 additions & 1 deletion sn_networking/src/event/request_response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,18 @@ impl SwarmDriver {
channel: MsgResponder::FromPeer(channel),
});

if bad_peer == NetworkAddress::from_peer(self.self_peer_id) {
let (Some(detected_by), Some(bad_peer)) =
(detected_by.as_peer_id(), bad_peer.as_peer_id())
else {
error!("Could not get PeerId from detected_by or bad_peer NetworkAddress {detected_by:?}, {bad_peer:?}");
return Ok(());
};

if bad_peer == self.self_peer_id {
warn!("Peer {detected_by:?} consider us as BAD, due to {bad_behaviour:?}.");
self.send_event(NetworkEvent::FlaggedAsBadNode {
flagged_by: detected_by,
})
// TODO: shall we terminate ourselves after receiving such notifications
// from the majority of the close_group nodes around us?
} else {
Expand Down
10 changes: 8 additions & 2 deletions sn_node/src/log_markers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ pub enum Marker<'a> {
NodeConnectedToNetwork,

/// Peer was added to the routing table
PeerAddedToRoutingTable(PeerId),
PeerAddedToRoutingTable(&'a PeerId),

/// Peer was removed from the routing table
PeerRemovedFromRoutingTable(PeerId),
PeerRemovedFromRoutingTable(&'a PeerId),

/// The number of peers in the routing table
PeersInRoutingTable(usize),
Expand Down Expand Up @@ -51,6 +51,12 @@ pub enum Marker<'a> {
/// Valid spend stored
ValidSpendPutFromClient(&'a PrettyPrintRecordKey<'a>),

/// The peer has been considered as bad
PeerConsideredAsBad(&'a PeerId),

/// We have been flagged as a bad node by a peer.
FlaggedAsBadNode(&'a PeerId),

/// Record rejected
RecordRejected(&'a PrettyPrintRecordKey<'a>, &'a Error),

Expand Down
26 changes: 26 additions & 0 deletions sn_node/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub(crate) struct NodeMetrics {
// routing table
peer_added_to_routing_table: Counter,
peer_removed_from_routing_table: Counter,
bad_peers_count: Counter,
shunned_count: Counter,

// wallet
pub(crate) current_reward_wallet_balance: Gauge,
Expand Down Expand Up @@ -100,6 +102,20 @@ impl NodeMetrics {
peer_removed_from_routing_table.clone(),
);

let shunned_count = Counter::default();
sub_registry.register(
"shunned_count",
"Number of peers that have shunned our node",
shunned_count.clone(),
);

let bad_peers_count = Counter::default();
sub_registry.register(
"bad_peers_count",
"Number of bad peers that have been detected by us and been added to the blocklist",
bad_peers_count.clone(),
);

let current_reward_wallet_balance = Gauge::default();
sub_registry.register(
"current_reward_wallet_balance",
Expand Down Expand Up @@ -128,6 +144,8 @@ impl NodeMetrics {
replication_keys_to_fetch,
peer_added_to_routing_table,
peer_removed_from_routing_table,
bad_peers_count,
shunned_count,
current_reward_wallet_balance,
total_forwarded_rewards,
started_instant: Instant::now(),
Expand Down Expand Up @@ -185,6 +203,14 @@ impl NodeMetrics {
let _ = self.peer_removed_from_routing_table.inc();
}

Marker::PeerConsideredAsBad(_) => {
let _ = self.bad_peers_count.inc();
}

Marker::FlaggedAsBadNode(_) => {
let _ = self.shunned_count.inc();
}

_ => {}
}
}
Expand Down
10 changes: 8 additions & 2 deletions sn_node/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ impl Node {
}

self.record_metrics(Marker::PeersInRoutingTable(connected_peers));
self.record_metrics(Marker::PeerAddedToRoutingTable(peer_id));
self.record_metrics(Marker::PeerAddedToRoutingTable(&peer_id));

// try replication here
let network = self.network().clone();
Expand All @@ -452,7 +452,7 @@ impl Node {
NetworkEvent::PeerRemoved(peer_id, connected_peers) => {
event_header = "PeerRemoved";
self.record_metrics(Marker::PeersInRoutingTable(connected_peers));
self.record_metrics(Marker::PeerRemovedFromRoutingTable(peer_id));
self.record_metrics(Marker::PeerRemovedFromRoutingTable(&peer_id));

let network = self.network().clone();
self.record_metrics(Marker::IntervalReplicationTriggered);
Expand All @@ -469,6 +469,8 @@ impl Node {
bad_behaviour,
} => {
event_header = "PeerConsideredAsBad";
self.record_metrics(Marker::PeerConsideredAsBad(&bad_peer));

let request = Request::Cmd(Cmd::PeerConsideredAsBad {
detected_by: NetworkAddress::from_peer(detected_by),
bad_peer: NetworkAddress::from_peer(bad_peer),
Expand All @@ -480,6 +482,10 @@ impl Node {
network.send_req_ignore_reply(request, bad_peer);
});
}
NetworkEvent::FlaggedAsBadNode { flagged_by } => {
event_header = "FlaggedAsBadNode";
self.record_metrics(Marker::FlaggedAsBadNode(&flagged_by));
}
NetworkEvent::NewListenAddr(_) => {
event_header = "NewListenAddr";
if !cfg!(feature = "local-discovery") {
Expand Down

0 comments on commit b4da842

Please sign in to comment.