This repository has been archived by the owner on Nov 6, 2020. It is now read-only.

Fix deadlock in network-devp2p #11013

Merged (2 commits, Sep 2, 2019)

util/network-devp2p/src/host.rs (16 changes: 11 additions & 5 deletions)
@@ -261,6 +261,8 @@ struct ProtocolTimer {
 }
 
 /// Root IO handler. Manages protocol handlers, IO timers and network connections.
+///
+/// NOTE: must keep the lock in order of: reserved_nodes (rwlock) -> session (mutex, from sessions)
 pub struct Host {
 	pub info: RwLock<HostInfo>,
 	udp_socket: Mutex<Option<UdpSocket>>,
@@ -722,12 +724,13 @@ impl Host {
 			let session_result = session.lock().readable(io, &self.info.read());
 			match session_result {
 				Err(e) => {
+					let reserved_nodes = self.reserved_nodes.read();
Collaborator comment:

Are there any other occurrences where we need reserved_nodes and session.lock()?

As a rule of thumb, we try to keep the lock order in the same order as the field declarations. Here we lock something that is not part of the original struct, but since it's retrieved from self.sessions I'd say we should keep the lock ordering as well.

Otherwise, let's put a comment on top of sessions: and note what order the session.lock() should be acquired in.
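
To make the ordering rule concrete, here is a minimal, hypothetical sketch (not code from this PR): HostLocks and its fields are simplified stand-ins for Host, reserved_nodes and a session taken from sessions, and plain std::sync locks stand in for the lock types the crate actually uses. Both functions take the reserved_nodes guard before the session mutex, matching the declaration order; if one of them locked the session first, the two threads could each hold the lock the other needs, which is the kind of deadlock this change removes.

// Illustrative only: HostLocks and its fields are hypothetical stand-ins, not
// the actual devp2p types. reserved_nodes is declared before the session lock
// and must be locked first, mirroring the NOTE added in this PR.
use std::collections::HashSet;
use std::sync::{Arc, Mutex, RwLock};
use std::thread;

struct HostLocks {
	reserved_nodes: RwLock<HashSet<u64>>, // lock this first
	session: Mutex<Vec<u64>>,             // then this (stand-in for a session from `sessions`)
}

fn disconnect_if_not_reserved(host: &HostLocks, id: u64) {
	// Same pattern as the fix: take the reserved_nodes guard before the session
	// mutex, and reuse it instead of calling read() again under the session lock.
	let reserved = host.reserved_nodes.read().unwrap();
	let mut session = host.session.lock().unwrap();
	if !reserved.contains(&id) {
		session.retain(|peer| *peer != id);
	}
}

fn add_reserved(host: &HostLocks, id: u64) {
	// Also needs both locks; again reserved_nodes first, session second. If
	// disconnect_if_not_reserved grabbed the session mutex before reserved_nodes,
	// each thread could end up holding the lock the other is waiting for.
	let mut reserved = host.reserved_nodes.write().unwrap();
	reserved.insert(id);
	let mut session = host.session.lock().unwrap();
	if !session.contains(&id) {
		session.push(id);
	}
}

fn main() {
	let host = Arc::new(HostLocks {
		reserved_nodes: RwLock::new(HashSet::from([1])),
		session: Mutex::new(vec![1, 2]),
	});

	let disconnector = {
		let host = Arc::clone(&host);
		thread::spawn(move || disconnect_if_not_reserved(&host, 2))
	};
	let adder = {
		let host = Arc::clone(&host);
		thread::spawn(move || add_reserved(&host, 3))
	};
	disconnector.join().unwrap();
	adder.join().unwrap();

	// Both threads finish because every path takes the locks in the same order.
	println!("session peers: {:?}", host.session.lock().unwrap());
}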

 					let s = session.lock();
 					trace!(target: "network", "Session read error: {}:{:?} ({:?}) {:?}", token, s.id(), s.remote_addr(), e);
 					match e {
 						Error::Disconnect(DisconnectReason::IncompatibleProtocol) | Error::Disconnect(DisconnectReason::UselessPeer) => {
 							if let Some(id) = s.id() {
-								if !self.reserved_nodes.read().contains(id) {
+								if !reserved_nodes.contains(id) {
 									let mut nodes = self.nodes.write();
 									nodes.note_failure(&id);
 									nodes.mark_as_useless(id);
@@ -741,6 +744,7 @@
 				},
 				Ok(SessionData::Ready) => {
 					let (_, egress_count, ingress_count) = self.session_count();
+					let reserved_nodes = self.reserved_nodes.read();
 					let mut s = session.lock();
 					let (min_peers, mut max_peers, reserved_only, self_id) = {
 						let info = self.info.read();
@@ -765,7 +769,7 @@
 						if reserved_only ||
 							(s.info.originated && egress_count > min_peers) ||
 							(!s.info.originated && ingress_count > max_ingress) {
-							if !self.reserved_nodes.read().contains(&id) {
+							if !reserved_nodes.contains(&id) {
 								// only proceed if the connecting peer is reserved.
 								trace!(target: "network", "Disconnecting non-reserved peer {:?}", id);
 								s.disconnect(io, DisconnectReason::TooManyPeers);
@@ -980,7 +984,8 @@ impl Host {
 		for i in to_remove {
 			trace!(target: "network", "Removed from node table: {}", i);
 		}
-		self.nodes.write().update(node_changes, &*self.reserved_nodes.read());
+		let reserved_nodes = self.reserved_nodes.read();
+		self.nodes.write().update(node_changes, &*reserved_nodes);
 	}
 
 	pub fn with_context<F>(&self, protocol: ProtocolId, io: &IoContext<NetworkIoMessage>, action: F) where F: FnOnce(&dyn NetworkContextTrait) {
@@ -1066,8 +1071,9 @@ impl IoHandler<NetworkIoMessage> for Host {
 			},
 			NODE_TABLE => {
 				trace!(target: "network", "Refreshing node table");
-				self.nodes.write().clear_useless();
-				self.nodes.write().save();
+				let mut nodes = self.nodes.write();
+				nodes.clear_useless();
+				nodes.save();
 			},
 			_ => match self.timers.read().get(&token).cloned() {
 				Some(timer) => match self.handlers.read().get(&timer.protocol).cloned() {
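
Aside, illustrative only (not from the PR): the NODE_TABLE hunk above works because of how lock guards behave in Rust. A guard returned by write() is a temporary that is dropped at the end of its statement, so chaining two calls acquires the lock twice and briefly releases it in between; binding the guard to a local holds the lock across both operations. A small sketch with std::sync and made-up stand-in names:

// Illustrative sketch only: NodeTable here is a hypothetical stand-in, not the
// crate's node table type.
use std::sync::RwLock;

#[derive(Default)]
struct NodeTable {
	useless: Vec<u64>,
	saved: bool,
}

impl NodeTable {
	fn clear_useless(&mut self) {
		self.useless.clear();
	}
	fn save(&mut self) {
		self.saved = true;
	}
}

fn main() {
	let nodes = RwLock::new(NodeTable::default());

	// Before: two separate acquisitions; each temporary guard drops at the end
	// of its own statement, so other threads can slip in between the calls.
	nodes.write().unwrap().clear_useless();
	nodes.write().unwrap().save();

	// After: one acquisition held across both operations.
	let mut guard = nodes.write().unwrap();
	guard.clear_useless();
	guard.save();
	drop(guard); // release the write lock before reading below

	assert!(nodes.read().unwrap().saved);
	println!("node table cleared and saved");
}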