Skip to content

Commit

Permalink
Merge bitcoin#30118: test: improve robustness of connect_nodes()
Browse files Browse the repository at this point in the history
6629d1d test: improve robustness of connect_nodes() (furszy)

Pull request description:

  Decoupled from bitcoin#27837 because this can help other too, found it investigating a CI failure https://cirrus-ci.com/task/5805115213348864?logs=ci#L3200.

  The `connect_nodes` function in the test framework relies on a stable number of peer
  connections to verify that the new connection between the nodes is successfully established.
  This approach is fragile, as any of the peers involved in the process can drop, lose, or
  create a connection at any step, causing subsequent `wait_until` checks to stall indefinitely
  even when the peers in question were connected successfully.

  This commit improves the situation by using the nodes' subversion and the connection
  direction (inbound/outbound) to identify the exact peer connection and perform the
  checks exclusively on it.

ACKs for top commit:
  stratospher:
    reACK 6629d1d.
  achow101:
    ACK 6629d1d
  maflcko:
    utACK 6629d1d
  AngusP:
    re-ACK 6629d1d

Tree-SHA512: 5f345c0ce49ea81b643e97c5cffd133e182838752c27592fcdeac14ad10919fb4b7ff38e289e42a7c3c638a170bd0d0b7a9cd493898997a2082a7b7ceef4aeeb
  • Loading branch information
achow101 authored and UdjinM6 committed Oct 2, 2024
1 parent ac94de2 commit 2c825b4
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 9 deletions.
28 changes: 20 additions & 8 deletions test/functional/test_framework/test_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,23 +697,35 @@ def connect_nodes(self, a, b):

from_connection = self.nodes[a]
to_connection = self.nodes[b]
from_num_peers = 1 + len(from_connection.getpeerinfo())
to_num_peers = 1 + len(to_connection.getpeerinfo())
ip_port = "127.0.0.1:" + str(p2p_port(b))
from_connection.addnode(ip_port, "onetry")

# Use subversion as peer id. Test nodes have their node number appended to the user agent string
from_connection_subver = from_connection.getnetworkinfo()['subversion']
to_connection_subver = to_connection.getnetworkinfo()['subversion']

def find_conn(node, peer_subversion, inbound):
return next(filter(lambda peer: peer['subver'] == peer_subversion and peer['inbound'] == inbound, node.getpeerinfo()), None)

# poll until version handshake complete to avoid race conditions
# with transaction relaying
# See comments in net_processing:
# * Must have a version message before anything else
# * Must have a verack message before anything else
self.wait_until(lambda: sum(peer['version'] != 0 for peer in from_connection.getpeerinfo()) == from_num_peers)
self.wait_until(lambda: sum(peer['version'] != 0 for peer in to_connection.getpeerinfo()) == to_num_peers)
self.wait_until(lambda: sum(peer['bytesrecv_per_msg'].pop('verack', 0) == 24 for peer in from_connection.getpeerinfo()) == from_num_peers)
self.wait_until(lambda: sum(peer['bytesrecv_per_msg'].pop('verack', 0) == 24 for peer in to_connection.getpeerinfo()) == to_num_peers)
self.wait_until(lambda: find_conn(from_connection, to_connection_subver, inbound=False) is not None)
self.wait_until(lambda: find_conn(to_connection, from_connection_subver, inbound=True) is not None)

def check_bytesrecv(peer, msg_type, min_bytes_recv):
assert peer is not None, "Error: peer disconnected"
return peer['bytesrecv_per_msg'].pop(msg_type, 0) >= min_bytes_recv

self.wait_until(lambda: check_bytesrecv(find_conn(from_connection, to_connection_subver, inbound=False), 'verack', 24))
self.wait_until(lambda: check_bytesrecv(find_conn(to_connection, from_connection_subver, inbound=True), 'verack', 24))

# The message bytes are counted before processing the message, so make
# sure it was fully processed by waiting for a ping.
self.wait_until(lambda: sum(peer["bytesrecv_per_msg"].pop("pong", 0) >= 32 for peer in from_connection.getpeerinfo()) == from_num_peers)
self.wait_until(lambda: sum(peer["bytesrecv_per_msg"].pop("pong", 0) >= 32 for peer in to_connection.getpeerinfo()) == to_num_peers)
self.wait_until(lambda: check_bytesrecv(find_conn(from_connection, to_connection_subver, inbound=False), 'pong', 32))
self.wait_until(lambda: check_bytesrecv(find_conn(to_connection, from_connection_subver, inbound=True), 'pong', 32))

def disconnect_nodes(self, a, b):
# A node cannot disconnect from itself, bail out early
Expand Down
2 changes: 1 addition & 1 deletion test/functional/test_framework/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def __init__(self, i, datadir, extra_args_from_options, *, chain, rpchost, timew
"-debug",
"-debugexclude=libevent",
"-debugexclude=leveldb",
"-uacomment=testnode%d" % i,
"-uacomment=testnode%d" % i, # required for subversion uniqueness across peers
]
if self.mocktime != 0:
self.args.append(f"-mocktime={mocktime}")
Expand Down

0 comments on commit 2c825b4

Please sign in to comment.