Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.1 -> main] Fix failure to sync on startup #74

Merged
merged 5 commits into main from release/3.1
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions plugins/net_plugin/net_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ namespace eosio {
close(false);
}
return;
} else {
} else if( latest_blk_time > 0 ) {
const tstamp timeout = std::max(hb_timeout/2, 2*std::chrono::milliseconds(config::block_interval_ms).count());
if ( current_time > latest_blk_time + timeout ) {
send_handshake();
Expand Down Expand Up @@ -1586,9 +1586,10 @@ namespace eosio {
// if closing the connection we are currently syncing from, then reset our last requested and next expected.
if( c == sync_source ) {
reset_last_requested_num(g);
uint32_t head_blk_num = 0;
std::tie( std::ignore, head_blk_num, std::ignore, std::ignore, std::ignore, std::ignore ) = my_impl->get_chain_info();
sync_next_expected_num = head_blk_num + 1;
// if starting to sync need to always start from lib as we might be on our own fork
uint32_t lib_num = 0;
std::tie( lib_num, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore ) = my_impl->get_chain_info();
sync_next_expected_num = lib_num + 1;
request_next_chunk( std::move(g) );
}
}
Expand All @@ -1605,8 +1606,9 @@ namespace eosio {
("r", sync_last_requested_num)("e", sync_next_expected_num)("k", sync_known_lib_num)("s", sync_req_span) );

if( fork_head_block_num < sync_last_requested_num && sync_source && sync_source->current() ) {
fc_ilog( logger, "ignoring request, head is ${h} last req = ${r}, source connection ${c}",
("h", fork_head_block_num)("r", sync_last_requested_num)("c", sync_source->connection_id) );
fc_ilog( logger, "ignoring request, head is ${h} last req = ${r}, sync_next_expected_num: ${e}, sync_known_lib_num: ${k}, sync_req_span: ${s}, source connection ${c}",
("h", fork_head_block_num)("r", sync_last_requested_num)("e", sync_next_expected_num)
("k", sync_known_lib_num)("s", sync_req_span)("c", sync_source->connection_id) );
return;
}

Expand Down Expand Up @@ -1740,12 +1742,11 @@ namespace eosio {
if( sync_state == in_sync ) {
set_state( lib_catchup );
}
// if starting to sync need to always start from lib as we might be on our own fork
sync_next_expected_num = lib_num + 1;
sync_next_expected_num = std::max( lib_num + 1, sync_next_expected_num );

// p2p_high_latency_test.py test depends on this exact log statement.
peer_ilog( c, "Catching up with chain, our last req is ${cc}, theirs is ${t}",
("cc", sync_last_requested_num)("t", target) );
peer_ilog( c, "Catching up with chain, our last req is ${cc}, theirs is ${t}, next expected ${n}",
("cc", sync_last_requested_num)("t", target)("n", sync_next_expected_num) );

request_next_chunk( std::move( g_sync ), c );
}
Expand Down
7 changes: 7 additions & 0 deletions tests/nodeos_startup_catchup.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,16 @@ def waitForNodeStarted(node):
numBlocksToCatchup=(lastLibNum-lastCatchupLibNum-1)+twoRounds
waitForBlock(catchupNode, lastLibNum, timeout=twoRoundsTimeout, blockType=BlockType.lib)

catchupHead=head(catchupNode)
Print("Shutdown catchup node and validate exit code")
catchupNode.interruptAndVerifyExitStatus(60)

# every other catchup make a lib catchup
if catchup_num % 2 == 0:
Print(f"Wait for producer to advance lib past head of catchup {catchupHead}")
# catchupHead+5 to allow for advancement of head during shutdown of catchupNode
waitForBlock(node0, catchupHead+5, timeout=twoRoundsTimeout*2, blockType=BlockType.lib)

Print("Restart catchup node")
catchupNode.relaunch(cachePopen=True)
waitForNodeStarted(catchupNode)
Expand Down