Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[4.0] Fix for loading a snapshot with an empty block log but an existing fork database #1299

Merged
merged 6 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions libraries/chain/controller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,10 @@ struct controller_impl {
ilog( "no irreversible blocks need to be replayed" );
}

if( !except_ptr && !check_shutdown() && fork_db.head() ) {
if (snapshot_head_block != 0 && !blog_head) {
// loading from snapshot without a block log so fork_db can't be considered valid
fork_db.reset( *head );
} else if( !except_ptr && !check_shutdown() && fork_db.head() ) {
auto head_block_num = head->block_num;
auto branch = fork_db.fetch_branch( fork_db.head()->id );
int rev = 0;
Expand Down Expand Up @@ -590,14 +593,17 @@ struct controller_impl {
void startup(std::function<void()> shutdown, std::function<bool()> check_shutdown, const snapshot_reader_ptr& snapshot) {
EOS_ASSERT( snapshot, snapshot_exception, "No snapshot reader provided" );
this->shutdown = shutdown;
ilog( "Starting initialization from snapshot, this may take a significant amount of time" );
try {
snapshot->validate();
if( auto blog_head = blog.head() ) {
ilog( "Starting initialization from snapshot and block log ${b}-${e}, this may take a significant amount of time",
("b", blog.first_block_num())("e", blog_head->block_num()) );
read_from_snapshot( snapshot, blog.first_block_num(), blog_head->block_num() );
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to add a similar log saying snapshot has been loaded?

ilog( "Snapshot loaded, lib: ${lib}", ("lib", lib_num) );

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks. Done.

} else {
ilog( "Starting initialization from snapshot and no block log, this may take a significant amount of time" );
read_from_snapshot( snapshot, 0, std::numeric_limits<uint32_t>::max() );
const uint32_t lib_num = head->block_num;
ilog( "Snapshot loaded, lib: ${lib}", ("lib", lib_num) );
EOS_ASSERT( lib_num > 0, snapshot_exception,
"Snapshot indicates controller head at block number 0, but that is not allowed. "
"Snapshot is invalid." );
Expand Down
72 changes: 66 additions & 6 deletions tests/nodeos_irreversible_mode_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
cmdError = Utils.cmdError
relaunchTimeout = 30
numOfProducers = 4
totalNodes = 15
totalNodes = 20

# Parse command line arguments
args = TestHelper.parse_args({"-v","--clean-run","--dump-error-details","--leave-running","--keep-logs","--unshared"})
Expand All @@ -33,6 +33,7 @@
killWallet=not dontKill
keepLogs=args.keep_logs
speculativeReadMode="head"
blockLogRetainBlocks="10000"

# Set up the cluster and its wallet manager
walletMgr=WalletMgr(True)
Expand Down Expand Up @@ -180,7 +181,13 @@ def relaunchNode(node: Node, chainArg="", addSwapFlags=None, relaunchAssertMessa
11:"--read-mode irreversible",
12:"--read-mode speculative",
13:"--read-mode irreversible",
14:"--read-mode speculative --plugin eosio::producer_api_plugin"})
14:"--read-mode speculative --plugin eosio::producer_api_plugin",
15:"--read-mode speculative",
16:"--read-mode irreversible",
17:"--read-mode speculative",
18:"--read-mode irreversible",
19:"--read-mode speculative --plugin eosio::producer_api_plugin"
})

producingNodeId = 0
producingNode = cluster.getNode(producingNodeId)
Expand Down Expand Up @@ -260,7 +267,7 @@ def switchSpecToIrrMode(nodeIdOfNodeToTest, nodeToTest):

# Kill and relaunch in irreversible mode
nodeToTest.kill(signal.SIGTERM)
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": "irreversible"})
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": "irreversible", "--block-log-retain-blocks":blockLogRetainBlocks})

# Ensure the node condition is as expected after relaunch
confirmHeadLibAndForkDbHeadOfIrrMode(nodeToTest, headLibAndForkDbHeadBeforeSwitchMode)
Expand All @@ -273,7 +280,7 @@ def switchIrrToSpecMode(nodeIdOfNodeToTest, nodeToTest):

# Kill and relaunch in speculative mode
nodeToTest.kill(signal.SIGTERM)
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": speculativeReadMode})
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": speculativeReadMode, "--block-log-retain-blocks":blockLogRetainBlocks})

# Ensure the node condition is as expected after relaunch
confirmHeadLibAndForkDbHeadOfSpecMode(nodeToTest, headLibAndForkDbHeadBeforeSwitchMode)
Expand All @@ -289,7 +296,7 @@ def switchSpecToIrrModeWithConnectedToProdNode(nodeIdOfNodeToTest, nodeToTest):
# Kill and relaunch in irreversible mode
nodeToTest.kill(signal.SIGTERM)
waitForBlksProducedAndLibAdvanced() # Wait for some blks to be produced and lib advance
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": "irreversible"})
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": "irreversible", "--block-log-retain-blocks":blockLogRetainBlocks})

# Ensure the node condition is as expected after relaunch
ensureHeadLibAndForkDbHeadIsAdvancing(nodeToTest)
Expand All @@ -308,7 +315,7 @@ def switchIrrToSpecModeWithConnectedToProdNode(nodeIdOfNodeToTest, nodeToTest):
# Kill and relaunch in speculative mode
nodeToTest.kill(signal.SIGTERM)
waitForBlksProducedAndLibAdvanced() # Wait for some blks to be produced and lib advance)
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": speculativeReadMode})
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": speculativeReadMode, "--block-log-retain-blocks":blockLogRetainBlocks})

# Ensure the node condition is as expected after relaunch
ensureHeadLibAndForkDbHeadIsAdvancing(nodeToTest)
Expand Down Expand Up @@ -400,6 +407,50 @@ def switchToSpecModeWithIrrModeSnapshot(nodeIdOfNodeToTest, nodeToTest):
finally:
stopProdNode()

# 10th test case: Load an irreversible snapshot into a node running without a block log
# Expectation: Node launches successfully
# and the head and lib should be advancing after some blocks produced
def switchToNoBlockLogWithIrrModeSnapshot(nodeIdOfNodeToTest, nodeToTest):
# Scenario: start a node that retains no block log ("--block-log-retain-blocks" set to "0")
# from a snapshot taken in irreversible mode, first right after the snapshot is created and
# then once more after wiping its state.
#   nodeIdOfNodeToTest: node id handed to removeState() and getLatestSnapshot()
#   nodeToTest: node object that is killed, relaunched and queried throughout the test
# The producing node is always stopped on exit via the finally clause.
try:
# Record head, lib and fork db head of the running node, then kill it
headLibAndForkDbHeadBeforeShutdown = getHeadLibAndForkDbHead(nodeToTest)
nodeToTest.kill(signal.SIGTERM)

# Relaunch in irreversible mode (no block log retained) and create the snapshot
relaunchNode(nodeToTest, addSwapFlags={"--read-mode": "irreversible", "--block-log-retain-blocks":"0"})
confirmHeadLibAndForkDbHeadOfIrrMode(nodeToTest)
nodeToTest.createSnapshot()
nodeToTest.kill(signal.SIGTERM)

# Start from clean data dir and then relaunch with irreversible snapshot, no block log means that fork_db will be reset
removeState(nodeIdOfNodeToTest)
relaunchNode(nodeToTest, chainArg=" --snapshot {}".format(getLatestSnapshot(nodeIdOfNodeToTest)), addSwapFlags={"--read-mode": speculativeReadMode, "--block-log-retain-blocks":"0"})
confirmHeadLibAndForkDbHeadOfSpecMode(nodeToTest)
# Ensure it does not replay "reversible blocks", i.e. the (head, lib, fork db head) values
# must differ from the ones noted before the first shutdown
headLibAndForkDbHeadAfterRelaunch = getHeadLibAndForkDbHead(nodeToTest)
assert headLibAndForkDbHeadBeforeShutdown != headLibAndForkDbHeadAfterRelaunch, \
"1: Head, Lib, and Fork Db same after relaunch {} vs {}".format(headLibAndForkDbHeadBeforeShutdown, headLibAndForkDbHeadAfterRelaunch)

# Start production and wait until lib advance, ensure everything is alright
startProdNode()
ensureHeadLibAndForkDbHeadIsAdvancing(nodeToTest)

# Note the head, lib and fork db head
# NOTE(review): the value stored here is not read again in this function; the final assert
# compares against headLibAndForkDbHeadAfterRelaunch instead
stopProdNode()
headLibAndForkDbHeadBeforeShutdown = getHeadLibAndForkDbHead(nodeToTest)
nodeToTest.kill(signal.SIGTERM)

# Wipe the state and relaunch the node again; relaunchNode() is called without extra arguments,
# presumably reusing the previous command line and hence the same snapshot (confirm against relaunchNode)
# The end result should be the same as right after the first launch from the snapshot
removeState(nodeIdOfNodeToTest)
relaunchNode(nodeToTest)
headLibAndForkDbHeadAfterRelaunch2 = getHeadLibAndForkDbHead(nodeToTest)
assert headLibAndForkDbHeadAfterRelaunch == headLibAndForkDbHeadAfterRelaunch2, \
"2: Head, Lib, and Fork Db after relaunch is different {} vs {}".format(headLibAndForkDbHeadAfterRelaunch, headLibAndForkDbHeadAfterRelaunch2)
finally:
# Make sure the producing node is stopped regardless of the test outcome
stopProdNode()


# Start executing test cases here
testSuccessful = executeTest(1, replayInIrrModeWithRevBlks)
testSuccessful = testSuccessful and executeTest(2, replayInIrrModeWithoutRevBlks)
Expand All @@ -419,6 +470,15 @@ def switchToSpecModeWithIrrModeSnapshot(nodeIdOfNodeToTest, nodeToTest):
testSuccessful = testSuccessful and executeTest(13, switchIrrToSpecModeWithConnectedToProdNode)
testSuccessful = testSuccessful and executeTest(14, switchToSpecModeWithIrrModeSnapshot)

# retest with read-mode head and no block log
speculativeReadMode="head"
blockLogRetainBlocks="0"
testSuccessful = testSuccessful and executeTest(15, switchSpecToIrrMode)
testSuccessful = testSuccessful and executeTest(16, switchIrrToSpecMode)
testSuccessful = testSuccessful and executeTest(17, switchSpecToIrrModeWithConnectedToProdNode)
testSuccessful = testSuccessful and executeTest(18, switchIrrToSpecModeWithConnectedToProdNode)
testSuccessful = testSuccessful and executeTest(19, switchToNoBlockLogWithIrrModeSnapshot)

finally:
TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails)
# Print test result
Expand Down
28 changes: 15 additions & 13 deletions tests/ship_streamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,21 @@ int main(int argc, char* argv[]) {
eosio::check(result_document[1]["head"].HasMember("block_id"), "'head' does not contain 'block_id'");
eosio::check(result_document[1]["head"]["block_id"].IsString(), "'head.block_id' isn't a string");

// stream what was received
if(is_first) {
std::cout << "[" << std::endl;
is_first = false;
} else {
std::cout << "," << std::endl;
}
std::cout << "{ \"get_blocks_result_v0\":" << std::endl;

rapidjson::StringBuffer result_sb;
rapidjson::PrettyWriter<rapidjson::StringBuffer> result_writer(result_sb);
result_document[1].Accept(result_writer);
std::cout << result_sb.GetString() << std::endl << "}" << std::endl;

// validate after streaming, so that an invalid entry is still included in the output
uint32_t this_block_num = 0;
if( result_document[1].HasMember("this_block") && result_document[1]["this_block"].IsObject() ) {
if( result_document[1]["this_block"].HasMember("block_num") && result_document[1]["this_block"]["block_num"].IsUint() ) {
Expand Down Expand Up @@ -168,19 +183,6 @@ int main(int argc, char* argv[]) {

}

if(is_first) {
std::cout << "[" << std::endl;
is_first = false;
} else {
std::cout << "," << std::endl;
}
std::cout << "{ \"get_blocks_result_v0\":" << std::endl;

rapidjson::StringBuffer result_sb;
rapidjson::PrettyWriter<rapidjson::StringBuffer> result_writer(result_sb);
result_document[1].Accept(result_writer);
std::cout << result_sb.GetString() << std::endl << "}" << std::endl;

if( this_block_num == end_block_num ) break;
}

Expand Down