From 1575ea7a93be0418e4654fdbc4f86492adbd4c1d Mon Sep 17 00:00:00 2001
From: taokayan
Date: Fri, 19 Jul 2019 16:22:27 +0800
Subject: [PATCH] fix #7600: double confirm after changing signing key

---
 plugins/producer_plugin/producer_plugin.cpp |  34 +--
 programs/eosio-launcher/main.cpp            |  20 +-
 tests/CMakeLists.txt                        |   5 +
 tests/Cluster.py                            |   7 +-
 tests/Node.py                               |   9 +-
 tests/nodeos_producer_watermark_test.py     | 257 ++++++++++++++++++++
 6 files changed, 302 insertions(+), 30 deletions(-)
 create mode 100755 tests/nodeos_producer_watermark_test.py

diff --git a/plugins/producer_plugin/producer_plugin.cpp b/plugins/producer_plugin/producer_plugin.cpp
index c4485708b10..a969f4240c4 100644
--- a/plugins/producer_plugin/producer_plugin.cpp
+++ b/plugins/producer_plugin/producer_plugin.cpp
@@ -265,28 +265,13 @@ class producer_plugin_impl : public std::enable_shared_from_this<producer_plugin_impl> {
-      auto& chain = chain_plug->chain();
-      const auto hbn = bsp->block_num;
-      auto new_pbhs = bsp->next(bsp->header.timestamp.next(), 0);
-
-      // for newly installed producers we can set their watermarks to the block they became active
-      if( bsp->active_schedule.version != new_pbhs.active_schedule.version ) {
-         flat_set<account_name> new_producers;
-         new_producers.reserve(new_pbhs.active_schedule.producers.size());
-         for( const auto& p: new_pbhs.active_schedule.producers) {
-            if (_producers.count(p.producer_name) > 0)
-               new_producers.insert(p.producer_name);
-         }
-
-         for( const auto& p: bsp->active_schedule.producers) {
-            new_producers.erase(p.producer_name);
-         }
-
-         for (const auto& new_producer: new_producers) {
-            _producer_watermarks[new_producer] = hbn;
-         }
+      // simplify handling of watermark in on_block
+      auto block_producer = bsp->header.producer;
+      auto watermark_itr = _producer_watermarks.find( block_producer );
+      if( watermark_itr != _producer_watermarks.end() ) {
+         watermark_itr->second = bsp->block_num;
+      } else if( _producers.count( block_producer ) > 0 ) {
+         _producer_watermarks.emplace( block_producer, bsp->block_num );
       }
    }
@@ -1391,9 +1376,12 @@ producer_plugin_impl::start_block_result producer_plugin_impl::start_block() {
          if (currrent_watermark_itr != _producer_watermarks.end()) {
             auto watermark = currrent_watermark_itr->second;
             if (watermark < hbs->block_num) {
-               blocks_to_confirm = std::min<uint16_t>(std::numeric_limits<uint16_t>::max(), (uint16_t)(hbs->block_num - watermark));
+               blocks_to_confirm = (uint16_t)(std::min<uint32_t>(std::numeric_limits<uint16_t>::max(), (uint32_t)(hbs->block_num - watermark)));
             }
          }
+
+         // can not confirm irreversible blocks
+         blocks_to_confirm = (uint16_t)(std::min<uint32_t>(blocks_to_confirm, (uint32_t)(hbs->block_num - hbs->dpos_irreversible_blocknum)));
       }

       _unapplied_transactions.add_aborted( chain.abort_block() );
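
The essence of the change: on_block now records, for each producer this node controls, the highest block number that producer has signed, and start_block() clamps the confirmation count so a stale watermark can never confirm blocks at or below the last irreversible block, which is how the double confirm in #7600 arose after a signing-key change. A simplified Python model of the new confirmation logic (an illustrative sketch; the function and variable names are not part of the patch):

    def blocks_to_confirm(head_block_num, lib_num, watermark):
        """Model of the confirmation count computed in start_block().

        watermark: highest block this producer previously signed, or None.
        """
        UINT16_MAX = 0xFFFF
        count = 0
        if watermark is not None and watermark < head_block_num:
            count = min(UINT16_MAX, head_block_num - watermark)
        # the fix: blocks that are already irreversible are never re-confirmed
        count = min(count, head_block_num - lib_num)
        return count

    # A stale watermark of 50 at head 120 with LIB 100 used to confirm 70
    # blocks, re-confirming 50 irreversible ones; with the clamp it confirms 20.
    assert blocks_to_confirm(120, 100, 50) == 20
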
diff --git a/programs/eosio-launcher/main.cpp b/programs/eosio-launcher/main.cpp
index bf9cba588db..d6bca57f09f 100644
--- a/programs/eosio-launcher/main.cpp
+++ b/programs/eosio-launcher/main.cpp
@@ -346,7 +346,7 @@ enum allowed_connection : char {
 class producer_names {
 public:
-   static string producer_name(unsigned int producer_number);
+   static string producer_name(unsigned int producer_number, bool shared_producer = false);
 private:
    static const int total_chars = 12;
    static const char slot_chars[];
@@ -358,8 +358,9 @@ const char producer_names::valid_char_range = sizeof(producer_names::slot_chars) - 1;
 // for 26 or fewer total producers create "defproducera" .. "defproducerz"
 // above 26 produce "defproducera" .. "defproducerz", "defproduceaa" .. "defproducerb", etc.
-string producer_names::producer_name(unsigned int producer_number) {
+string producer_names::producer_name(unsigned int producer_number, bool shared_producer) {
    // keeping legacy "defproducer[a-z]", but if greater than valid_char_range, will use "defpraaaaaaa"
+   // shared_producer will appear in all nodes' config
    char prod_name[] = "defproducera";
    if (producer_number > valid_char_range) {
       for (int current_char_loc = 5; current_char_loc < total_chars; ++current_char_loc) {
@@ -380,6 +381,12 @@ string producer_names::producer_name(unsigned int producer_number) {
    // make sure we haven't cycled back to the first 26 names (some time after 26^6)
    if (string(prod_name) == "defproducera" && producer_number != 0)
       throw std::runtime_error( "launcher not designed to handle numbers this large " );
+
+   if (shared_producer) {
+      prod_name[0] = 's';
+      prod_name[1] = 'h';
+      prod_name[2] = 'r';
+   }
    return prod_name;
 }
@@ -389,6 +396,7 @@ struct launcher_def {
    size_t unstarted_nodes;
    size_t prod_nodes;
    size_t producers;
+   size_t shared_producers;
    size_t next_node;
    string shape;
    allowed_connection allowed_connections = PC_NONE;
@@ -479,7 +487,8 @@ launcher_def::set_options (bpo::options_description &cfg) {
     ("nodes,n",bpo::value<size_t>(&total_nodes)->default_value(1),"total number of nodes to configure and launch")
     ("unstarted-nodes",bpo::value<size_t>(&unstarted_nodes)->default_value(0),"total number of nodes to configure, but not launch")
     ("pnodes,p",bpo::value<size_t>(&prod_nodes)->default_value(1),"number of nodes that contain one or more producers")
-    ("producers",bpo::value<size_t>(&producers)->default_value(21),"total number of non-bios producer instances in this network")
+    ("producers",bpo::value<size_t>(&producers)->default_value(21),"total number of non-bios and non-shared producer instances in this network")
+    ("shared-producers",bpo::value<size_t>(&shared_producers)->default_value(0),"total number of shared producers on each non-bios node")
     ("mode,m",bpo::value<vector<string>>()->multitoken()->default_value({"any"}, "any"),"connection mode, combination of \"any\", \"producers\", \"specified\", \"none\"")
     ("shape,s",bpo::value<string>(&shape)->default_value("star"),"network topology, use \"star\" \"mesh\" or give a filename for custom")
     ("genesis,g",bpo::value<bfs::path>(&genesis)->default_value("./genesis.json"),"set the path to genesis.json")
@@ -901,6 +910,11 @@ launcher_def::bind_nodes () {
             producer_set.schedule.push_back({prodname,pubkey});
             ++producer_number;
          }
+         for (unsigned j = 0; j < shared_producers; ++j) {
+            const auto prodname = producer_names::producer_name(j, true);
+            node.producers.push_back(prodname);
+            producer_set.schedule.push_back({prodname,pubkey});
+         }
       }
       node.dont_start = i >= to_not_start_node;
    }
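
For reference, the launcher's naming scheme after this change: regular producers keep the legacy "defproducer[a-z]" names, and a shared producer, which is placed in every non-bios node's config, reuses the same name with its first three characters replaced by "shr" (so shared producer 0 is "shrproducera"). A Python rendering of the simple single-letter case (an illustrative sketch, not the launcher code; the launcher additionally handles producer numbers beyond 'z'):

    def producer_name(producer_number, shared_producer=False):
        # simple case only: producer_number in [0, 25]
        assert 0 <= producer_number <= 25
        name = "defproducer" + chr(ord('a') + producer_number)
        if shared_producer:
            name = "shr" + name[3:]   # "defproducera" -> "shrproducera"
        return name

    assert producer_name(0) == "defproducera"
    assert producer_name(0, shared_producer=True) == "shrproducera"
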
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 30c1f38ac46..f310972626e 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -52,6 +52,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/launcher_test.py ${CMAKE_CURRENT_BINARY_DIR}/launcher_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/db_modes_test.py ${CMAKE_CURRENT_BINARY_DIR}/db_modes_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/prod_preactivation_test.py ${CMAKE_CURRENT_BINARY_DIR}/prod_preactivation_test.py COPYONLY)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version-label.sh ${CMAKE_CURRENT_BINARY_DIR}/version-label.sh COPYONLY)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/nodeos_producer_watermark_test.py ${CMAKE_CURRENT_BINARY_DIR}/nodeos_producer_watermark_test.py COPYONLY)

 #To run plugin_test with all log from blockchain displayed, put --verbose after --, i.e. plugin_test -- --verbose
 add_test(NAME plugin_test COMMAND plugin_test --report_level=detailed --color_output)
@@ -132,6 +133,10 @@ add_test(NAME nodeos_multiple_version_protocol_feature_mv_test COMMAND tests/nodeos_multiple_version_protocol_feature_test.py
                  -v --clean-run --dump-error-detail --alternate-version-labels-file ${ALTERNATE_VERSION_LABELS_FILE}
                  WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
 set_property(TEST nodeos_multiple_version_protocol_feature_mv_test PROPERTY LABELS mixed_version_tests)

+add_test(NAME nodeos_producer_watermark_lr_test COMMAND tests/nodeos_producer_watermark_test.py -v --clean-run --dump-error-detail WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+set_property(TEST nodeos_producer_watermark_lr_test PROPERTY LABELS long_running_tests)
+
+
 if(ENABLE_COVERAGE_TESTING)
   set(Coverage_NAME ${PROJECT_NAME}_coverage)

diff --git a/tests/Cluster.py b/tests/Cluster.py
index 8ca632e02e5..d73646465a1 100644
--- a/tests/Cluster.py
+++ b/tests/Cluster.py
@@ -144,7 +144,7 @@ def setAlternateVersionLabels(self, file):
     # pylint: disable=too-many-branches
     # pylint: disable=too-many-statements
     def launch(self, pnodes=1, unstartedNodes=0, totalNodes=1, prodCount=1, topo="mesh", delay=1, onlyBios=False, dontBootstrap=False,
-               totalProducers=None, extraNodeosArgs=None, useBiosBootFile=True, specificExtraNodeosArgs=None, onlySetProds=False,
+               totalProducers=None, sharedProducers=0, extraNodeosArgs=None, useBiosBootFile=True, specificExtraNodeosArgs=None, onlySetProds=False,
                pfSetupPolicy=PFSetupPolicy.FULL, alternateVersionLabelsFile=None, associatedNodeLabels=None, loadSystemContract=True):
         """Launch cluster.
         pnodes: producer nodes count
@@ -200,6 +200,9 @@ def launch(self, pnodes=1, unstartedNodes=0, totalNodes=1, prodCount=1, topo="mesh",
             assert(isinstance(totalProducers, (str,int)))
             producerFlag="--producers %s" % (totalProducers)

+        if sharedProducers > 0:
+            producerFlag += (" --shared-producers %d" % (sharedProducers))
+
         self.setAlternateVersionLabels(alternateVersionLabelsFile)

         tries = 30
@@ -431,7 +434,7 @@ def connectGroup(group, producerNodes, bridgeNodes) :
         if not loadSystemContract:
             useBiosBootFile=False   #ensure we use Cluster.bootstrap
         if onlyBios or not useBiosBootFile:
-            self.biosNode=self.bootstrap(biosNode, startedNodes, prodCount, totalProducers, pfSetupPolicy, onlyBios, onlySetProds, loadSystemContract)
+            self.biosNode=self.bootstrap(biosNode, startedNodes, prodCount + sharedProducers, totalProducers, pfSetupPolicy, onlyBios, onlySetProds, loadSystemContract)
             if self.biosNode is None:
                 Utils.Print("ERROR: Bootstrap failed.")
                 return False
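
With the Cluster.launch change, a test script can stand up a cluster in which every producing node also carries the shared producers; sharedProducers is forwarded to the launcher as --shared-producers, and the shared producers are counted when bootstrapping. A minimal usage sketch (assumes the standard test harness environment and mirrors the call made by the new test below):

    from Cluster import Cluster

    cluster = Cluster(walletd=True)
    # three producing nodes with one regular producer each, plus one shared
    # producer ("shrproducera") whose entry appears in every node's config
    if cluster.launch(pnodes=3, totalNodes=3, totalProducers=3, sharedProducers=1,
                      useBiosBootFile=False, onlySetProds=True) is False:
        raise RuntimeError("cluster failed to launch")
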
diff --git a/tests/Node.py b/tests/Node.py
index f8167663c28..523d511f09f 100644
--- a/tests/Node.py
+++ b/tests/Node.py
@@ -44,6 +44,7 @@ def __init__(self, host, port, pid=None, cmd=None, walletMgr=None, enableMongo=False,
         self.infoValid=None
         self.lastRetrievedHeadBlockNum=None
         self.lastRetrievedLIB=None
+        self.lastRetrievedHeadBlockProducer=""
         self.transCache={}
         self.walletMgr=walletMgr
         self.missingTransaction=False
@@ -1169,6 +1170,7 @@ def getInfo(self, silentErrors=False, exitOnError=False):
             self.infoValid=True
             self.lastRetrievedHeadBlockNum=int(info["head_block_num"])
             self.lastRetrievedLIB=int(info["last_irreversible_block_num"])
+            self.lastRetrievedHeadBlockProducer=info["head_block_producer"]
         return info

     def getBlockFromDb(self, idx):
@@ -1313,9 +1315,12 @@ def getBlockProducer(self, timeout=None, waitForBlock=True, exitOnError=True, blockNum=None):
         return blockProducer

     def getNextCleanProductionCycle(self, trans):
-        transId=Node.getTransId(trans)
         rounds=21*12*2  # max time to ensure that at least 2/3+1 of producers x blocks per producer x at least 2 times
-        self.waitForTransFinalization(transId, timeout=rounds/2)
+        if trans is not None:
+            transId=Node.getTransId(trans)
+            self.waitForTransFinalization(transId, timeout=rounds/2)
+        else:
+            transId="Null"

         irreversibleBlockNum=self.getIrreversibleBlockNum()

         # The voted schedule should be promoted now, then need to wait for that to become irreversible
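
getNextCleanProductionCycle() previously always waited on a transaction; it can now be called with trans=None when there is no transaction to finalize and the caller only needs to wait out a clean production cycle. Both call patterns (an illustrative sketch; node and trans stand for a Node instance and a previously returned transaction object):

    # with a transaction: wait for finalization, then for a clean cycle
    blockNum = node.getNextCleanProductionCycle(trans)

    # without one: skip finalization and just find the next clean cycle
    blockNum = node.getNextCleanProductionCycle(None)
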
diff --git a/tests/nodeos_producer_watermark_test.py b/tests/nodeos_producer_watermark_test.py
new file mode 100755
index 00000000000..822a04243d3
--- /dev/null
+++ b/tests/nodeos_producer_watermark_test.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python3
+
+from testUtils import Utils
+import testUtils
+from Cluster import Cluster
+from WalletMgr import WalletMgr
+from Node import Node
+from TestHelper import TestHelper
+
+import time
+import decimal
+import math
+import re
+
+###############################################################
+# nodeos_producer_watermark_test
+#
+# Regression test for issue 7600: verifies that producer watermarks keep a
+# node from double-confirming blocks after a producer's signing key changes.
+#
+# --dump-error-details
+# --keep-logs
+###############################################################
+def isValidBlockProducer(prodsActive, blockNum, node):
+    blockProducer=node.getBlockProducerByNum(blockNum)
+    if blockProducer not in prodsActive:
+        return False
+    return prodsActive[blockProducer]
+
+def validBlockProducer(prodsActive, prodsSeen, blockNum, node):
+    blockProducer=node.getBlockProducerByNum(blockNum)
+    if blockProducer not in prodsActive:
+        Utils.cmdError("unexpected block producer %s at blockNum=%s" % (blockProducer,blockNum))
+        Utils.errorExit("Failed because of invalid block producer")
+    if not prodsActive[blockProducer]:
+        Utils.cmdError("block producer %s for blockNum=%s not elected" % (blockProducer, blockNum))
+        Utils.errorExit("Failed because of incorrect block producer")
+    prodsSeen[blockProducer]=True
+    return blockProducer
+
+def setProds(sharedProdKey):
+    setProdsStr='{"schedule": ['
+    firstTime=True
+    for name in ["defproducera", "shrproducera", "defproducerb", "defproducerc"]:
+        if firstTime:
+            firstTime = False
+        else:
+            setProdsStr += ','
+        key = cluster.defProducerAccounts[name].activePublicKey
+        if name == "shrproducera":
+            key = sharedProdKey
+        setProdsStr += ' { "producer_name": "%s", "block_signing_key": "%s" }' % (name, key)
+
+    setProdsStr += ' ] }'
+    Utils.Print("setprods: %s" % (setProdsStr))
+    opts="--permission eosio@active"
+    # pylint: disable=redefined-variable-type
+    trans=cluster.biosNode.pushMessage("eosio", "setprods", setProdsStr, opts)
+    if trans is None or not trans[0]:
+        Utils.Print("ERROR: Failed to set producers with cmd %s" % (setProdsStr))
+
+def verifyProductionRounds(trans, node, prodsActive, rounds):
+    blockNum=node.getNextCleanProductionCycle(trans)
+    Utils.Print("Validating blockNum=%s" % (blockNum))
+
+    temp=Utils.Debug
+    Utils.Debug=False
+    Utils.Print("FIND VALID BLOCK PRODUCER")
+    blockProducer=node.getBlockProducerByNum(blockNum)
+    lastBlockProducer=blockProducer
+    adjust=False
+    while not isValidBlockProducer(prodsActive, blockNum, node):
+        adjust=True
+        blockProducer=node.getBlockProducerByNum(blockNum)
+        if lastBlockProducer!=blockProducer:
+            Utils.Print("blockProducer=%s for blockNum=%s" % (blockProducer, blockNum))
+        lastBlockProducer=blockProducer
+        blockNum+=1
+
+    Utils.Print("VALID BLOCK PRODUCER")
+    saw=0
+    sawHigh=0
+    startingFrom=blockNum
+    doPrint=0
+    invalidCount=0
+    while adjust:
+        invalidCount+=1
+        if lastBlockProducer==blockProducer:
+            saw+=1
+        else:
+            if saw>=12:
+                startingFrom=blockNum
+                if saw>12:
+                    Utils.Print("ERROR!!!!!!!!!!!!!! saw=%s, blockProducer=%s, blockNum=%s" % (saw,blockProducer,blockNum))
+                break
+            else:
+                if saw > sawHigh:
+                    sawHigh = saw
+                    Utils.Print("sawHigh=%s" % (sawHigh))
+                if doPrint < 5:
+                    doPrint+=1
+                    Utils.Print("saw=%s, blockProducer=%s, blockNum=%s" % (saw,blockProducer,blockNum))
+                lastBlockProducer=blockProducer
+                saw=1
+        blockProducer=node.getBlockProducerByNum(blockNum)
+        blockNum+=1
+
+    if adjust:
+        blockNum-=1
+
+    Utils.Print("ADJUSTED %s blocks" % (invalidCount-1))
+
+    prodsSeen=None
+    reportFirstMissedBlock=False
+    Utils.Print("Verify %s complete rounds of all producers producing" % (rounds))
+
+    prodsSize = len(prodsActive)
+    for i in range(0, rounds):
+        prodsSeen={}
+        lastBlockProducer=None
+        for j in range(0, prodsSize):
+            # each new set of 12 blocks should have a different blockProducer
+            if lastBlockProducer is not None and lastBlockProducer==node.getBlockProducerByNum(blockNum):
+                Utils.cmdError("expected blockNum %s to be produced by any of the valid producers except %s" % (blockNum, lastBlockProducer))
+                Utils.errorExit("Failed because of incorrect block producer order")
+
+            # make sure that the next set of 12 blocks all have the same blockProducer
+            lastBlockProducer=node.getBlockProducerByNum(blockNum)
+            for k in range(0, 12):
+                blockProducer = validBlockProducer(prodsActive, prodsSeen, blockNum, node)
+                if lastBlockProducer!=blockProducer:
+                    if not reportFirstMissedBlock:
+                        printStr=""
+                        newBlockNum=blockNum-18
+                        for l in range(0,36):
+                            printStr+="%s" % (newBlockNum)
+                            printStr+=":"
+                            newBlockProducer=node.getBlockProducerByNum(newBlockNum)
+                            printStr+="%s" % (newBlockProducer)
+                            printStr+=" "
+                            newBlockNum+=1
+                        Utils.Print("NOTE: expected blockNum %s (started from %s) to be produced by %s, but produced by %s: round=%s, prod slot=%s, prod num=%s - %s" % (blockNum, startingFrom, lastBlockProducer, blockProducer, i, j, k, printStr))
+                    reportFirstMissedBlock=True
+                    break
+                blockNum+=1
+
+        # make sure that we have seen all of the active producers
+        prodsSeenKeys=prodsSeen.keys()
+        if len(prodsSeenKeys) != prodsSize:
+            Utils.cmdError("only saw %s producers of expected %d. At blockNum %s only the following producers were seen: %s" % (len(prodsSeenKeys), prodsSize, blockNum, ",".join(prodsSeenKeys)))
+            Utils.errorExit("Failed because of missing block producers")
+
+    Utils.Debug=temp
+
+
+Print=Utils.Print
+errorExit=Utils.errorExit
+
+args = TestHelper.parse_args({"--prod-count","--dump-error-details","--keep-logs","-v","--leave-running","--clean-run",
+                              "--wallet-port"})
+Utils.Debug=args.v
+totalNodes=3
+cluster=Cluster(walletd=True)
+dumpErrorDetails=args.dump_error_details
+keepLogs=args.keep_logs
+dontKill=args.leave_running
+prodCount=args.prod_count
+killAll=args.clean_run
+walletPort=args.wallet_port
+
+walletMgr=WalletMgr(True, port=walletPort)
+testSuccessful=False
+killEosInstances=not dontKill
+killWallet=not dontKill
+
+WalletdName=Utils.EosWalletName
+ClientName="cleos"
+
+try:
+    assert(totalNodes == 3)
+
+    TestHelper.printSystemInfo("BEGIN")
+    cluster.setWalletMgr(walletMgr)
+
+    cluster.killall(allInstances=killAll)
+    cluster.cleanup()
+    Print("Stand up cluster")
+    if cluster.launch(prodCount=prodCount, onlyBios=False, pnodes=totalNodes, totalNodes=totalNodes, totalProducers=totalNodes, useBiosBootFile=False, onlySetProds=True, sharedProducers=1) is False:
+        Utils.cmdError("launcher")
+        Utils.errorExit("Failed to stand up eos cluster.")
+
+    Print("Validating system accounts after bootstrap")
+    cluster.validateAccounts(None)
+
+    node0=cluster.getNode(0)
+    node1=cluster.getNode(1)
+    node2=cluster.getNode(2)
+
+    node=node0
+    numprod = totalNodes + 1
+
+    trans=None
+    prodsActive={}
+    prodsActive["shrproducera"] = True
+    prodsActive["defproducera"] = True
+    prodsActive["defproducerb"] = True
+    prodsActive["defproducerc"] = True
+
+    Print("Wait for initial schedule: defproducera(node 0) shrproducera(node 2) defproducerb(node 1) defproducerc(node 2)")
+
+    tries=10
+    while tries > 0:
+        node.infoValid = False
+        info = node.getInfo()
+        if node.infoValid and node.lastRetrievedHeadBlockProducer != "eosio":
+            break
+        time.sleep(1)
+        tries = tries-1
+    if tries == 0:
+        Utils.errorExit("failed to wait for initial schedule")
+
+    # change the signing key of shrproducera so that it is produced by node1 instead of node2
+    Print("change producer signing key, shrproducera will be produced by node1 instead of node2")
+    shracc_node1 = cluster.defProducerAccounts["shrproducera"]
+    shracc_node1.activePublicKey = cluster.defProducerAccounts["defproducerb"].activePublicKey
+    setProds(shracc_node1.activePublicKey)
+    Print("sleep for 4/3 rounds...")
+    time.sleep(numprod * 6 * 4 / 3)
+    verifyProductionRounds(trans, node0, prodsActive, 1)
+
+    # change the signing key of shrproducera to a key that no node can sign with
+    accounts = cluster.createAccountKeys(1)
+    Print("change producer signing key of shrproducera to a key that none of the nodes has")
+    shracc_node1.activePublicKey = accounts[0].activePublicKey
+    del prodsActive["shrproducera"]
+    setProds(shracc_node1.activePublicKey)
+    Print("sleep for 4/3 rounds...")
+    time.sleep(numprod * 6 * 4 / 3)
+    verifyProductionRounds(trans, node0, prodsActive, 1)
+
+    # change the signing key back so that node1 can produce as shrproducera again
+    Print("change producer signing key of shrproducera so that node1 can produce again")
+    shracc_node1.activePublicKey = cluster.defProducerAccounts["defproducerb"].activePublicKey
+    prodsActive["shrproducera"] = True
+    setProds(shracc_node1.activePublicKey)
+    tries=numprod * 6 * 4 # give 4 rounds
+    while tries > 0:
+        node.infoValid = False
+        info = node.getInfo()
+        if node.infoValid and node.lastRetrievedHeadBlockProducer == "shrproducera":
+            break
+        time.sleep(1)
+        tries = tries-1
+    if tries == 0:
+        Utils.errorExit("shrproducera failed to produce")
+
+    testSuccessful=True
+finally:
+    TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, killEosInstances=killEosInstances, killWallet=killWallet, keepLogs=keepLogs, cleanRun=killAll, dumpErrorDetails=dumpErrorDetails)
+
+exit(0)
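
Aside on setProds() above: it assembles the setprods action payload by string concatenation; the same payload can be produced with json.dumps, which avoids quoting mistakes. An equivalent sketch (hypothetical helper, not part of the patch):

    import json

    def make_setprods_payload(producer_keys):
        """producer_keys: iterable of (producer_name, block_signing_key) pairs."""
        schedule = [{"producer_name": name, "block_signing_key": key}
                    for name, key in producer_keys]
        return json.dumps({"schedule": schedule})

    # pushed exactly as in the test:
    # cluster.biosNode.pushMessage("eosio", "setprods", payload, "--permission eosio@active")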