Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Fix net_plugin catchup reported blocks #6756

Merged
merged 3 commits into from
Feb 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions plugins/net_plugin/net_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,13 +866,7 @@ namespace eosio {
fc_dlog(logger, "maybe truncating branch at = ${h}:${id}",("h",remote_head_num)("id",remote_head_id));
}

// base our branch off of the last handshake we sent the peer instead of our current
// LIB which could have moved forward in time as packets were in flight.
if (last_handshake_sent.generation >= 1) {
lib_id = last_handshake_sent.last_irreversible_block_id;
} else {
lib_id = cc.last_irreversible_block_id();
}
lib_id = last_handshake_recv.last_irreversible_block_id;
head_id = cc.fork_db_head_block_id();
}
catch (const assert_exception& ex) {
Expand Down Expand Up @@ -1590,6 +1584,10 @@ namespace eosio {
set_state(head_catchup);
}
}

if (state == in_sync) {
send_handshakes();
}
}
else if (state == lib_catchup) {
if( blk_num == sync_known_lib_num ) {
Expand Down
19 changes: 17 additions & 2 deletions tests/Node.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,19 @@ def delegatebw(self, fromAccount, netQuantity, cpuQuantity, toAccount=None, tran

return self.waitForTransBlockIfNeeded(trans, waitForTransBlock, exitOnError=exitOnError)

def undelegatebw(self, fromAccount, netQuantity, cpuQuantity, toAccount=None, waitForTransBlock=False, exitOnError=False):
    """Issue a "system undelegatebw" command through processCleosCmd.

    Args:
        fromAccount: account object; its ``name`` is the first command argument.
        netQuantity: net amount; formatted together with CORE_SYMBOL.
        cpuQuantity: cpu amount; formatted together with CORE_SYMBOL.
        toAccount: account object whose ``name`` is the second command
            argument; falls back to ``fromAccount`` when None.
        waitForTransBlock: forwarded to waitForTransBlockIfNeeded.
        exitOnError: forwarded to both processCleosCmd and
            waitForTransBlockIfNeeded.

    Returns:
        Whatever waitForTransBlockIfNeeded returns for the transaction.
    """
    # With no explicit receiver, the sender's own account is used.
    receiver = fromAccount if toAccount is None else toAccount

    description = "system undelegatebw"
    cmdArgs = (
        description,
        fromAccount.name,
        receiver.name,
        netQuantity,
        CORE_SYMBOL,
        cpuQuantity,
        CORE_SYMBOL,
    )
    command = "%s -j %s %s \"%s %s\" \"%s %s\"" % cmdArgs
    failureMsg = "fromAccount=%s, toAccount=%s" % (fromAccount.name, receiver.name)

    result = self.processCleosCmd(command, description, exitOnError=exitOnError, exitMsg=failureMsg)
    # Record the transaction before optionally waiting on its block.
    self.trackCmdTransaction(result)

    return self.waitForTransBlockIfNeeded(result, waitForTransBlock, exitOnError=exitOnError)

def regproducer(self, producer, url, location, waitForTransBlock=False, exitOnError=False):
cmdDesc="system regproducer"
cmd="%s -j %s %s %s %s" % (
Expand Down Expand Up @@ -1220,6 +1233,8 @@ def interruptAndVerifyExitStatus(self):
def verifyAlive(self, silent=False):
if not silent and Utils.Debug: Utils.Print("Checking if node(pid=%s) is alive(killed=%s): %s" % (self.pid, self.killed, self.cmd))
if self.killed or self.pid is None:
self.killed=True
self.pid=None
return False

try:
Expand All @@ -1231,8 +1246,8 @@ def verifyAlive(self, silent=False):
return False
except PermissionError as ex:
return True
else:
return True

return True

def getBlockProducerByNum(self, blockNum, timeout=None, waitForBlock=True, exitOnError=True):
if waitForBlock:
Expand Down
47 changes: 43 additions & 4 deletions tests/nodeos_forked_chain_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ def getMinHeadAndLib(prodNodes):
#identify the earliest LIB in order to identify the earliest block to check if divergent branches eventually reach consensus
(headBlockNum, libNumAroundDivergence)=getMinHeadAndLib(prodNodes)
Print("Tracking block producers from %d till divergence or %d. Head block is %d and lowest LIB is %d" % (preKillBlockNum, lastBlockNum, headBlockNum, libNumAroundDivergence))
transitionCount=0
missedTransitionBlock=None
for blockNum in range(preKillBlockNum,lastBlockNum):
#avoiding getting LIB until my current block passes the head from the last time I checked
if blockNum>headBlockNum:
Expand All @@ -342,12 +344,24 @@ def getMinHeadAndLib(prodNodes):
if not nextProdChange and prodChanged and blockProducer1==killAtProducer:
nextProdChange=True
elif nextProdChange and blockProducer1!=killAtProducer:
actualLastBlockNum=blockNum
break
nextProdChange=False
if blockProducer0!=blockProducer1:
Print("Divergence identified at block %s, node_00 producer: %s, node_01 producer: %s" % (blockNum, blockProducer0, blockProducer1))
actualLastBlockNum=blockNum
break
else:
missedTransitionBlock=blockNum
transitionCount+=1
# allow this to transition twice, in case the script identified a transition earlier than the one at which the bridge node received the kill command
if transitionCount>1:
Print("At block %d and have passed producer: %s %d times and we have not diverged, stopping looking and letting errors report" % (blockNum, killAtProducer, transitionCount))
actualLastBlockNum=blockNum
break

#if we diverge before identifying the actualLastBlockNum, then there is an ERROR
if blockProducer0!=blockProducer1:
Utils.errorExit("Groups reported different block producers for block number %d. %s != %s." % (blockNum,blockProducer0,blockProducer1))
extra="" if transitionCount==0 else " Diverged after expected killAtProducer transition at block %d." % (missedTransitionBlock)
Utils.errorExit("Groups reported different block producers for block number %d.%s %s != %s." % (blockNum,extra,blockProducer0,blockProducer1))

#verify that the non producing node is not alive (and populate the producer nodes with current getInfo data to report if
#an error occurs)
Expand Down Expand Up @@ -405,6 +419,23 @@ def getMinHeadAndLib(prodNodes):

#ensure that the nodes have enough time to reach consensus, so wait for 3 producers to produce their complete round
time.sleep(inRowCountPerProducer * 3 / 2)
remainingChecks=20
match=False
checkHead=False
while remainingChecks>0:
checkMatchBlock=killBlockNum if not checkHead else prodNodes[0].getBlockNum()
blockProducer0=prodNodes[0].getBlockProducerByNum(checkMatchBlock)
blockProducer1=prodNodes[1].getBlockProducerByNum(checkMatchBlock)
match=blockProducer0==blockProducer1
if match:
if checkHead:
break
else:
checkHead=True
continue
Print("Fork has not resolved yet, wait a little more. Block %s has producer %s for node_00 and %s for node_01. Original divergence was at block %s. Wait time remaining: %d" % (checkMatchBlock, blockProducer0, blockProducer1, killBlockNum, remainingChecks))
time.sleep(1)
remainingChecks-=1

for prodNode in prodNodes:
info=prodNode.getInfo()
Expand All @@ -426,12 +457,20 @@ def getMinHeadAndLib(prodNodes):

analyzeBPs(blockProducers0, blockProducers1, expectDivergence=False)

resolvedKillBlockProducer=None
for prod in blockProducers0:
if prod["blockNum"]==killBlockNum:
resolvedKillBlockProducer = prod["prod"]
if resolvedKillBlockProducer is None:
Utils.errorExit("Did not find find block %s (the original divergent block) in blockProducers0, test setup is wrong. blockProducers0: %s" % (killBlockNum, ", ".join(blockProducers)))
Print("Fork resolved and determined producer %s for block %s" % (resolvedKillBlockProducer, killBlockNum))

blockProducers0=[]
blockProducers1=[]

testSuccessful=True
finally:
TestHelper.shutdown(cluster, walletMgr, testSuccessful, killEosInstances, killWallet, keepLogs, killAll, dumpErrorDetails)
TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, killEosInstances=killEosInstances, killWallet=killWallet, keepLogs=keepLogs, cleanRun=killAll, dumpErrorDetails=dumpErrorDetails)

if not testSuccessful:
Print(Utils.FileDivider)
Expand Down