Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(lib/babe): add --babe-lead flag, update epoch handling logic #1895

Merged
merged 55 commits into from
Oct 21, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
f9d3d0c
implement tipSyncer.hasCurrentWorker
noot Oct 11, 2021
f11edb8
don't handle transaction messages when syncing
noot Oct 11, 2021
af49037
fix core tests
noot Oct 11, 2021
da7f10e
add unit test
noot Oct 11, 2021
56136ea
fix grandpa >2/3 check
noot Oct 11, 2021
3449217
lint
noot Oct 11, 2021
04fc7b0
fix grandpa unit tests
noot Oct 12, 2021
701dc09
fix grandpa skipped tests
noot Oct 12, 2021
d839ed9
fix get grandpa-ghost
noot Oct 12, 2021
a58ef35
update tracker, update network to cache/not re-gossip consensus msgs
noot Oct 12, 2021
e411560
merge w other branch
noot Oct 12, 2021
cd71260
re-gossip consensus msgs again
noot Oct 12, 2021
eb87877
Merge branch 'development' into noot/sync-cont
noot Oct 12, 2021
2a7eb15
update logs
noot Oct 13, 2021
3c23837
address comments
noot Oct 13, 2021
e267b53
Merge branch 'noot/sync-cont' of github.com:ChainSafe/gossamer into n…
noot Oct 13, 2021
2cc41c2
Merge branch 'development' of github.com:ChainSafe/gossamer into noot…
noot Oct 13, 2021
7aa48d0
Merge branch 'noot/sync-cont' of github.com:ChainSafe/gossamer into n…
noot Oct 13, 2021
fde6e68
add epoch timeout in invokeBlockAuthoring, add lead bool to BABE, rem…
noot Oct 13, 2021
ea75c91
Merge branch 'development' of github.com:ChainSafe/gossamer into noot…
noot Oct 13, 2021
d13d5fd
fix waiting for first block
noot Oct 13, 2021
70bd312
lint
noot Oct 13, 2021
9345b0e
lint
noot Oct 13, 2021
2294703
Merge branch 'development' of github.com:ChainSafe/gossamer into noot…
noot Oct 13, 2021
2f59f64
cleanup
noot Oct 13, 2021
059e122
Merge branch 'noot/sync-finality' of github.com:ChainSafe/gossamer in…
noot Oct 13, 2021
7539451
add --babe-lead flag
noot Oct 13, 2021
21d2fb3
update log
noot Oct 14, 2021
1bbc646
Merge branch 'noot/sync-finality' of github.com:ChainSafe/gossamer in…
noot Oct 14, 2021
ec5fba9
address comments
noot Oct 15, 2021
674af10
update waitForFirstBlock to return err
noot Oct 15, 2021
67809ae
lint
noot Oct 15, 2021
6c3f134
Merge branch 'noot/sync-finality' of github.com:ChainSafe/gossamer in…
noot Oct 15, 2021
7a7d68c
update dev cfg to have babe-lead=true
noot Oct 15, 2021
edfee2c
Merge branch 'development' into noot/sync-finality
noot Oct 15, 2021
bae7217
merge w development
noot Oct 15, 2021
4fccb85
Merge branch 'noot/sync-finality' of github.com:ChainSafe/gossamer in…
noot Oct 15, 2021
5ce9c8e
fix grandpa round test
noot Oct 15, 2021
05e8e88
merge w base
noot Oct 15, 2021
dc8c892
Merge branch 'development' of github.com:ChainSafe/gossamer into noot…
noot Oct 18, 2021
9719415
fix unit tests
noot Oct 18, 2021
fceafe3
fix integration tests
noot Oct 18, 2021
b0ee396
fix unit tests
noot Oct 19, 2021
9f2e5e7
address comments
noot Oct 19, 2021
c2dce86
lint
noot Oct 19, 2021
bc41726
fix log
noot Oct 19, 2021
7cd77c1
Merge branch 'development' of github.com:ChainSafe/gossamer into noot…
noot Oct 19, 2021
911a7fc
fix tests
noot Oct 19, 2021
398eab2
fix deadlock
noot Oct 19, 2021
57717f4
fix stress tests
noot Oct 19, 2021
486001d
address comments
noot Oct 19, 2021
5b53ca0
address comments
noot Oct 20, 2021
b62dd86
lint
noot Oct 20, 2021
8d9dd14
Merge branch 'development' into noot/sync-babe-fix
noot Oct 20, 2021
bec16d8
Merge branch 'development' into noot/sync-babe-fix
noot Oct 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions lib/babe/babe.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ type Service struct {
dev bool
// lead is used when setting up a new network from genesis.
// the "lead" node is the node that is designated to build block 1, after which the rest of the nodes
// will sync it and determine the first slot of the network based on that block
// will sync block 1 and determine the first slot of the network based on it
lead bool

// Storage interfaces
Expand Down Expand Up @@ -206,10 +206,7 @@ func (b *Service) Start() error {
}
}

go func() {
b.initiate()
}()

go b.initiate()
return nil
}

Expand All @@ -219,22 +216,24 @@ func (b *Service) waitForFirstBlock() error {

const firstBlockTimeout = time.Minute
timeout := time.NewTimer(firstBlockTimeout)
qdm12 marked this conversation as resolved.
Show resolved Hide resolved
defer func() {
cleanup := func() {
if !timeout.Stop() {
<-timeout.C
}
qdm12 marked this conversation as resolved.
Show resolved Hide resolved
}()
}

// loop until block 1
for {
select {
case block := <-ch:
if block != nil && block.Header.Number.Int64() > 0 {
case block, ok := <-ch:
if ok && block.Header.Number.Int64() > 0 {
cleanup()
Copy link
Contributor

@qdm12 qdm12 Oct 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we handle the case where ch gets closed (ok == false)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice thanks! Also quick question, what caller / where would the channel be closed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's actually closed anywhere, so this case will probably never happen

return nil
}
case <-timeout.C:
return errFirstBlockTimeout
case <-b.ctx.Done():
cleanup()
return b.ctx.Err()
}
}
Expand Down Expand Up @@ -412,11 +411,11 @@ func (b *Service) invokeBlockAuthoring() error {

nextEpochStartTime := getSlotStartTime(nextEpochStart, b.slotDuration)
epochTimer := time.NewTimer(time.Until(nextEpochStartTime))
defer func() {
cleanup := func() {
if !epochTimer.Stop() {
<-epochTimer.C
}
}()
}

slotDone := make([]<-chan time.Time, b.epochLength-intoEpoch)
for i := 0; i < int(b.epochLength-intoEpoch); i++ {
Expand All @@ -428,8 +427,10 @@ func (b *Service) invokeBlockAuthoring() error {

select {
case <-b.ctx.Done():
cleanup()
return nil
case <-b.pause:
cleanup()
return nil
case <-slotDone[i]:
slotNum := startSlot + uint64(i)
Expand Down
2 changes: 1 addition & 1 deletion tests/stress/grandpa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func TestStress_Grandpa_CatchUp(t *testing.T) {

time.Sleep(time.Second * 70) // let some rounds run
//nolint
node, err := utils.RunGossamer(t, numNodes-1, utils.TestDir(t, utils.KeyList[numNodes-1]), utils.GenesisSixAuths, utils.ConfigDefault, false)
node, err := utils.RunGossamer(t, numNodes-1, utils.TestDir(t, utils.KeyList[numNodes-1]), utils.GenesisSixAuths, utils.ConfigDefault, false, false)
require.NoError(t, err)
nodes = append(nodes, node)

Expand Down
68 changes: 34 additions & 34 deletions tests/stress/stress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func TestSync_SingleBlockProducer(t *testing.T) {

// start block producing node first
//nolint
node, err := utils.RunGossamer(t, numNodes-1, utils.TestDir(t, utils.KeyList[numNodes-1]), utils.GenesisDev, utils.ConfigNoGrandpa, false)
node, err := utils.RunGossamer(t, numNodes-1, utils.TestDir(t, utils.KeyList[numNodes-1]), utils.GenesisDev, utils.ConfigNoGrandpa, false, true)
require.NoError(t, err)

// wait and start rest of nodes - if they all start at the same time the first round usually doesn't complete since
Expand Down Expand Up @@ -182,40 +182,40 @@ func TestSync_MultipleEpoch(t *testing.T) {
}
}

func TestSync_SingleSyncingNode(t *testing.T) {
// TODO: Fix this test and enable it.
t.Skip("skipping TestSync_SingleSyncingNode")
utils.SetLogLevel(log.LvlInfo)

// start block producing node
alice, err := utils.RunGossamer(t, 0, utils.TestDir(t, utils.KeyList[0]), utils.GenesisDev, utils.ConfigDefault, false)
require.NoError(t, err)
time.Sleep(time.Second * 15)

// start syncing node
bob, err := utils.RunGossamer(t, 1, utils.TestDir(t, utils.KeyList[1]), utils.GenesisDev, utils.ConfigNoBABE, false)
require.NoError(t, err)

nodes := []*utils.Node{alice, bob}
defer func() {
errList := utils.StopNodes(t, nodes)
require.Len(t, errList, 0)
}()

numCmps := 100
for i := 0; i < numCmps; i++ {
t.Log("comparing...", i)
_, err = compareBlocksByNumberWithRetry(t, nodes, strconv.Itoa(i))
require.NoError(t, err, i)
}
}
// func TestSync_SingleSyncingNode(t *testing.T) {
// // TODO: Fix this test and enable it.
// t.Skip("skipping TestSync_SingleSyncingNode")
// utils.SetLogLevel(log.LvlInfo)

// // start block producing node
// alice, err := utils.RunGossamer(t, 0, utils.TestDir(t, utils.KeyList[0]), utils.GenesisDev, utils.ConfigDefault, false)
// require.NoError(t, err)
// time.Sleep(time.Second * 15)

// // start syncing node
// bob, err := utils.RunGossamer(t, 1, utils.TestDir(t, utils.KeyList[1]), utils.GenesisDev, utils.ConfigNoBABE, false)
// require.NoError(t, err)

// nodes := []*utils.Node{alice, bob}
// defer func() {
// errList := utils.StopNodes(t, nodes)
// require.Len(t, errList, 0)
// }()

// numCmps := 100
// for i := 0; i < numCmps; i++ {
// t.Log("comparing...", i)
// _, err = compareBlocksByNumberWithRetry(t, nodes, strconv.Itoa(i))
// require.NoError(t, err, i)
// }
// }

func TestSync_Bench(t *testing.T) {
utils.SetLogLevel(log.LvlInfo)
numBlocks := 64

// start block producing node
alice, err := utils.RunGossamer(t, 0, utils.TestDir(t, utils.KeyList[1]), utils.GenesisDev, utils.ConfigNoGrandpa, false)
alice, err := utils.RunGossamer(t, 0, utils.TestDir(t, utils.KeyList[1]), utils.GenesisDev, utils.ConfigNoGrandpa, false, true)
require.NoError(t, err)

for {
Expand All @@ -236,7 +236,7 @@ func TestSync_Bench(t *testing.T) {
t.Log("BABE paused")

// start syncing node
bob, err := utils.RunGossamer(t, 1, utils.TestDir(t, utils.KeyList[0]), utils.GenesisDev, utils.ConfigNotAuthority, false)
bob, err := utils.RunGossamer(t, 1, utils.TestDir(t, utils.KeyList[0]), utils.GenesisDev, utils.ConfigNotAuthority, false, true)
require.NoError(t, err)

nodes := []*utils.Node{alice, bob}
Expand Down Expand Up @@ -290,7 +290,7 @@ func TestSync_Restart(t *testing.T) {

// start block producing node first
//nolint
node, err := utils.RunGossamer(t, numNodes-1, utils.TestDir(t, utils.KeyList[numNodes-1]), utils.GenesisDefault, utils.ConfigDefault, false)
node, err := utils.RunGossamer(t, numNodes-1, utils.TestDir(t, utils.KeyList[numNodes-1]), utils.GenesisDefault, utils.ConfigDefault, false, true)
require.NoError(t, err)

// wait and start rest of nodes
Expand Down Expand Up @@ -343,15 +343,15 @@ func TestSync_SubmitExtrinsic(t *testing.T) {
idx := 0 // TODO: randomise this

// start block producing node first
node, err := utils.RunGossamer(t, 0, utils.TestDir(t, utils.KeyList[0]), utils.GenesisDev, utils.ConfigNoGrandpa, false)
node, err := utils.RunGossamer(t, 0, utils.TestDir(t, utils.KeyList[0]), utils.GenesisDev, utils.ConfigNoGrandpa, false, true)
require.NoError(t, err)
nodes := []*utils.Node{node}

// Start rest of nodes
node, err = utils.RunGossamer(t, 1, utils.TestDir(t, utils.KeyList[1]), utils.GenesisDev, utils.ConfigNotAuthority, false)
node, err = utils.RunGossamer(t, 1, utils.TestDir(t, utils.KeyList[1]), utils.GenesisDev, utils.ConfigNotAuthority, false, false)
require.NoError(t, err)
nodes = append(nodes, node)
node, err = utils.RunGossamer(t, 2, utils.TestDir(t, utils.KeyList[2]), utils.GenesisDev, utils.ConfigNotAuthority, false)
node, err = utils.RunGossamer(t, 2, utils.TestDir(t, utils.KeyList[2]), utils.GenesisDev, utils.ConfigNotAuthority, false, false)
require.NoError(t, err)
nodes = append(nodes, node)

Expand Down
8 changes: 4 additions & 4 deletions tests/utils/gossamer_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,14 @@ func StartGossamer(t *testing.T, node *Node, websocket bool) error {
}

// RunGossamer will initialise and start a gossamer instance
func RunGossamer(t *testing.T, idx int, basepath, genesis, config string, websocket bool) (*Node, error) {
func RunGossamer(t *testing.T, idx int, basepath, genesis, config string, websocket, babeLead bool) (*Node, error) {
node, err := InitGossamer(idx, basepath, genesis, config)
if err != nil {
logger.Crit("could not initialise gossamer", "error", err)
os.Exit(1)
}

if idx == 0 {
if idx == 0 || babeLead {
node.BABELead = true
}

Expand Down Expand Up @@ -316,7 +316,7 @@ func InitializeAndStartNodes(t *testing.T, num int, genesis, config string) ([]*
if i < len(KeyList) {
name = KeyList[i]
}
node, err := RunGossamer(t, i, TestDir(t, name), genesis, config, false)
node, err := RunGossamer(t, i, TestDir(t, name), genesis, config, false, false)
if err != nil {
logger.Error("failed to run gossamer", "i", i)
}
Expand Down Expand Up @@ -346,7 +346,7 @@ func InitializeAndStartNodesWebsocket(t *testing.T, num int, genesis, config str
if i < len(KeyList) {
name = KeyList[i]
}
node, err := RunGossamer(t, i, TestDir(t, name), genesis, config, true)
node, err := RunGossamer(t, i, TestDir(t, name), genesis, config, true, false)
if err != nil {
logger.Error("failed to run gossamer", "i", i)
}
Expand Down