diff --git a/state.go b/state.go index cc422bc1a..cce013866 100644 --- a/state.go +++ b/state.go @@ -439,8 +439,8 @@ func (m *Memberlist) resetNodes() { m.nodeLock.Lock() defer m.nodeLock.Unlock() - // Move the dead nodes - deadIdx := moveDeadNodes(m.nodes) + // Move dead nodes, but respect gossip to the dead interval + deadIdx := moveDeadNodes(m.nodes, m.config.GossipToTheDeadTime) // Deregister the dead nodes for i := deadIdx; i < len(m.nodes); i++ { diff --git a/state_test.go b/state_test.go index 57b17246e..093820a7d 100644 --- a/state_test.go +++ b/state_test.go @@ -862,6 +862,16 @@ func TestMemberList_ResetNodes(t *testing.T) { d := dead{Node: "test2", Incarnation: 1} m.deadNode(&d) + m.config.GossipToTheDeadTime = 100 * time.Millisecond + m.resetNodes() + if len(m.nodes) != 3 { + t.Fatalf("Bad length") + } + if _, ok := m.nodeMap["test2"]; !ok { + t.Fatalf("test2 should not be unmapped") + } + + time.Sleep(200 * time.Millisecond) m.resetNodes() if len(m.nodes) != 2 { t.Fatalf("Bad length") diff --git a/util.go b/util.go index a294c3034..a141f625f 100644 --- a/util.go +++ b/util.go @@ -190,9 +190,9 @@ func pushPullScale(interval time.Duration, n int) time.Duration { return time.Duration(multiplier) * interval } -// moveDeadNodes moves all the nodes in the dead state -// to the end of the slice and returns the index of the first dead node. -func moveDeadNodes(nodes []*nodeState) int { +// moveDeadNodes moves nodes that are dead and beyond the gossip to the dead interval +// to the end of the slice and returns the index of the first moved node. +func moveDeadNodes(nodes []*nodeState, gossipToTheDeadTime time.Duration) int { numDead := 0 n := len(nodes) for i := 0; i < n-numDead; i++ { @@ -200,6 +200,11 @@ func moveDeadNodes(nodes []*nodeState) int { continue } + // Respect the gossip to the dead interval + if time.Since(nodes[i].StateChange) <= gossipToTheDeadTime { + continue + } + // Move this node to the end nodes[i], nodes[n-numDead-1] = nodes[n-numDead-1], nodes[i] numDead++ diff --git a/util_test.go b/util_test.go index bed608821..4ea7ba893 100644 --- a/util_test.go +++ b/util_test.go @@ -252,29 +252,50 @@ func TestPushPullScale(t *testing.T) { func TestMoveDeadNodes(t *testing.T) { nodes := []*nodeState{ &nodeState{ - State: stateDead, + State: stateDead, + StateChange: time.Now().Add(-20 * time.Second), }, &nodeState{ - State: stateAlive, + State: stateAlive, + StateChange: time.Now().Add(-20 * time.Second), }, + // This dead node should not be moved, as its state changed + // less than the specified GossipToTheDead time ago &nodeState{ - State: stateAlive, + State: stateDead, + StateChange: time.Now().Add(-10 * time.Second), }, &nodeState{ - State: stateDead, + State: stateAlive, + StateChange: time.Now().Add(-20 * time.Second), }, &nodeState{ - State: stateAlive, + State: stateDead, + StateChange: time.Now().Add(-20 * time.Second), + }, + &nodeState{ + State: stateAlive, + StateChange: time.Now().Add(-20 * time.Second), }, } - idx := moveDeadNodes(nodes) - if idx != 3 { + idx := moveDeadNodes(nodes, (15 * time.Second)) + if idx != 4 { t.Fatalf("bad index") } for i := 0; i < idx; i++ { - if nodes[i].State != stateAlive { - t.Fatalf("Bad state %d", i) + fmt.Println("index %d, state %d", i, nodes[i].State) + switch i { + case 2: + // Recently dead node remains at index 2, + // since nodes are swapped out to move to end. + if nodes[i].State != stateDead { + t.Fatalf("Bad state %d", i) + } + default: + if nodes[i].State != stateAlive { + t.Fatalf("Bad state %d", i) + } } } for i := idx; i < len(nodes); i++ {