Skip to content

Commit

Permalink
etcdserver: adjust tick advance on restart
Browse files Browse the repository at this point in the history
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
  • Loading branch information
gyuho committed Feb 26, 2018
1 parent 7ec77c4 commit 5bfa52a
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 12 deletions.
12 changes: 0 additions & 12 deletions etcdserver/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,16 +370,6 @@ func (r *raftNode) resumeSending() {
p.Resume()
}

// advanceTicksForElection fast-forwards the election timer of n by calling
// n.Tick() electionTicks-1 times. Doing so shortens the wait for the first
// leader election when the whole cluster is being bootstrapped; the one tick
// that is held back leaves a heartbeat interval for a possibly existing
// leader to reach this node. No tick is issued when electionTicks is 1 or less.
func advanceTicksForElection(n raft.Node, electionTicks int) {
	// Count down instead of up: the number of Tick calls is electionTicks-1.
	for remaining := electionTicks - 1; remaining > 0; remaining-- {
		n.Tick()
	}
}

func startNode(cfg ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) {
var err error
member := cl.MemberByName(cfg.Name)
Expand Down Expand Up @@ -417,7 +407,6 @@ func startNode(cfg ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return id, n, s, w
}

Expand Down Expand Up @@ -451,7 +440,6 @@ func restartNode(cfg ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *member
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return id, cl, n, s, w
}

Expand Down
48 changes: 48 additions & 0 deletions etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
snapshot *raftpb.Snapshot
)

restart := false
switch {
case !haveWAL && !cfg.NewCluster:
if err = cfg.VerifyJoinExisting(); err != nil {
Expand Down Expand Up @@ -388,6 +389,7 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
cfg.Print()
if !cfg.ForceNewCluster {
id, cl, n, s, w = restartNode(cfg, snapshot)
restart = true
} else {
id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
}
Expand Down Expand Up @@ -518,9 +520,55 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
}
srv.r.transport = tr

activePeers := 0
for _, m := range cl.Members() {
if m.ID != id {
if tr.IsActive(m.ID) {
activePeers++
}
}
}

clusterN := len(cl.Members())
plog.Infof("%s has %d active peers among %d found member(s)", srv.ID(), activePeers, clusterN)

// only advance ticks if starting a fresh cluster
// and if single-node cluster, or no peer connection
// has been established yet (activePeers == 0)
if !restart && (clusterN == 1 || activePeers == 0) {
// save one tick in case leader node comes
// in with heartbeat before timeout
tick := cfg.ElectionTicks - 1
plog.Infof("%s is advancing %d ticks for faster election (election tick %d)", srv.ID(), tick, cfg.ElectionTicks)
advanceTicksForElection(n, tick)
} else {
// on restart, there is likely an active peer already
// with an active leader; advance only a few ticks so
// that leader heartbeats can arrive and the restarted
// node can revert back to follower; otherwise, the
// restarted follower can disrupt the current cluster
// by starting an election with a higher term
tick := cfg.ElectionTicks / 10 // default election tick is 10 (1s)
plog.Infof("%s is advancing %d ticks (election tick %d)", srv.ID(), tick, cfg.ElectionTicks)
advanceTicksForElection(n, tick)
}

return srv, nil
}

// advanceTicksForElection fast-forwards the election timer of n by calling
// n.Tick() exactly ticks times; a zero or negative ticks issues no call.
// Advancing the timer reduces the wait for the first leader election when
// bootstrapping the whole cluster (especially in cross data-center
// deployments). The caller chooses ticks so that a few heartbeats remain
// for a possibly existing leader to contact this node.
func advanceTicksForElection(n raft.Node, ticks int) {
	// Count down the remaining ticks rather than indexing upward.
	for remaining := ticks; remaining > 0; remaining-- {
		n.Tick()
	}
}

// Start performs any initialization of the Server necessary for it to
// begin serving requests. It must be called before Do or Process.
// Start must be non-blocking; any long-running server functionality
Expand Down

0 comments on commit 5bfa52a

Please sign in to comment.