Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server: transfer leadership in case of error #12293

Merged
merged 5 commits into from
Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/12293.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
server: Transfer Raft leadership in case the Nomad server fails to establish leadership
```
59 changes: 57 additions & 2 deletions nomad/leader.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,30 @@ func (s *Server) monitorLeadership() {
}
}

// leadershipTransfer asks Raft to hand leadership over to another server,
// making up to retryCount attempts.
//
// It returns nil as soon as one attempt succeeds. If Raft reports
// raft.ErrUnsupportedProtocol the function gives up immediately, because
// repeating the call cannot change that outcome. Any other failure is logged
// and the next attempt is made; once all attempts are used up an error
// stating the number of attempts is returned.
func (s *Server) leadershipTransfer() error {
	const retryCount = 3

	for attempt := 0; attempt < retryCount; attempt++ {
		// Error() returns the outcome of this transfer request.
		transferErr := s.raft.LeadershipTransfer().Error()

		switch transferErr {
		case nil:
			s.logger.Info("successfully transferred leadership")
			return nil
		case raft.ErrUnsupportedProtocol:
			// Not retryable: the protocol version will not change between
			// attempts, so bail out instead of looping.
			return fmt.Errorf("leadership transfer not supported with Raft version lower than 3")
		}

		// "attempt" is the zero-based index of the attempt that just failed.
		s.logger.Error("failed to transfer leadership attempt, will retry",
			"attempt", attempt,
			"retry_limit", retryCount,
			"error", transferErr,
		)
	}

	return fmt.Errorf("failed to transfer leadership in %d attempts", retryCount)
}

// leaderLoop runs as long as we are the leader to run various
// maintenance activities
func (s *Server) leaderLoop(stopCh chan struct{}) {
Expand Down Expand Up @@ -151,7 +175,15 @@ RECONCILE:
s.logger.Error("failed to revoke leadership", "error", err)
}

goto WAIT
// Attempt to transfer leadership. If successful, leave the
// leaderLoop since this node is no longer the leader. Otherwise
// try to establish leadership again after 5 seconds.
if err := s.leadershipTransfer(); err != nil {
s.logger.Error("failed to transfer leadership", "error", err)
interval = time.After(5 * time.Second)
goto WAIT
}
return
}

establishedLeader = true
Expand Down Expand Up @@ -182,10 +214,12 @@ RECONCILE:
}

WAIT:
// Wait until leadership is lost
// Wait until leadership is lost or periodically reconcile as long as we
// are the leader, or when Serf events arrive.
for {
select {
case <-stopCh:
// Lost leadership.
return
case <-s.shutdownCh:
return
Expand Down Expand Up @@ -213,6 +247,27 @@ WAIT:
s.revokeLeadership()
err := s.establishLeadership(stopCh)
errCh <- err

// In case establishLeadership fails, try to transfer leadership.
// At this point Raft thinks we are the leader, but Nomad did not
// complete the required steps to act as the leader.
if err != nil {
if err := s.leadershipTransfer(); err != nil {
// establishedLeader was true before, but it no longer is
// since we revoked leadership and leadershipTransfer also
// failed.
// Stay in the leaderLoop with establishedLeader set to
// false so we try to establish leadership again in the
// next loop.
establishedLeader = false
interval = time.After(5 * time.Second)
goto WAIT
}

// leadershipTransfer was successful and it is
// time to leave the leaderLoop.
return
}
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions website/content/docs/upgrade/upgrade-specific.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ The volume staging directory for new CSI plugin tasks will now be
mounted to the task's `NOMAD_TASK_DIR` instead of the
`csi_plugin.mount_config`.

#### Raft leadership transfer on error

Starting with Nomad 1.3.0, when a Nomad leader fails to establish leadership it
will attempt to gracefully transfer leadership to another eligible server in
lgfa29 marked this conversation as resolved.
Show resolved Hide resolved
the cluster. This operation is only supported when using Raft Protocol Version
3.

#### Server Raft Database

The server raft database in `raft.db` will be automatically migrated to a new
Expand Down