Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Teach Nomad servers how to fall back to Consul. #1276

Merged
merged 13 commits into from
Jun 16, 2016
Merged
22 changes: 18 additions & 4 deletions nomad/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ func (s *Server) setupBootstrapHandler() error {
// drift because time.Timer is implemented as a monotonic clock.
var peersTimeout *time.Timer = time.NewTimer(0)

// consulQueryCount is the number of times the bootstrapFn has been
// called, regardless of success.
var consulQueryCount uint64

// leadershipTimedOut is a helper method that returns true if the
// peersTimeout timer has expired.
leadershipTimedOut := func() bool {
Expand Down Expand Up @@ -430,12 +434,16 @@ func (s *Server) setupBootstrapHandler() error {
bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
if bootstrapExpect == 0 {
// This Nomad Server has been bootstrapped. Rely on
// timeouts to determine health.

// the peersTimeout firing as a guard to prevent
// aggressive querying of Consul.
if !leadershipTimedOut() {
return nil
}
} else {
if consulQueryCount > 0 && !leadershipTimedOut() {
return nil
}

// This Nomad Server has not been bootstrapped, reach
// out to Consul if our peer list is less than
// `bootstrap_expect`.
Expand All @@ -454,6 +462,7 @@ func (s *Server) setupBootstrapHandler() error {
return nil
}
}
consulQueryCount++

s.logger.Printf("[DEBUG] server.consul: lost contact with Nomad quorum, falling back to Consul for server list")

Expand Down Expand Up @@ -481,6 +490,7 @@ func (s *Server) setupBootstrapHandler() error {
var mErr multierror.Error
const defaultMaxNumNomadServers = 8
nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
localNode := s.serf.Memberlist().LocalNode()
for _, dc := range dcs {
consulOpts := &consulapi.QueryOptions{
AllowStale: true,
Expand All @@ -501,6 +511,9 @@ func (s *Server) setupBootstrapHandler() error {
if addr == "" {
addr = cs.Address
}
if localNode.Addr.String() == addr && int(localNode.Port) == cs.ServicePort {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put a comment that we are skipping ourselves

continue
}
serverAddr := net.JoinHostPort(addr, port)
nomadServerServices = append(nomadServerServices, serverAddr)
}
Expand All @@ -514,8 +527,9 @@ func (s *Server) setupBootstrapHandler() error {

// Log the error and return nil so future handlers
// can attempt to register the `nomad` service.
s.logger.Printf("[TRACE] server.consul: no Nomad Servers advertising service %+q in Consul datacenters %+q", nomadServerServiceName, dcs)
peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
pollInterval := peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
s.logger.Printf("[TRACE] server.consul: no Nomad Servers advertising service %+q in Consul datacenters %+q, sleeping for %v", nomadServerServiceName, dcs, pollInterval)
peersTimeout.Reset(pollInterval)
return nil
}

Expand Down