diff --git a/.changelog/15290.txt b/.changelog/15290.txt new file mode 100644 index 000000000000..a7405ffe4b28 --- /dev/null +++ b/.changelog/15290.txt @@ -0,0 +1,3 @@ +```release-note:bug +autopilot: Fixed a bug where autopilot would try to fetch raft stats from other regions +``` diff --git a/nomad/autopilot.go b/nomad/autopilot.go index c5a382ecebb7..52a303e50995 100644 --- a/nomad/autopilot.go +++ b/nomad/autopilot.go @@ -195,7 +195,7 @@ func (s *Server) autopilotServers() map[raft.ServerID]*autopilot.Server { s.logger.Warn("Error parsing server info", "name", member.Name, "error", err) continue } else if srv == nil { - // this member was a client + // this member was a client or in another region continue } @@ -210,6 +210,9 @@ func (s *Server) autopilotServer(m serf.Member) (*autopilot.Server, error) { if !ok { return nil, nil } + if srv.Region != s.Region() { + return nil, nil + } return s.autopilotServerFromMetadata(srv) } diff --git a/nomad/autopilot_test.go b/nomad/autopilot_test.go index fb2afbe08603..4b417d3db405 100644 --- a/nomad/autopilot_test.go +++ b/nomad/autopilot_test.go @@ -190,6 +190,42 @@ func TestAutopilot_RollingUpdate(t *testing.T) { waitForStableLeadership(t, servers) } +func TestAutopilot_MultiRegion(t *testing.T) { + ci.Parallel(t) + + conf := func(c *Config) { + c.NumSchedulers = 0 // reduces test log noise + c.BootstrapExpect = 3 + } + s1, cleanupS1 := TestServer(t, conf) + defer cleanupS1() + + s2, cleanupS2 := TestServer(t, conf) + defer cleanupS2() + + s3, cleanupS3 := TestServer(t, conf) + defer cleanupS3() + + // federated regions should not be considered raft peers or show up in the + // known servers list + s4, cleanupS4 := TestServer(t, func(c *Config) { + c.BootstrapExpect = 0 + c.Region = "other" + }) + defer cleanupS4() + + servers := []*Server{s1, s2, s3} + TestJoin(t, s1, s2, s3, s4) + + t.Logf("waiting for initial stable cluster") + waitForStableLeadership(t, servers) + + apDelegate := &AutopilotDelegate{s3} + known := apDelegate.KnownServers() + must.Eq(t, 3, len(known)) + +} + func TestAutopilot_CleanupStaleRaftServer(t *testing.T) { ci.Parallel(t)