From be58566e35269ba57d05da5bc5588477455d758c Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 17 Oct 2022 13:57:48 -0400 Subject: [PATCH] make version checks specific to region * One-time tokens are not replicated between regions, so we don't want to enforce that the version check across all of serf, just members in the same region. * Scheduler: Disconnected clients handling is specific to a single region, so we don't want to enforce that the version check across all of serf, just members in the same region. * Cleans up a bunch of legacy checks. This changeset is specific to 1.3.x and the changes for other versions of Nomad will be manually backported in separate PRs --- .changelog/14911.txt | 6 ++++++ nomad/acl_endpoint.go | 6 +++--- nomad/core_sched.go | 2 +- nomad/job_endpoint.go | 4 ++-- nomad/leader.go | 8 ++++---- nomad/operator_endpoint.go | 4 ++-- nomad/periodic.go | 2 +- nomad/plan_apply.go | 2 +- nomad/util.go | 34 +++++++++------------------------- nomad/util_test.go | 6 +++--- nomad/worker.go | 4 ++-- scheduler/scheduler.go | 7 ++++--- 12 files changed, 38 insertions(+), 47 deletions(-) create mode 100644 .changelog/14911.txt diff --git a/.changelog/14911.txt b/.changelog/14911.txt new file mode 100644 index 000000000000..4e8562800f8e --- /dev/null +++ b/.changelog/14911.txt @@ -0,0 +1,6 @@ +```release-note:bug +acl: Fixed a bug where Nomad version checking for one-time tokens was enforced across regions +``` +```release-note:bug +scheduler: Fixed a bug where version checking for disconnected clients handling was enforced across regions +``` diff --git a/nomad/acl_endpoint.go b/nomad/acl_endpoint.go index 6c61ce46d36b..cfe71b2fa7cd 100644 --- a/nomad/acl_endpoint.go +++ b/nomad/acl_endpoint.go @@ -891,7 +891,7 @@ func (a *ACL) UpsertOneTimeToken(args *structs.OneTimeTokenUpsertRequest, reply defer metrics.MeasureSince( []string{"nomad", "acl", "upsert_one_time_token"}, time.Now()) - if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) } @@ -943,7 +943,7 @@ func (a *ACL) ExchangeOneTimeToken(args *structs.OneTimeTokenExchangeRequest, re defer metrics.MeasureSince( []string{"nomad", "acl", "exchange_one_time_token"}, time.Now()) - if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) } @@ -1000,7 +1000,7 @@ func (a *ACL) ExpireOneTimeTokens(args *structs.OneTimeTokenExpireRequest, reply defer metrics.MeasureSince( []string{"nomad", "acl", "expire_one_time_tokens"}, time.Now()) - if !ServersMeetMinimumVersion(a.srv.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(a.srv.Members(), a.srv.Region(), minOneTimeAuthenticationTokenVersion, false) { return fmt.Errorf("All servers should be running version %v or later to use one-time authentication tokens", minAutopilotVersion) } diff --git a/nomad/core_sched.go b/nomad/core_sched.go index fa20bd0e290b..43ac913ff567 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -498,7 +498,7 @@ OUTER: func (c *CoreScheduler) nodeReap(eval *structs.Evaluation, nodeIDs []string) error { // For old clusters, send single deregistration messages COMPAT(0.11) minVersionBatchNodeDeregister := version.Must(version.NewVersion("0.9.4")) - if !ServersMeetMinimumVersion(c.srv.Members(), minVersionBatchNodeDeregister, true) { + if !ServersMeetMinimumVersion(c.srv.Members(), c.srv.Region(), minVersionBatchNodeDeregister, true) { for _, id := range nodeIDs { req := structs.NodeDeregisterRequest{ NodeID: id, diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index f584c53f4d4e..e0d289674f3f 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -359,7 +359,7 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis // COMPAT(1.1.0): Remove the ServerMeetMinimumVersion check to always set args.Eval // 0.12.1 introduced atomic eval job registration - if eval != nil && ServersMeetMinimumVersion(j.srv.Members(), minJobRegisterAtomicEvalVersion, false) { + if eval != nil && ServersMeetMinimumVersion(j.srv.Members(), j.srv.Region(), minJobRegisterAtomicEvalVersion, false) { args.Eval = eval submittedEval = true } @@ -848,7 +848,7 @@ func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobD } // COMPAT(1.1.0): remove conditional and always set args.Eval - if ServersMeetMinimumVersion(j.srv.Members(), minJobRegisterAtomicEvalVersion, false) { + if ServersMeetMinimumVersion(j.srv.Members(), j.srv.Region(), minJobRegisterAtomicEvalVersion, false) { args.Eval = eval } diff --git a/nomad/leader.go b/nomad/leader.go index 122abba25a0a..0421f179ed19 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -802,7 +802,7 @@ func (s *Server) schedulePeriodic(stopCh chan struct{}) { s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobCSIVolumeClaimGC, index)) } case <-oneTimeTokenGC.C: - if !ServersMeetMinimumVersion(s.Members(), minOneTimeAuthenticationTokenVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), s.Region(), minOneTimeAuthenticationTokenVersion, false) { continue } @@ -1639,7 +1639,7 @@ func (s *Server) getOrCreateAutopilotConfig() *structs.AutopilotConfig { return config } - if !ServersMeetMinimumVersion(s.Members(), minAutopilotVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), AllRegions, minAutopilotVersion, false) { s.logger.Named("autopilot").Warn("can't initialize until all servers are above minimum version", "min_version", minAutopilotVersion) return nil } @@ -1666,7 +1666,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { if config != nil { return config } - if !ServersMeetMinimumVersion(s.Members(), minSchedulerConfigVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), s.Region(), minSchedulerConfigVersion, false) { s.logger.Named("core").Warn("can't initialize scheduler config until all servers are above minimum version", "min_version", minSchedulerConfigVersion) return nil } @@ -1681,7 +1681,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { } func (s *Server) generateClusterID() (string, error) { - if !ServersMeetMinimumVersion(s.Members(), minClusterIDVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), AllRegions, minClusterIDVersion, false) { s.logger.Named("core").Warn("cannot initialize cluster ID until all servers are above minimum version", "min_version", minClusterIDVersion) return "", fmt.Errorf("cluster ID cannot be created until all servers are above minimum version %s", minClusterIDVersion) } diff --git a/nomad/operator_endpoint.go b/nomad/operator_endpoint.go index 40c27669eac6..43786cb10a7b 100644 --- a/nomad/operator_endpoint.go +++ b/nomad/operator_endpoint.go @@ -248,7 +248,7 @@ func (op *Operator) AutopilotSetConfiguration(args *structs.AutopilotSetConfigRe } // All servers should be at or above 0.8.0 to apply this operatation - if !ServersMeetMinimumVersion(op.srv.Members(), minAutopilotVersion, false) { + if !ServersMeetMinimumVersion(op.srv.Members(), op.srv.Region(), minAutopilotVersion, false) { return fmt.Errorf("All servers should be running version %v to update autopilot config", minAutopilotVersion) } @@ -316,7 +316,7 @@ func (op *Operator) SchedulerSetConfiguration(args *structs.SchedulerSetConfigRe } // All servers should be at or above 0.9.0 to apply this operation - if !ServersMeetMinimumVersion(op.srv.Members(), minSchedulerConfigVersion, false) { + if !ServersMeetMinimumVersion(op.srv.Members(), op.srv.Region(), minSchedulerConfigVersion, false) { return fmt.Errorf("All servers should be running version %v to update scheduler config", minSchedulerConfigVersion) } diff --git a/nomad/periodic.go b/nomad/periodic.go index c4f40066f63e..5b7aa5fdddda 100644 --- a/nomad/periodic.go +++ b/nomad/periodic.go @@ -80,7 +80,7 @@ func (s *Server) DispatchJob(job *structs.Job) (*structs.Evaluation, error) { eval.ModifyIndex = index // COMPAT(1.1): Remove in 1.1.0 - 0.12.1 introduced atomic eval job registration - if !ServersMeetMinimumVersion(s.Members(), minJobRegisterAtomicEvalVersion, false) { + if !ServersMeetMinimumVersion(s.Members(), s.Region(), minJobRegisterAtomicEvalVersion, false) { // Create a new evaluation eval.JobModifyIndex = index update := &structs.EvalUpdateRequest{ diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 69e174098fc9..d79c7925ff49 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -254,7 +254,7 @@ func (p *planner) applyPlan(plan *structs.Plan, result *structs.PlanResult, snap preemptedJobIDs := make(map[structs.NamespacedID]struct{}) - if ServersMeetMinimumVersion(p.Members(), MinVersionPlanNormalization, true) { + if ServersMeetMinimumVersion(p.Members(), p.Region(), MinVersionPlanNormalization, true) { // Initialize the allocs request using the new optimized log entry format. // Determine the minimum number of updates, could be more if there // are multiple updates per node diff --git a/nomad/util.go b/nomad/util.go index 5ea347f2b6ba..4400159b2fea 100644 --- a/nomad/util.go +++ b/nomad/util.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/serf/serf" + "golang.org/x/exp/slices" ) const ( @@ -143,15 +144,20 @@ func isNomadServer(m serf.Member) (bool, *serverParts) { return true, parts } +const AllRegions = "" + // ServersMeetMinimumVersion returns whether the Nomad servers are at least on the // given Nomad version. The checkFailedServers parameter specifies whether version // for the failed servers should be verified. -func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Version, checkFailedServers bool) bool { +func ServersMeetMinimumVersion(members []serf.Member, region string, minVersion *version.Version, checkFailedServers bool) bool { for _, member := range members { - if valid, parts := isNomadServer(member); valid && (parts.Status == serf.StatusAlive || (checkFailedServers && parts.Status == serf.StatusFailed)) { + valid, parts := isNomadServer(member) + if valid && + (parts.Region == region || region == AllRegions) && + (parts.Status == serf.StatusAlive || (checkFailedServers && parts.Status == serf.StatusFailed)) { // Check if the versions match - version.LessThan will return true for // 0.8.0-rc1 < 0.8.0, so we want to ignore the metadata - versionsMatch := slicesMatch(minVersion.Segments(), parts.Build.Segments()) + versionsMatch := slices.Equal(minVersion.Segments(), parts.Build.Segments()) if parts.Build.LessThan(minVersion) && !versionsMatch { return false } @@ -161,28 +167,6 @@ func ServersMeetMinimumVersion(members []serf.Member, minVersion *version.Versio return true } -func slicesMatch(a, b []int) bool { - if a == nil && b == nil { - return true - } - - if a == nil || b == nil { - return false - } - - if len(a) != len(b) { - return false - } - - for i := range a { - if a[i] != b[i] { - return false - } - } - - return true -} - // shuffleStrings randomly shuffles the list of strings func shuffleStrings(list []string) { for i := range list { diff --git a/nomad/util_test.go b/nomad/util_test.go index 300fe2c66484..dec9051b9168 100644 --- a/nomad/util_test.go +++ b/nomad/util_test.go @@ -148,7 +148,7 @@ func TestServersMeetMinimumVersionExcludingFailed(t *testing.T) { } for _, tc := range cases { - result := ServersMeetMinimumVersion(tc.members, tc.ver, false) + result := ServersMeetMinimumVersion(tc.members, AllRegions, tc.ver, false) if result != tc.expected { t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) } @@ -186,7 +186,7 @@ func TestServersMeetMinimumVersionIncludingFailed(t *testing.T) { } for _, tc := range cases { - result := ServersMeetMinimumVersion(tc.members, tc.ver, true) + result := ServersMeetMinimumVersion(tc.members, AllRegions, tc.ver, true) if result != tc.expected { t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) } @@ -224,7 +224,7 @@ func TestServersMeetMinimumVersionSuffix(t *testing.T) { } for _, tc := range cases { - result := ServersMeetMinimumVersion(tc.members, tc.ver, true) + result := ServersMeetMinimumVersion(tc.members, AllRegions, tc.ver, true) if result != tc.expected { t.Fatalf("bad: %v, %v, %v", result, tc.ver.String(), tc) } diff --git a/nomad/worker.go b/nomad/worker.go index deaab0303f9b..cc73ca9feab3 100644 --- a/nomad/worker.go +++ b/nomad/worker.go @@ -585,7 +585,7 @@ func (w *Worker) invokeScheduler(snap *state.StateSnapshot, eval *structs.Evalua // other packages to perform server version checks without direct references to // the Nomad server. func (w *Worker) ServersMeetMinimumVersion(minVersion *version.Version, checkFailedServers bool) bool { - return ServersMeetMinimumVersion(w.srv.Members(), minVersion, checkFailedServers) + return ServersMeetMinimumVersion(w.srv.Members(), w.srv.Region(), minVersion, checkFailedServers) } // SubmitPlan is used to submit a plan for consideration. This allows @@ -606,7 +606,7 @@ func (w *Worker) SubmitPlan(plan *structs.Plan) (*structs.PlanResult, scheduler. plan.SnapshotIndex = w.snapshotIndex // Normalize stopped and preempted allocs before RPC - normalizePlan := ServersMeetMinimumVersion(w.srv.Members(), MinVersionPlanNormalization, true) + normalizePlan := ServersMeetMinimumVersion(w.srv.Members(), w.srv.Region(), MinVersionPlanNormalization, true) if normalizePlan { plan.NormalizeAllocations() } diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 705ac9afb9f1..7feb058e544d 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -134,8 +134,9 @@ type Planner interface { // that on leader changes, the evaluation will be reblocked properly. ReblockEval(*structs.Evaluation) error - // ServersMeetMinimumVersion returns whether the Nomad servers are at least on the - // given Nomad version. The checkFailedServers parameter specifies whether version - // for the failed servers should be verified. + // ServersMeetMinimumVersion returns whether the Nomad servers in the + // worker's region are at least on the given Nomad version. The + // checkFailedServers parameter specifies whether version for the failed + // servers should be verified. ServersMeetMinimumVersion(minVersion *version.Version, checkFailedServers bool) bool }