From 95b3b6eb0251070f40a4196b4d535ea691499aad Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 23 Jan 2018 16:47:00 -0800 Subject: [PATCH 01/79] drain: initial drainv2 structs and impl --- api/tasks.go | 40 ++ client/alloc_runner_health_watcher.go | 85 ++- jobspec/parse.go | 45 ++ jobspec/parse_test.go | 6 + jobspec/test-fixtures/basic.hcl | 7 + nomad/drain.go | 752 ++++++++++++++++++++++++++ nomad/drain_test.go | 216 ++++++++ nomad/leader.go | 3 + nomad/mock/mock.go | 1 + nomad/node_endpoint.go | 5 + nomad/plan_apply.go | 3 + nomad/state/state_store.go | 12 + nomad/structs/structs.go | 178 +++++- scheduler/generic_sched.go | 5 +- scheduler/util.go | 10 +- testutil/rpcapi/rcpapi.go | 114 ++++ 16 files changed, 1433 insertions(+), 49 deletions(-) create mode 100644 nomad/drain.go create mode 100644 nomad/drain_test.go create mode 100644 testutil/rpcapi/rcpapi.go diff --git a/api/tasks.go b/api/tasks.go index 047afccaf0a3..f7d3d9fb0737 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -284,6 +284,43 @@ func (e *EphemeralDisk) Canonicalize() { } } +// MigrateStrategy describes how allocations for a task group should be +// migrated between nodes (eg when draining). +type MigrateStrategy struct { + MaxParallel *int `mapstructure:"max_parallel"` + HealthCheck *string `mapstructure:"health_check"` + MinHealthyTime *time.Duration `mapstructure:"min_healthy_time"` + HealthyDeadline *time.Duration `mapstructure:"healthy_deadline"` +} + +func DefaultMigrateStrategy() *MigrateStrategy { + return &MigrateStrategy{ + MaxParallel: helper.IntToPtr(1), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(10 * time.Second), + HealthyDeadline: helper.TimeToPtr(5 * time.Minute), + } +} + +func (m *MigrateStrategy) Canonicalize() { + if m == nil { + return + } + defaults := DefaultMigrateStrategy() + if m.MaxParallel == nil { + m.MaxParallel = defaults.MaxParallel + } + if m.HealthCheck == nil { + m.HealthCheck = defaults.HealthCheck + } + if m.MinHealthyTime == nil { + m.MinHealthyTime = defaults.MinHealthyTime + } + if m.HealthyDeadline == nil { + m.HealthyDeadline = defaults.HealthyDeadline + } +} + // TaskGroup is the unit of scheduling. 
type TaskGroup struct { Name *string @@ -294,6 +331,7 @@ type TaskGroup struct { ReschedulePolicy *ReschedulePolicy EphemeralDisk *EphemeralDisk Update *UpdateStrategy + Migrate *MigrateStrategy Meta map[string]string } @@ -377,6 +415,8 @@ func (g *TaskGroup) Canonicalize(job *Job) { } g.ReschedulePolicy = defaultReschedulePolicy + g.Migrate.Canonicalize() + var defaultRestartPolicy *RestartPolicy switch *job.Type { case "service", "system": diff --git a/client/alloc_runner_health_watcher.go b/client/alloc_runner_health_watcher.go index ba94763b555e..db9164740319 100644 --- a/client/alloc_runner_health_watcher.go +++ b/client/alloc_runner_health_watcher.go @@ -31,7 +31,17 @@ func (r *AllocRunner) watchHealth(ctx context.Context) { // See if we should watch the allocs health alloc := r.Alloc() - if alloc.DeploymentID == "" || alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + if alloc.Job.Type == structs.JobTypeSystem || alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + // Neither deployments nor migrations apply to system jobs and + // we don't need to track allocations which already have a + // status + return + } + + isDeploy := alloc.DeploymentID != "" + + if isDeploy && alloc.Job.Type != structs.JobTypeService { + // Deployments don't track non-Service jobs return } @@ -39,7 +49,8 @@ func (r *AllocRunner) watchHealth(ctx context.Context) { if tg == nil { r.logger.Printf("[ERR] client.alloc_watcher: failed to lookup allocation's task group. Exiting watcher") return - } else if tg.Update == nil || tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Manual { + } + if isDeploy && (tg.Update == nil || tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Manual) { return } @@ -47,14 +58,36 @@ func (r *AllocRunner) watchHealth(ctx context.Context) { l := r.allocBroadcast.Listen() defer l.Close() + // Define the deadline, health method, min healthy time from the + // deployment if this is a deployment; otherwise from the migration + // strategy. + var deadline time.Time + var useChecks bool + var minHealthyTime time.Duration + + if isDeploy { + deadline = time.Now().Add(tg.Update.HealthyDeadline) + minHealthyTime = tg.Update.MinHealthyTime + useChecks = tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks + } else { + strategy := tg.Migrate + if strategy == nil { + // For backwards compat with pre-0.8 allocations that + // don't have a migrate strategy set. 
+ strategy = structs.DefaultMigrateStrategy() + } + deadline = time.Now().Add(strategy.HealthyDeadline) + minHealthyTime = strategy.MinHealthyTime + useChecks = strategy.HealthCheck == structs.MigrateStrategyHealthChecks + } + // Create a new context with the health deadline - deadline := time.Now().Add(tg.Update.HealthyDeadline) healthCtx, healthCtxCancel := context.WithDeadline(ctx, deadline) defer healthCtxCancel() - r.logger.Printf("[DEBUG] client.alloc_watcher: deadline (%v) for alloc %q is at %v", tg.Update.HealthyDeadline, alloc.ID, deadline) + r.logger.Printf("[DEBUG] client.alloc_watcher: deadline for alloc %q is at %v (deploy=%t checks=%t)", alloc.ID, deadline, isDeploy, useChecks) // Create the health tracker object - tracker := newAllocHealthTracker(healthCtx, r.logger, alloc, l, r.consulClient) + tracker := newAllocHealthTracker(healthCtx, r.logger, alloc, l, r.consulClient, minHealthyTime, useChecks) tracker.Start() allocHealthy := false @@ -77,8 +110,8 @@ func (r *AllocRunner) watchHealth(ctx context.Context) { r.allocHealth = helper.BoolToPtr(allocHealthy) r.allocLock.Unlock() - // We are unhealthy so emit task events explaining why - if !allocHealthy { + // If deployment is unhealthy emit task events explaining why + if !allocHealthy && isDeploy { r.taskLock.RLock() for task, event := range tracker.TaskEvents() { if tr, ok := r.tasks[task]; ok { @@ -107,6 +140,13 @@ type allocHealthTracker struct { // tg is the task group we are tracking tg *structs.TaskGroup + // minHealthyTime is the duration an alloc must remain healthy to be + // considered healthy + minHealthyTime time.Duration + + // useChecks specifies whether to use Consul healh checks or not + useChecks bool + // consulCheckCount is the number of checks the task group will attempt to // register consulCheckCount int @@ -146,7 +186,8 @@ type allocHealthTracker struct { // alloc listener and consul API object are given so that the watcher can detect // health changes. func newAllocHealthTracker(parentCtx context.Context, logger *log.Logger, alloc *structs.Allocation, - allocUpdates *cstructs.AllocListener, consulClient ConsulServiceAPI) *allocHealthTracker { + allocUpdates *cstructs.AllocListener, consulClient ConsulServiceAPI, + minHealthyTime time.Duration, useChecks bool) *allocHealthTracker { a := &allocHealthTracker{ logger: logger, @@ -154,8 +195,11 @@ func newAllocHealthTracker(parentCtx context.Context, logger *log.Logger, alloc allocStopped: make(chan struct{}), alloc: alloc, tg: alloc.Job.LookupTaskGroup(alloc.TaskGroup), - allocUpdates: allocUpdates, - consulClient: consulClient, + //FIXME should i wrap all these parameters up in a struct? + minHealthyTime: minHealthyTime, + useChecks: useChecks, + allocUpdates: allocUpdates, + consulClient: consulClient, } a.taskHealth = make(map[string]*taskHealthState, len(a.tg.Tasks)) @@ -176,7 +220,7 @@ func newAllocHealthTracker(parentCtx context.Context, logger *log.Logger, alloc // Start starts the watcher. func (a *allocHealthTracker) Start() { go a.watchTaskEvents() - if a.tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks { + if a.useChecks { go a.watchConsulEvents() } } @@ -210,7 +254,9 @@ func (a *allocHealthTracker) TaskEvents() map[string]string { // Go through are task information and build the event map for task, state := range a.taskHealth { - if e, ok := state.event(deadline, a.tg.Update); ok { + //FIXME skip this for migrations? 
+ useChecks := a.tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks + if e, ok := state.event(deadline, a.tg.Update.MinHealthyTime, useChecks); ok { events[task] = e } } @@ -227,7 +273,7 @@ func (a *allocHealthTracker) setTaskHealth(healthy, terminal bool) { // If we are marked healthy but we also require Consul to be healthy and it // isn't yet, return, unless the task is terminal - requireConsul := a.tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks && a.consulCheckCount > 0 + requireConsul := a.useChecks && a.consulCheckCount > 0 if !terminal && healthy && requireConsul && !a.checksHealthy { return } @@ -337,7 +383,7 @@ func (a *allocHealthTracker) watchTaskEvents() { // Set the timer since all tasks are started if !latestStartTime.IsZero() { allStartedTime = latestStartTime - healthyTimer.Reset(a.tg.Update.MinHealthyTime) + healthyTimer.Reset(a.minHealthyTime) } } @@ -453,7 +499,7 @@ OUTER: } primed = true - healthyTimer.Reset(a.tg.Update.MinHealthyTime) + healthyTimer.Reset(a.minHealthyTime) } } } @@ -470,7 +516,7 @@ type taskHealthState struct { // event takes the deadline time for the allocation to be healthy and the update // strategy of the group. It returns true if the task has contributed to the // allocation being unhealthy and if so, an event description of why. -func (t *taskHealthState) event(deadline time.Time, update *structs.UpdateStrategy) (string, bool) { +func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration, useChecks bool) (string, bool) { requireChecks := false desiredChecks := 0 for _, s := range t.task.Services { @@ -479,7 +525,7 @@ func (t *taskHealthState) event(deadline time.Time, update *structs.UpdateStrate desiredChecks += nc } } - requireChecks = requireChecks && update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks + requireChecks = requireChecks && useChecks if t.state != nil { if t.state.Failed { @@ -490,8 +536,9 @@ func (t *taskHealthState) event(deadline time.Time, update *structs.UpdateStrate } // We are running so check if we have been running long enough - if t.state.StartedAt.Add(update.MinHealthyTime).After(deadline) { - return fmt.Sprintf("Task not running for min_healthy_time of %v by deadline", update.MinHealthyTime), true + //FIXME need minHealthyTime here + if t.state.StartedAt.Add(minHealthyTime).After(deadline) { + return fmt.Sprintf("Task not running for min_healthy_time of %v by deadline", minHealthyTime), true } } diff --git a/jobspec/parse.go b/jobspec/parse.go index d6f235e05f26..4bfebc9099aa 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -285,6 +285,7 @@ func parseGroups(result *api.Job, list *ast.ObjectList) error { "update", "reschedule", "vault", + "migrate", } if err := helper.CheckHCLKeys(listVal, valid); err != nil { return multierror.Prefix(err, fmt.Sprintf("'%s' ->", n)) @@ -301,6 +302,7 @@ func parseGroups(result *api.Job, list *ast.ObjectList) error { delete(m, "ephemeral_disk") delete(m, "update") delete(m, "vault") + delete(m, "migrate") // Build the group with the basic decode var g api.TaskGroup @@ -344,6 +346,13 @@ func parseGroups(result *api.Job, list *ast.ObjectList) error { } } + // If we have a migration strategy, then parse that + if o := listVal.Filter("migrate"); len(o.Items) > 0 { + if err := parseMigrate(&g.Migrate, o); err != nil { + return multierror.Prefix(err, "migrate ->") + } + } + // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. 
if metaO := listVal.Filter("meta"); len(metaO.Items) > 0 { @@ -1320,6 +1329,42 @@ func parseUpdate(result **api.UpdateStrategy, list *ast.ObjectList) error { return dec.Decode(m) } +func parseMigrate(result **api.MigrateStrategy, list *ast.ObjectList) error { + list = list.Elem() + if len(list.Items) > 1 { + return fmt.Errorf("only one 'migrate' block allowed") + } + + // Get our resource object + o := list.Items[0] + + var m map[string]interface{} + if err := hcl.DecodeObject(&m, o.Val); err != nil { + return err + } + + // Check for invalid keys + valid := []string{ + "max_parallel", + "health_check", + "min_healthy_time", + "healthy_deadline", + } + if err := helper.CheckHCLKeys(o.Val, valid); err != nil { + return err + } + + dec, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + DecodeHook: mapstructure.StringToTimeDurationHookFunc(), + WeaklyTypedInput: true, + Result: result, + }) + if err != nil { + return err + } + return dec.Decode(m) +} + func parsePeriodic(result **api.PeriodicConfig, list *ast.ObjectList) error { list = list.Elem() if len(list.Items) > 1 { diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index dbf1200570eb..c3989a68ca94 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -110,6 +110,12 @@ func TestParse(t *testing.T) { AutoRevert: helper.BoolToPtr(false), Canary: helper.IntToPtr(2), }, + Migrate: &api.MigrateStrategy{ + MaxParallel: helper.IntToPtr(2), + HealthCheck: helper.StringToPtr("task_states"), + MinHealthyTime: helper.TimeToPtr(11 * time.Second), + HealthyDeadline: helper.TimeToPtr(11 * time.Minute), + }, Tasks: []*api.Task{ { Name: "binstore", diff --git a/jobspec/test-fixtures/basic.hcl b/jobspec/test-fixtures/basic.hcl index 9942e3dfc34c..2b3f973aa9c4 100644 --- a/jobspec/test-fixtures/basic.hcl +++ b/jobspec/test-fixtures/basic.hcl @@ -67,6 +67,13 @@ job "binstore-storagelocker" { canary = 2 } + migrate { + max_parallel = 2 + health_check = "task_states" + min_healthy_time = "11s" + healthy_deadline = "11m" + } + task "binstore" { driver = "docker" user = "bob" diff --git a/nomad/drain.go b/nomad/drain.go new file mode 100644 index 000000000000..a1dc99972029 --- /dev/null +++ b/nomad/drain.go @@ -0,0 +1,752 @@ +package nomad + +import ( + "context" + "log" + "strings" + "sync" + "time" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// drainingJob contains the Job and allocations for that job meant to be used +// when collecting all allocations for a job with at least one allocation on a +// draining node. +// +// This allows the MaxParallel calculation to take the entire job's allocation +// state into account. FIXME is that even useful? +type drainingJob struct { + job *structs.Job + allocs []*structs.Allocation +} + +// drainingAlloc contains a conservative deadline an alloc has to be healthy by +// before it should stopped being watched and replaced. 
+type drainingAlloc struct { + // LastModified+MigrateStrategy.HealthyDeadline + deadline time.Time + + // Task Group key + tgKey string +} + +func newDrainingAlloc(a *structs.Allocation, deadline time.Time) drainingAlloc { + return drainingAlloc{ + deadline: deadline, + tgKey: makeTaskGroupKey(a), + } +} + +// makeTaskGroupKey returns a unique key for an allocation's task group +func makeTaskGroupKey(a *structs.Allocation) string { + return strings.Join([]string{a.Namespace, a.JobID, a.TaskGroup}, "-") +} + +// stopAllocs tracks allocs to drain by a unique TG key +type stopAllocs struct { + perTaskGroup map[string]int + allocBatch []*structs.Allocation + + // namespace+jobid -> Job + jobBatch map[string]*structs.Job +} + +//FIXME this method does an awful lot +func (s *stopAllocs) add(j *structs.Job, a *structs.Allocation) { + // Increment the counter for how many allocs in this task group are being stopped + tgKey := makeTaskGroupKey(a) + s.perTaskGroup[tgKey]++ + + // Update the allocation + a.ModifyTime = time.Now().UnixNano() + a.DesiredStatus = structs.AllocDesiredStatusStop + + // Add alloc to the allocation batch + s.allocBatch = append(s.allocBatch, a) + + // Add job to the job batch + jobKey := strings.Join([]string{j.Namespace, j.ID}, "-") + s.jobBatch[jobKey] = j +} + +// startNodeDrainer should be called in establishLeadership by the leader. +func (s *Server) startNodeDrainer(stopCh chan struct{}) { + state := s.fsm.State() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + select { + case <-stopCh: + cancel() + case <-ctx.Done(): + } + }() + + nodes, nodesIndex, drainingAllocs, allocsIndex := initDrainer(s.logger, state) + + // Wait for a node's drain deadline to expire + var nextDeadline time.Time + for _, node := range nodes { + if nextDeadline.IsZero() { + nextDeadline = node.DrainStrategy.DeadlineTime() + continue + } + if deadline := node.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) { + nextDeadline = deadline + } + + } + deadlineTimer := time.NewTimer(time.Until(nextDeadline)) + + // Watch for nodes to start or stop draining + nodeWatcher := newNodeWatcher(s.logger, nodes, nodesIndex, state) + go nodeWatcher.run(ctx) + + // Watch for drained allocations to be replaced + prevAllocs := newPrevAllocWatcher(s.logger, drainingAllocs, allocsIndex, state) + go prevAllocs.run(ctx) + + for { + //TODO this method of async node updates means we could make + //migration decisions on out of date information. 
the worst + //possible outcome of this is that an allocation could be + //stopped on a node that recently had its drain cancelled which + //doesn't seem like that bad of a pathological case + select { + case nodes = <-nodeWatcher.nodesCh: + // update draining nodes + //TODO remove allocs from draining list with node ids not in this map + s.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes)) + case drainedID := <-prevAllocs.allocsCh: + // drained alloc has been replaced + //TODO instead of modifying a view of draining allocs here created a shared map like prevallocs + delete(drainingAllocs, drainedID) + s.logger.Printf("[TRACE] nomad.drain: running due to alloc change (%s replaced)", drainedID) + case when := <-deadlineTimer.C: + // deadline for a node was reached + s.logger.Printf("[TRACE] nomad.drain: running due to deadline reached (at %s)", when) + case <-ctx.Done(): + // exit + return + } + + // Tracks nodes that are done draining + doneNodes := map[string]*structs.Node{} + + //TODO work from a state snapshot? perhaps from a last update + //index? I can't think of why this would be beneficial as this + //entire process runs asynchronously with the fsm/scheduler/etc + snapshot, err := state.Snapshot() + if err != nil { + //FIXME + panic(err) + } + now := time.Now() // for determing deadlines in a consistent way + + // namespace -> job id -> {job, allocs} + // Collect all allocs for all jobs with at least one + // alloc on a draining node. + // Invariants: + // - No system jobs + // - No batch jobs unless their node's deadline is reached + // - No entries with 0 allocs + //TODO could this be a helper method on prevAllocWatcher + drainable := map[string]map[string]*drainingJob{} + + // Collect all drainable jobs + for nodeID, node := range nodes { + allocs, err := snapshot.AllocsByNode(nil, nodeID) + if err != nil { + //FIXME + panic(err) + } + + // track number of allocs left on this node to be drained + allocsLeft := false + for _, alloc := range allocs { + if _, ok := drainable[alloc.Namespace]; !ok { + // namespace does not exist + drainable[alloc.Namespace] = make(map[string]*drainingJob) + } + + if _, ok := drainable[alloc.Namespace][alloc.JobID]; ok { + // already found + continue + } + + // job does not found yet + job, err := snapshot.JobByID(nil, alloc.Namespace, alloc.JobID) + if err != nil { + //FIXME + panic(err) + } + //TODO check for job == nil? + + // Don't bother collecting system jobs + if job.Type == structs.JobTypeSystem { + continue + } + + // If a drainable alloc isn't yet stopping this + // node has allocs left to be drained + if !alloc.TerminalStatus() { + allocsLeft = true + } + + // Don't bother collecting batch jobs for nodes that haven't hit their deadline + if job.Type == structs.JobTypeBatch && node.DrainStrategy.DeadlineTime().After(now) { + continue + } + + jobAllocs, err := snapshot.AllocsByJob(nil, alloc.Namespace, alloc.JobID, true) + if err != nil { + //FIXME + panic(err) + } + + drainable[alloc.Namespace][alloc.JobID] = &drainingJob{ + job: job, + allocs: jobAllocs, + } + } + + // if node has no allocs, it's done draining! 
+ if !allocsLeft { + delete(nodes, nodeID) + doneNodes[nodeID] = node + } + } + + // Initialize stoplist with a count of allocs already draining per task group + //TODO wrap this up in a new func + stoplist := &stopAllocs{ + perTaskGroup: make(map[string]int, len(drainingAllocs)), + allocBatch: make([]*structs.Allocation, len(drainingAllocs)), + jobBatch: make(map[string]*structs.Job), + } + // initialize perTaskGroup to be the number of total *currently draining* allocations per task group + for _, a := range drainingAllocs { + stoplist.perTaskGroup[a.tgKey]++ + } + + // deadlineNodes is a map of node IDs that have reached their + // deadline and allocs that will be stopped due to deadline + deadlineNodes := map[string]int{} + + //TODO build drain list considering deadline & max_parallel + for _, drainingJobs := range drainable { + for _, drainingJob := range drainingJobs { + for _, alloc := range drainingJob.allocs { + // Already draining/dead allocs don't need to be drained + if alloc.TerminalStatus() { + continue + } + + node, ok := nodes[alloc.NodeID] + if !ok { + // Alloc's node is not draining so not elligible for draining! + continue + } + + if node.DrainStrategy.DeadlineTime().Before(now) { + s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + // Alloc's Node has reached its deadline + stoplist.add(drainingJob.job, alloc) + + deadlineNodes[node.ID]++ + + //FIXME purge from watchlist? + continue + } + + // Batch jobs are only stopped when the node + // deadline is reached which has already been + // done. + if drainingJob.job.Type == structs.JobTypeBatch { + continue + } + + // Stop allocs with count=1, max_parallel==0, or draining how many allocs are + // already draining for this task + // group, drain and track this alloc + tgKey := makeTaskGroupKey(alloc) + + //FIXME change this to be based off of the sum(deploymentstatus!=nil && clientstatus==running) for this task group + if tg.Migrate.MaxParallel > stoplist.perTaskGroup[tgKey] { + s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to max parallel", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + // More migrations are allowed, add to stoplist + stoplist.add(drainingJob.job, alloc) + + // Also add to prevAllocWatcher + prevAllocs.watch(alloc.ID) + } + } + } + } + + // log drains due to node deadlines + for nodeID, remaining := range deadlineNodes { + s.logger.Printf("[DEBUG] nomad.drain: node %s drain deadline reached; stopping %d remaining allocs", nodeID, remaining) + } + + if len(stoplist.allocBatch) > 0 { + s.logger.Printf("[DEBUG] nomad.drain: stopping %d alloc(s) for %d job(s)", len(stoplist.allocBatch), len(stoplist.jobBatch)) + + // Stop allocs in stoplist and add them to drainingAllocs + prevAllocWatcher + batch := &structs.AllocUpdateRequest{ + Alloc: stoplist.allocBatch, + WriteRequest: structs.WriteRequest{Region: s.config.Region}, + } + + // Commit this update via Raft + //TODO Not the right request + _, index, err := s.raftApply(structs.AllocClientUpdateRequestType, batch) + if err != nil { + //FIXME + panic(err) + } + + //TODO i bet there's something useful to do with this index + _ = index + + // Reevaluate affected jobs + evals := make([]*structs.Evaluation, 0, len(stoplist.jobBatch)) + for _, job := range stoplist.jobBatch { + evals = append(evals, &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: job.Namespace, + Priority: job.Priority, + Type: job.Type, 
+ TriggeredBy: structs.EvalTriggerNodeDrain, + JobID: job.ID, + JobModifyIndex: job.ModifyIndex, + Status: structs.EvalStatusPending, + }) + } + + evalUpdate := &structs.EvalUpdateRequest{ + Evals: evals, + WriteRequest: structs.WriteRequest{Region: s.config.Region}, + } + + // Commit this evaluation via Raft + _, _, err = s.raftApply(structs.EvalUpdateRequestType, evalUpdate) + if err != nil { + //FIXME + panic(err) + } + } + + // Unset drain for nodes done draining + for nodeID, node := range doneNodes { + args := structs.NodeUpdateDrainRequest{ + NodeID: nodeID, + Drain: false, + WriteRequest: structs.WriteRequest{Region: s.config.Region}, + } + + _, _, err := s.raftApply(structs.NodeUpdateDrainRequestType, &args) + if err != nil { + s.logger.Printf("[ERR] nomad.drain: failed to unset drain for: %v", err) + //FIXME + panic(err) + } + s.logger.Printf("[INFO] nomad.drain: node %s (%s) completed draining", nodeID, node.Name) + } + } +} + +// nodeWatcher watches for nodes to start or stop draining +type nodeWatcher struct { + index uint64 + nodes map[string]*structs.Node + nodesCh chan map[string]*structs.Node + state *state.StateStore + logger *log.Logger +} + +func newNodeWatcher(logger *log.Logger, nodes map[string]*structs.Node, index uint64, state *state.StateStore) *nodeWatcher { + return &nodeWatcher{ + nodes: nodes, + nodesCh: make(chan map[string]*structs.Node), + index: index, + state: state, + logger: logger, + } +} + +func (n *nodeWatcher) run(ctx context.Context) { + // Trigger an initial drain pass if there are already nodes draining + //FIXME this is unneccessary if a node has reached a deadline + n.logger.Printf("[TRACE] nomad.drain: initial draining nodes: %d", len(n.nodes)) + if len(n.nodes) > 0 { + n.nodesCh <- n.nodes + } + + for { + //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? + resp, index, err := n.state.BlockingQuery(n.queryNodeDrain, n.index, ctx) + if err != nil { + if err == context.Canceled { + n.logger.Printf("[TRACE] nomad.drain: draining node watcher shutting down") + return + } + n.logger.Printf("[ERR] nomad.drain: error blocking on node updates at index %d: %v", n.index, err) + return + } + + // update index for next run + n.index = index + + changed := false + newNodes := resp.([]*structs.Node) + n.logger.Printf("[TRACE] nomad.drain: %d nodes to consider", len(newNodes)) //FIXME remove + for _, newNode := range newNodes { + if _, ok := n.nodes[newNode.ID]; ok { + // Node was draining + if !newNode.Drain { + // Node stopped draining + delete(n.nodes, newNode.ID) + changed = true + } else { + // Update deadline + n.nodes[newNode.ID] = newNode + //FIXME set changed if it changed? + //changed = true + } + } else { + // Node was not draining + if newNode.Drain { + // Node started draining + n.nodes[newNode.ID] = newNode + changed = true + } + } + } + + // Send a copy of the draining nodes if there were changes + if !changed { + continue + } + + nodesCopy := make(map[string]*structs.Node, len(n.nodes)) + for k, v := range n.nodes { + nodesCopy[k] = v + } + + select { + case n.nodesCh <- nodesCopy: + case <-ctx.Done(): + return + } + } +} + +func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + iter, err := state.Nodes(ws) + if err != nil { + return nil, 0, err + } + + index, err := state.Index("nodes") + if err != nil { + return nil, 0, err + } + + //FIXME initial cap? 
+ resp := make([]*structs.Node, 0, 1) + + for { + raw := iter.Next() + if raw == nil { + break + } + + node := raw.(*structs.Node) + resp = append(resp, node) + } + + return resp, index, nil +} + +// prevAllocWatcher monitors allocation updates for allocations which replace +// draining allocations. +type prevAllocWatcher struct { + // watchList is a map of alloc ids to look for in PreviousAllocation + // fields of new allocs + watchList map[string]struct{} + watchListMu sync.Mutex + + state *state.StateStore + + // allocIndex to start watching from + allocIndex uint64 + + // allocsCh is sent Allocation.IDs as they're removed from the watchList + allocsCh chan string + + logger *log.Logger +} + +// newPrevAllocWatcher creates a new prevAllocWatcher watching drainingAllocs +// from allocIndex in the state store. Must call run to start watching. +func newPrevAllocWatcher(logger *log.Logger, drainingAllocs map[string]drainingAlloc, allocIndex uint64, + state *state.StateStore) *prevAllocWatcher { + + watchList := make(map[string]struct{}, len(drainingAllocs)) + for allocID := range drainingAllocs { + watchList[allocID] = struct{}{} + } + + return &prevAllocWatcher{ + watchList: watchList, + state: state, + allocIndex: allocIndex, + allocsCh: make(chan string, 8), //FIXME 8? really? what should this be + logger: logger, + } +} + +// watch for an allocation ID to be replaced. +func (p *prevAllocWatcher) watch(allocID string) { + p.watchListMu.Lock() + defer p.watchListMu.Unlock() + p.watchList[allocID] = struct{}{} +} + +// run the prevAllocWatcher and send replaced draining alloc IDs on allocsCh. +func (p *prevAllocWatcher) run(ctx context.Context) { + // index to watch from + var resp interface{} + var err error + + for { + //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? + resp, p.allocIndex, err = p.state.BlockingQuery(p.queryPrevAlloc, p.allocIndex, ctx) + if err != nil { + if err == context.Canceled { + p.logger.Printf("[TRACE] nomad.drain: previous allocation watcher shutting down") + return + } + p.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) + return + } + + allocIDs := resp.([]string) + for _, id := range allocIDs { + select { + case p.allocsCh <- id: + case <-ctx.Done(): + return + } + } + } +} + +// queryPrevAlloc is the BlockingQuery func for scanning for replacement allocs +func (p *prevAllocWatcher) queryPrevAlloc(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + iter, err := state.Allocs(ws) + if err != nil { + return nil, 0, err + } + + index, err := state.Index("allocs") + if err != nil { + return nil, 0, err + } + + //FIXME do fine grained locking around watclist mutations? 
+ p.watchListMu.Lock() + defer p.watchListMu.Unlock() + + resp := make([]string, 0, len(p.watchList)) + + for { + raw := iter.Next() + if raw == nil { + break + } + + alloc := raw.(*structs.Allocation) + _, ok := p.watchList[alloc.PreviousAllocation] + if !ok { + // PreviousAllocation not in watchList, skip it + continue + } + + // If the migration health is set on the replacement alloc we can stop watching the drained alloc + if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + delete(p.watchList, alloc.PreviousAllocation) + resp = append(resp, alloc.PreviousAllocation) + } + } + + return resp, index, nil +} + +// initDrainer initializes the node drainer state and returns a list of +// draining nodes as well as allocs that are draining that should be watched +// for a replacement. +func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*structs.Node, uint64, map[string]drainingAlloc, uint64) { + // StateStore.Snapshot never returns an error so don't bother checking it + snapshot, _ := state.Snapshot() + now := time.Now() + + iter, err := snapshot.Nodes(nil) + if err != nil { + logger.Printf("[ERR] nomad.drain: error iterating nodes: %v", err) + panic(err) //FIXME + } + + // map of draining nodes keyed by node ID + nodes := map[string]*structs.Node{} + + //FIXME rollup by composite namespace+job.ID+tg key? + // List of draining allocs by namespace and job: namespace -> job.ID -> alloc.ID -> *Allocation + allocsByNS := map[string]map[string]map[string]*structs.Allocation{} + + for { + raw := iter.Next() + if raw == nil { + break + } + + // Filter on datacenter and status + node := raw.(*structs.Node) + if !node.Drain { + continue + } + + // Track draining node + nodes[node.ID] = node + + // No point in tracking draining allocs as the deadline has been reached + if node.DrainStrategy.DeadlineTime().Before(now) { + continue + } + + allocs, err := snapshot.AllocsByNode(nil, node.ID) + if err != nil { + logger.Printf("[ERR] nomad.drain: error iterating allocs for node %q: %v", node.ID, err) + panic(err) //FIXME + } + + for _, alloc := range allocs { + //FIXME is it safe to assume the drainer set the desired status to stop? 
+ if alloc.DesiredStatus == structs.AllocDesiredStatusStop { + if allocsByJob, ok := allocsByNS[alloc.Namespace]; ok { + if allocs, ok := allocsByJob[alloc.JobID]; ok { + allocs[alloc.ID] = alloc + } else { + // First alloc for job + allocsByJob[alloc.JobID] = map[string]*structs.Allocation{alloc.ID: alloc} + } + } else { + // First alloc in namespace + allocsByNS[alloc.Namespace] = map[string]map[string]*structs.Allocation{ + alloc.JobID: map[string]*structs.Allocation{alloc.ID: alloc}, + } + } + } + } + } + + // drainingAllocs is the list of all allocations that are currently + // draining and waiting for a replacement + drainingAllocs := map[string]drainingAlloc{} + + for ns, allocsByJobs := range allocsByNS { + for jobID, allocs := range allocsByJobs { + for allocID, alloc := range allocs { + job, err := snapshot.JobByID(nil, ns, jobID) + if err != nil { + logger.Printf("[ERR] nomad.drain: error getting job %q for alloc %q: %v", alloc.JobID, allocID, err) + //FIXME + panic(err) + } + + // Don't track drains for stopped or gc'd jobs + if job == nil || job.Status == structs.JobStatusDead { + continue + } + + jobAllocs, err := snapshot.AllocsByJob(nil, ns, jobID, true) + if err != nil { + //FIXME + panic(err) + } + + // Remove drained allocs for replacement allocs + for _, alloc := range jobAllocs { + if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + delete(allocs, alloc.PreviousAllocation) + } + } + + //FIXME why are we doing a nested loop over allocs? + // Any remaining allocs need to be tracked + for allocID, alloc := range allocs { + tg := job.LookupTaskGroup(alloc.TaskGroup) + if tg == nil { + logger.Printf("[DEBUG] nomad.drain: unable to find task group %q for alloc %q", alloc.TaskGroup, allocID) + continue + } + + if tg.Migrate == nil { + // No migrate strategy so don't track + continue + } + + //FIXME Remove this? ModifyTime is not updated as expected + + // alloc.ModifyTime + HealthyDeadline is >= the + // healthy deadline for the allocation, so we + // can stop tracking it at that time. + deadline := time.Unix(0, alloc.ModifyTime).Add(tg.Migrate.HealthyDeadline) + + if deadline.After(now) { + // deadline already reached; don't bother tracking + continue + } + + // Draining allocation hasn't been replaced or + // reached its deadline; track it! + drainingAllocs[allocID] = newDrainingAlloc(alloc, deadline) + } + } + } + } + + nodesIndex, _ := snapshot.Index("nodes") + if nodesIndex == 0 { + nodesIndex = 1 + } + allocsIndex, _ := snapshot.Index("allocs") + if allocsIndex == 0 { + allocsIndex = 1 + } + return nodes, nodesIndex, drainingAllocs, allocsIndex +} diff --git a/nomad/drain_test.go b/nomad/drain_test.go new file mode 100644 index 000000000000..bf1ec875de3a --- /dev/null +++ b/nomad/drain_test.go @@ -0,0 +1,216 @@ +package nomad + +import ( + "fmt" + "sort" + "strings" + "testing" + "time" + + msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" + "github.com/hashicorp/nomad/client" + "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/hashicorp/nomad/testutil/rpcapi" + "github.com/stretchr/testify/require" +) + +// TestNodeDrainer_SimpleDrain asserts that draining when there are two nodes +// moves allocs from the draining node to the other node. 
+func TestNodeDrainer_SimpleDrain(t *testing.T) { + require := require.New(t) + server := TestServer(t, nil) + defer server.Shutdown() + + testutil.WaitForLeader(t, server.RPC) + + // Setup 2 Nodes: A & B; A has allocs and is draining + + // Create mock jobs + state := server.fsm.State() + + serviceJob := mock.Job() + serviceJob.Name = "service-job" + serviceJob.Type = structs.JobTypeService + serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{ + MaxParallel: 1, + HealthCheck: structs.MigrateStrategyHealthStates, + MinHealthyTime: time.Millisecond, + HealthyDeadline: 2 * time.Second, + } + serviceJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" + serviceJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() + serviceJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ + "run_for": "10m", + "kill_after": "1ms", + } + serviceJob.TaskGroups[0].Tasks[0].Services = nil + + systemJob := mock.SystemJob() + systemJob.Name = "system-job" + systemJob.Type = structs.JobTypeSystem + //FIXME hack until system job reschedule policy validation is fixed + systemJob.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: time.Minute} + systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" + systemJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ + "run_for": "10m", + "kill_after": "1ms", + } + systemJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() + systemJob.TaskGroups[0].Tasks[0].Services = nil + + batchJob := mock.Job() + batchJob.Name = "batch-job" + batchJob.Type = structs.JobTypeBatch + batchJob.TaskGroups[0].Name = "batch-group" + batchJob.TaskGroups[0].Migrate = nil + batchJob.TaskGroups[0].Tasks[0].Name = "batch-task" + batchJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" + batchJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ + "run_for": "10m", + "kill_after": "1ms", + "exit_code": 13, // set nonzero exit code to cause rescheduling + } + batchJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() + batchJob.TaskGroups[0].Tasks[0].Services = nil + + // Start node 1 + c1 := client.TestClient(t, func(conf *config.Config) { + conf.LogOutput = testlog.NewWriter(t) + conf.Servers = []string{server.config.RPCAddr.String()} + }) + defer c1.Shutdown() + + // Start jobs so they all get placed on node 1 + codec := rpcClient(t, server) + for _, job := range []*structs.Job{systemJob, serviceJob, batchJob} { + req := &structs.JobRegisterRequest{ + Job: job.Copy(), + WriteRequest: structs.WriteRequest{ + Region: "global", + Namespace: job.Namespace, + }, + } + + // Fetch the response + var resp structs.JobRegisterResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp)) + require.NotZero(resp.Index) + } + + // Wait for jobs to start on c1 + rpc := rpcapi.NewRPC(codec) + testutil.WaitForResult(func() (bool, error) { + resp, err := rpc.NodeGetAllocs(c1.NodeID()) + if err != nil { + return false, err + } + + system, batch, service := 0, 0, 0 + for _, alloc := range resp.Allocs { + if alloc.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) + } + switch alloc.JobID { + case batchJob.ID: + batch++ + case serviceJob.ID: + service++ + case systemJob.ID: + system++ + } + } + // 1 system + 10 batch + 10 service = 21 + if system+batch+service != 21 { + return false, fmt.Errorf("wrong number of allocs: system %d/1, batch %d/10, service %d/10", system, batch, service) + } + return true, nil + 
}, func(err error) { + if resp, err := rpc.NodeGetAllocs(c1.NodeID()); err == nil { + for i, alloc := range resp.Allocs { + t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) + } + } + t.Fatalf("failed waiting for all allocs to start: %v", err) + }) + + // Start draining node 1 + //FIXME update drain rpc to skip fsm manipulation and use api + node, err := state.NodeByID(nil, c1.NodeID()) + require.Nil(err) + require.Nil(state.UpdateNodeDrain(node.ModifyIndex+1, node.ID, true)) + + // Start node 2 + c2 := client.TestClient(t, func(conf *config.Config) { + conf.NetworkSpeed = 10000 + conf.Servers = []string{server.config.RPCAddr.String()} + }) + defer c2.Shutdown() + + // Wait for services to be migrated + testutil.WaitForResult(func() (bool, error) { + resp, err := rpc.NodeGetAllocs(c2.NodeID()) + if err != nil { + return false, err + } + + system, batch, service := 0, 0, 0 + for _, alloc := range resp.Allocs { + if alloc.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) + } + switch alloc.JobID { + case batchJob.ID: + batch++ + case serviceJob.ID: + service++ + case systemJob.ID: + system++ + } + } + // 1 system + 10 batch + 10 service = 21 + if system+batch+service != 21 { + return false, fmt.Errorf("wrong number of allocs: system %d/1, batch %d/10, service %d/10", system, batch, service) + } + return true, nil + }, func(err error) { + if resp, err := rpc.NodeGetAllocs(c2.NodeID()); err == nil { + for i, alloc := range resp.Allocs { + t.Logf("%d alloc %s job %s status %s prev %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus, alloc.PreviousAllocation) + } + } + t.Fatalf("failed waiting for all allocs to start: %v", err) + }) + + // Wait for all service allocs to be replaced + jobs, err := rpc.JobList() + require.Nil(err) + t.Logf("%d jobs", len(jobs.Jobs)) + for _, job := range jobs.Jobs { + t.Logf("job: %s status: %s %s", job.Name, job.Status, job.StatusDescription) + } + + allocs, err := rpc.AllocAll() + require.Nil(err) + + sort.Slice(allocs, func(i, j int) bool { + r := strings.Compare(allocs[i].Job.Name, allocs[j].Job.Name) + switch { + case r < 0: + return true + case r == 0: + return allocs[i].ModifyIndex < allocs[j].ModifyIndex + case r > 0: + return false + } + panic("unreachable") + }) + + t.Logf("%d allocs", len(allocs)) + for _, alloc := range allocs { + t.Logf("job: %s node: %s alloc: %s desired: %s actual: %s replaces: %s", alloc.Job.Name, alloc.NodeID[:6], alloc.ID, alloc.DesiredStatus, alloc.ClientStatus, alloc.PreviousAllocation) + } +} diff --git a/nomad/leader.go b/nomad/leader.go index 51aa737b3099..b81b65d23232 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -267,6 +267,9 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { go s.replicateACLTokens(stopCh) } + // Start Node Drainer + go s.startNodeDrainer(stopCh) + // Setup any enterprise systems required. 
if err := s.establishEnterpriseLeadership(stopCh); err != nil { return err diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 3a8588b9cbad..1d96e556b39b 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -97,6 +97,7 @@ func Job() *structs.Job { Delay: 5 * time.Second, DelayFunction: "linear", }, + Migrate: structs.DefaultMigrateStrategy(), Tasks: []*structs.Task{ { Name: "web", diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 12fffbce2a5e..3ef43ccf6903 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -87,6 +87,11 @@ func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUp return fmt.Errorf("invalid status for node") } + // Default to eligible for scheduling if unset + if args.Node.SchedulingEligibility == "" { + args.Node.SchedulingEligibility = structs.NodeSchedulingEligible + } + // Set the timestamp when the node is registered args.Node.StatusUpdatedAt = time.Now().Unix() diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 8e988232318d..089af0f5853a 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -415,7 +415,10 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri return false, "node does not exist", nil } else if node.Status != structs.NodeStatusReady { return false, "node is not ready for placements", nil + } else if node.SchedulingEligibility == structs.NodeSchedulingIneligible { + return false, "node is not eligible for draining", nil } else if node.Drain { + // Deprecate in favor of scheduling eligibility and remove post-0.8 return false, "node is draining", nil } diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 6156a3c75020..67a02f348976 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -6,6 +6,7 @@ import ( "io" "log" "sort" + "time" "github.com/hashicorp/go-memdb" multierror "github.com/hashicorp/go-multierror" @@ -635,6 +636,17 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er // Update the drain in the copy copyNode.Drain = drain + //FIXME + if drain { + copyNode.DrainStrategy = &structs.DrainStrategy{ + StartTime: time.Now().UnixNano(), + Deadline: 10 * time.Second, + } + copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible + } else { + copyNode.DrainStrategy = nil + copyNode.SchedulingEligibility = structs.NodeSchedulingEligible + } copyNode.ModifyIndex = index // Insert the node diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 54c89fb95e20..68975ec69963 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1142,6 +1142,7 @@ const ( // ShouldDrainNode checks if a given node status should trigger an // evaluation. Some states don't require any further action. +//TODO(schmichael) Update for drainv2?! func ShouldDrainNode(status string) bool { switch status { case NodeStatusInit, NodeStatusReady: @@ -1163,6 +1164,44 @@ func ValidNodeStatus(status string) bool { } } +const ( + NodeSchedulingEligible = "eligbile" + NodeSchedulingIneligible = "ineligible" +) + +// DrainStrategy describes a Node's drain behavior. +type DrainStrategy struct { + // StartTime as nanoseconds since Unix epoch indicating when a drain + // began for deadline calcuations. + StartTime int64 + + // Deadline is the duration after StartTime when the remaining + // allocations on a draining Node should be told to stop. 
+ Deadline time.Duration +} + +func (d *DrainStrategy) Copy() *DrainStrategy { + if d == nil { + return nil + } + + nd := new(DrainStrategy) + *nd = *d + return nd +} + +// DeadlineTime returns the Time this drain's deadline will be reached or the +// zero value for Time if DrainStrategy is nil or Duration is <= 0. +func (d *DrainStrategy) DeadlineTime() time.Time { + if d == nil { + return time.Time{} + } + if d.Deadline <= 0 { + return time.Time{} + } + return time.Unix(0, d.StartTime).Add(d.Deadline) +} + // Node is a representation of a schedulable client node type Node struct { // ID is a unique identifier for the node. It can be constructed @@ -1224,9 +1263,18 @@ type Node struct { // Drain is controlled by the servers, and not the client. // If true, no jobs will be scheduled to this node, and existing - // allocations will be drained. + // allocations will be drained. Superceded by DrainStrategy in Nomad + // 0.8 but kept for backward compat. Drain bool + // DrainStrategy determines the node's draining behavior. Will be nil + // when Drain=false. + DrainStrategy *DrainStrategy + + // SchedulingEligibility determines whether this node will receive new + // placements. + SchedulingEligibility string + // Status of this node Status string @@ -1249,9 +1297,10 @@ type Node struct { ModifyIndex uint64 } -// Ready returns if the node is ready for running allocations +// Ready returns true if the node is ready for running allocations func (n *Node) Ready() bool { - return n.Status == NodeStatusReady && !n.Drain + // Drain is checked directly to support pre-0.8 Node data + return n.Status == NodeStatusReady && !n.Drain && n.SchedulingEligibility == NodeSchedulingEligible } func (n *Node) Copy() *Node { @@ -1261,6 +1310,7 @@ func (n *Node) Copy() *Node { nn := new(Node) *nn = *n nn.Attributes = helper.CopyMapStringString(nn.Attributes) + nn.DrainStrategy = nn.DrainStrategy.Copy() nn.Resources = nn.Resources.Copy() nn.Reserved = nn.Reserved.Copy() nn.Links = helper.CopyMapStringString(nn.Links) @@ -1300,34 +1350,36 @@ func (n *Node) Stub() *NodeListStub { addr, _, _ := net.SplitHostPort(n.HTTPAddr) return &NodeListStub{ - Address: addr, - ID: n.ID, - Datacenter: n.Datacenter, - Name: n.Name, - NodeClass: n.NodeClass, - Version: n.Attributes["nomad.version"], - Drain: n.Drain, - Status: n.Status, - StatusDescription: n.StatusDescription, - CreateIndex: n.CreateIndex, - ModifyIndex: n.ModifyIndex, + Address: addr, + ID: n.ID, + Datacenter: n.Datacenter, + Name: n.Name, + NodeClass: n.NodeClass, + Version: n.Attributes["nomad.version"], + Drain: n.Drain, + SchedulingEligibility: n.SchedulingEligibility, + Status: n.Status, + StatusDescription: n.StatusDescription, + CreateIndex: n.CreateIndex, + ModifyIndex: n.ModifyIndex, } } // NodeListStub is used to return a subset of job information // for the job list type NodeListStub struct { - Address string - ID string - Datacenter string - Name string - NodeClass string - Version string - Drain bool - Status string - StatusDescription string - CreateIndex uint64 - ModifyIndex uint64 + Address string + ID string + Datacenter string + Name string + NodeClass string + Version string + Drain bool + SchedulingEligibility string + Status string + StatusDescription string + CreateIndex uint64 + ModifyIndex uint64 } // Networks defined for a task on the Resources struct. 
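
For reference, a minimal sketch (not part of the patch) of how the DrainStrategy added above is meant to be consulted. It relies only on what this diff shows: DeadlineTime() returns the zero Time for a nil strategy or a non-positive Deadline, and otherwise StartTime (Unix nanoseconds) plus Deadline; the node drainer earlier in the patch makes the same Before/After comparisons against time.Now().

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// A node that began draining now and must finish within 10 seconds.
	strategy := &structs.DrainStrategy{
		StartTime: time.Now().UnixNano(), // nanoseconds since the Unix epoch
		Deadline:  10 * time.Second,
	}

	// Equivalent to time.Unix(0, StartTime).Add(Deadline); the zero Time
	// would mean "no deadline" (nil strategy or Deadline <= 0).
	deadline := strategy.DeadlineTime()

	if !deadline.IsZero() && deadline.Before(time.Now()) {
		// Deadline reached: the drainer stops any remaining allocs on the
		// node regardless of the task groups' migrate strategies.
		fmt.Println("drain deadline reached")
	}
}
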
@@ -2898,6 +2950,64 @@ func NewReschedulePolicy(jobType string) *ReschedulePolicy { return nil } +const ( + MigrateStrategyHealthChecks = "checks" + MigrateStrategyHealthStates = "task_states" +) + +type MigrateStrategy struct { + MaxParallel int + HealthCheck string + MinHealthyTime time.Duration + HealthyDeadline time.Duration +} + +// DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations +// that lack an update strategy. +// +// This function should match its counterpart in api/tasks.go +func DefaultMigrateStrategy() *MigrateStrategy { + return &MigrateStrategy{ + MaxParallel: 1, + HealthCheck: MigrateStrategyHealthChecks, + MinHealthyTime: 10 * time.Second, + HealthyDeadline: 5 * time.Minute, + } +} + +func (m *MigrateStrategy) Validate() error { + var mErr multierror.Error + + if m.MaxParallel < 0 { + multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel)) + } + + switch m.HealthCheck { + case MigrateStrategyHealthChecks, MigrateStrategyHealthStates: + // ok + case "": + if m.MaxParallel > 0 { + multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck")) + } + default: + multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck)) + } + + if m.MinHealthyTime < 0 { + multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime)) + } + + if m.HealthyDeadline < 0 { + multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline)) + } + + if m.MinHealthyTime > m.HealthyDeadline { + multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline")) + } + + return mErr.ErrorOrNil() +} + // TaskGroup is an atomic unit of placement. Each task group belongs to // a job and may contain any number of tasks. A task group support running // in many replicas using the same configuration.. @@ -2912,6 +3022,9 @@ type TaskGroup struct { // Update is used to control the update strategy for this task group Update *UpdateStrategy + // Migrate is used to control the migration strategy for this task group + Migrate *MigrateStrategy + // Constraints can be specified at a task group level and apply to // all the tasks contained. 
Constraints []*Constraint @@ -3059,6 +3172,20 @@ func (tg *TaskGroup) Validate(j *Job) error { } } + // Validate the migration strategy + switch j.Type { + case JobTypeService: + if tg.Count == 1 && tg.Migrate != nil { + mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should not have a migration strategy with a count = 1", tg.Name)) + } else if err := tg.Migrate.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } + default: + if tg.Migrate != nil { + mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type)) + } + } + // Check for duplicate tasks, that there is only leader task if any, // and no duplicated static ports tasks := make(map[string]int) @@ -5837,6 +5964,7 @@ const ( EvalTriggerJobRegister = "job-register" EvalTriggerJobDeregister = "job-deregister" EvalTriggerPeriodicJob = "periodic-job" + EvalTriggerNodeDrain = "node-drain" EvalTriggerNodeUpdate = "node-update" EvalTriggerScheduled = "scheduled" EvalTriggerRollingUpdate = "rolling-update" diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 94dbc8a4b60f..32758359b8c4 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -117,8 +117,9 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error { // Verify the evaluation trigger reason is understood switch eval.TriggeredBy { - case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate, - structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate, + case structs.EvalTriggerJobRegister, structs.EvalTriggerJobDeregister, + structs.EvalTriggerNodeDrain, structs.EvalTriggerNodeUpdate, + structs.EvalTriggerRollingUpdate, structs.EvalTriggerPeriodicJob, structs.EvalTriggerMaxPlans, structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerRetryFailedAlloc: default: diff --git a/scheduler/util.go b/scheduler/util.go index 17b7942accda..3417356014b6 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -249,6 +249,9 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int if node.Drain { continue } + if node.SchedulingEligibility != structs.NodeSchedulingEligible { + continue + } if _, ok := dcMap[node.Datacenter]; !ok { continue } @@ -315,9 +318,10 @@ func taintedNodes(state State, allocs []*structs.Allocation) (map[string]*struct out[alloc.NodeID] = nil continue } - if structs.ShouldDrainNode(node.Status) || node.Drain { - out[alloc.NodeID] = node - } + //FIXME is this right? + //if structs.ShouldDrainNode(node.Status) || node.Drain { + // out[alloc.NodeID] = node + //} } return out, nil } diff --git a/testutil/rpcapi/rcpapi.go b/testutil/rpcapi/rcpapi.go new file mode 100644 index 000000000000..71e5be057ea0 --- /dev/null +++ b/testutil/rpcapi/rcpapi.go @@ -0,0 +1,114 @@ +package rpcapi + +import ( + "net/rpc" + + msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" + "github.com/hashicorp/nomad/nomad/structs" +) + +type RPC struct { + Region string + Namespace string + codec rpc.ClientCodec +} + +func NewRPC(codec rpc.ClientCodec) *RPC { + return &RPC{ + Region: "global", + Namespace: structs.DefaultNamespace, + codec: codec, + } +} + +// AllocAll calls Alloc.List + Alloc.GetAllocs to return all allocs. 
+func (r *RPC) AllocAll() ([]*structs.Allocation, error) { + listResp, err := r.AllocList() + if err != nil { + return nil, err + } + + ids := make([]string, 0, len(listResp.Allocations)) + for _, a := range listResp.Allocations { + ids = append(ids, a.ID) + } + + allocsResp, err := r.AllocGetAllocs(ids) + if err != nil { + return nil, err + } + return allocsResp.Allocs, nil +} + +// Alloc.List RPC +func (r *RPC) AllocList() (*structs.AllocListResponse, error) { + get := &structs.AllocListRequest{ + QueryOptions: structs.QueryOptions{ + Region: r.Region, + Namespace: r.Namespace, + }, + } + + var resp structs.AllocListResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Alloc.List", get, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +// Alloc.GetAllocs RPC +func (r *RPC) AllocGetAllocs(ids []string) (*structs.AllocsGetResponse, error) { + get := &structs.AllocsGetRequest{ + AllocIDs: ids, + QueryOptions: structs.QueryOptions{ + Region: r.Region, + Namespace: r.Namespace, + }, + } + var resp structs.AllocsGetResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Alloc.GetAllocs", get, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +// Job.List RPC +func (r *RPC) JobList() (*structs.JobListResponse, error) { + get := &structs.JobListRequest{ + QueryOptions: structs.QueryOptions{ + Region: r.Region, + Namespace: r.Namespace, + }, + } + + var resp structs.JobListResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Job.List", get, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +// Node.List RPC +func (r *RPC) NodeList() (*structs.NodeListResponse, error) { + get := &structs.NodeListRequest{ + QueryOptions: structs.QueryOptions{Region: r.Region}, + } + var resp structs.NodeListResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Node.List", get, &resp); err != nil { + return nil, err + } + return &resp, nil +} + +// Node.GetAllocs RPC +func (r *RPC) NodeGetAllocs(nodeID string) (*structs.NodeAllocsResponse, error) { + get := &structs.NodeSpecificRequest{ + NodeID: nodeID, + QueryOptions: structs.QueryOptions{Region: r.Region}, + } + var resp structs.NodeAllocsResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Node.GetAllocs", get, &resp); err != nil { + return nil, err + } + return &resp, nil +} From 587d4e264b5c41a72e682e5dffaecadbc9259b0f Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 16 Feb 2018 15:07:49 -0800 Subject: [PATCH 02/79] testlog: override testlogger with envvar --- helper/testlog/testlog.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/helper/testlog/testlog.go b/helper/testlog/testlog.go index b72fcfb28bef..709bd9d54745 100644 --- a/helper/testlog/testlog.go +++ b/helper/testlog/testlog.go @@ -6,8 +6,14 @@ package testlog import ( "io" "log" + "os" ) +// UseStdout returns true if NOMAD_TEST_STDOUT=1 and sends logs to stdout. +func UseStdout() bool { + return os.Getenv("NOMAD_TEST_STDOUT") == "1" +} + // LogPrinter is the methods of testing.T (or testing.B) needed by the test // logger. type LogPrinter interface { @@ -27,11 +33,17 @@ func (w *writer) Write(p []byte) (n int, err error) { // NewWriter creates a new io.Writer backed by a Logger. func NewWriter(t LogPrinter) io.Writer { + if UseStdout() { + return os.Stdout + } return &writer{t} } // New returns a new test logger. 
See https://golang.org/pkg/log/#New func New(t LogPrinter, prefix string, flag int) *log.Logger { + if UseStdout() { + return log.New(os.Stdout, prefix, flag) + } return log.New(&writer{t}, prefix, flag) } From 91e8fd098f6614bb333db3e9b96d3870c055ee67 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 19 Feb 2018 17:14:54 -0800 Subject: [PATCH 03/79] mock_driver: improve Kill() logging --- client/driver/mock_driver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/driver/mock_driver.go b/client/driver/mock_driver.go index 09a86f72deda..ffa6b09774ef 100644 --- a/client/driver/mock_driver.go +++ b/client/driver/mock_driver.go @@ -379,7 +379,7 @@ func (h *mockDriverHandle) Signal(s os.Signal) error { // Kill kills a mock task func (h *mockDriverHandle) Kill() error { - h.logger.Printf("[DEBUG] driver.mock: killing task %q after kill timeout: %v", h.taskName, h.killTimeout) + h.logger.Printf("[DEBUG] driver.mock: killing task %q after %s or kill timeout: %v", h.taskName, h.killAfter, h.killTimeout) select { case <-h.doneCh: case <-time.After(h.killAfter): From 48d637dad191075f2da6a1d1d72810f10ca7442b Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 21 Feb 2018 10:58:04 -0800 Subject: [PATCH 04/79] RPC, FSM, State Store for marking DesiredTransistion fix build tag --- api/allocations.go | 10 +++ client/driver/mock_driver_testing.go | 2 +- nomad/alloc_endpoint.go | 33 ++++++++ nomad/alloc_endpoint_test.go | 58 ++++++++++++++ nomad/fsm.go | 18 +++++ nomad/fsm_test.go | 42 +++++++++++ nomad/mock/mock.go | 5 +- nomad/node_endpoint.go | 2 +- nomad/state/state_store.go | 57 ++++++++++++++ nomad/state/state_store_test.go | 52 +++++++++++++ nomad/structs/structs.go | 37 +++++++++ scheduler/generic_sched_test.go | 108 ++++----------------------- scheduler/reconcile.go | 1 + scheduler/reconcile_test.go | 8 ++ scheduler/reconcile_util.go | 5 +- scheduler/system_sched.go | 2 +- scheduler/system_sched_test.go | 4 + scheduler/testing.go | 8 +- scheduler/util.go | 37 +++++---- scheduler/util_test.go | 7 ++ 20 files changed, 379 insertions(+), 117 deletions(-) diff --git a/api/allocations.go b/api/allocations.go index 68047ee5b462..89206dadee0b 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -81,6 +81,7 @@ type Allocation struct { Metrics *AllocationMetric DesiredStatus string DesiredDescription string + DesiredTransistion DesiredTransistion ClientStatus string ClientDescription string TaskStates map[string]*TaskState @@ -205,3 +206,12 @@ type RescheduleEvent struct { // PrevNodeID is the node ID of the previous allocation PrevNodeID string } + +// DesiredTransistion is used to mark an allocation as having a desired state +// transistion. This information can be used by the scheduler to make the +// correct decision. +type DesiredTransistion struct { + // Migrate is used to indicate that this allocation should be stopped and + // migrated to another node. 
+ Migrate *bool +} diff --git a/client/driver/mock_driver_testing.go b/client/driver/mock_driver_testing.go index 1b1e861a8915..8a712205e4aa 100644 --- a/client/driver/mock_driver_testing.go +++ b/client/driver/mock_driver_testing.go @@ -1,4 +1,4 @@ -//+build nomad_test +// +build nomad_test package driver diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 033a1a0103aa..a7f5e3bdc2ac 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -1,6 +1,7 @@ package nomad import ( + "fmt" "time" "github.com/armon/go-metrics" @@ -200,3 +201,35 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest, } return a.srv.blockingRPC(&opts) } + +// UpdateDesiredTransistion is used to update the desired transistions of an +// allocation. +func (a *Alloc) UpdateDesiredTransistion(args *structs.AllocUpdateDesiredTransistionRequest, reply *structs.GenericResponse) error { + if done, err := a.srv.forward("Alloc.UpdateDesiredTransistion", args, args, reply); done { + return err + } + defer metrics.MeasureSince([]string{"nomad", "alloc", "update_desired_transistion"}, time.Now()) + + // Check that it is a management token. + if aclObj, err := a.srv.ResolveToken(args.AuthToken); err != nil { + return err + } else if aclObj != nil && !aclObj.IsManagement() { + return structs.ErrPermissionDenied + } + + // Ensure at least a single alloc + if len(args.Allocs) == 0 { + return fmt.Errorf("must update at least one allocation") + } + + // Commit this update via Raft + _, index, err := a.srv.raftApply(structs.AllocUpdateDesiredTransistionRequestType, args) + if err != nil { + a.srv.logger.Printf("[ERR] nomad.allocs: AllocUpdateDesiredTransistionRequest failed: %v", err) + return err + } + + // Setup the response + reply.Index = index + return nil +} diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index abb36178681c..f898f2b7dd9f 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -7,11 +7,13 @@ import ( "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/acl" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestAllocEndpoint_List(t *testing.T) { @@ -481,3 +483,59 @@ func TestAllocEndpoint_GetAllocs_Blocking(t *testing.T) { t.Fatalf("bad: %#v", resp.Allocs) } } + +func TestAllocEndpoint_UpdateDesiredTransistion(t *testing.T) { + t.Parallel() + require := require.New(t) + + s1, _ := TestACLServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + alloc := mock.Alloc() + alloc2 := mock.Alloc() + state := s1.fsm.State() + require.Nil(state.UpsertJobSummary(998, mock.JobSummary(alloc.JobID))) + require.Nil(state.UpsertJobSummary(999, mock.JobSummary(alloc2.JobID))) + require.Nil(state.UpsertAllocs(1000, []*structs.Allocation{alloc, alloc2})) + + t1 := &structs.DesiredTransistion{ + Migrate: helper.BoolToPtr(true), + } + + // Update the allocs desired status + get := &structs.AllocUpdateDesiredTransistionRequest{ + Allocs: map[string]*structs.DesiredTransistion{ + alloc.ID: t1, + alloc2.ID: t1, + }, + WriteRequest: structs.WriteRequest{ + Region: "global", + }, + } + + // Try without permissions + var resp structs.GenericResponse + err := msgpackrpc.CallWithCodec(codec, "Alloc.UpdateDesiredTransistion", get, 
&resp) + require.NotNil(err) + require.True(structs.IsErrPermissionDenied(err)) + + // Try with permissions + get.WriteRequest.AuthToken = s1.getLeaderAcl() + var resp2 structs.GenericResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Alloc.UpdateDesiredTransistion", get, &resp2)) + require.NotZero(resp2.Index) + + // Look up the allocations + out1, err := state.AllocByID(nil, alloc.ID) + require.Nil(err) + out2, err := state.AllocByID(nil, alloc.ID) + require.Nil(err) + + require.NotNil(out1.DesiredTransistion.Migrate) + require.NotNil(out2.DesiredTransistion.Migrate) + require.True(*out1.DesiredTransistion.Migrate) + require.True(*out2.DesiredTransistion.Migrate) +} diff --git a/nomad/fsm.go b/nomad/fsm.go index 21a785b6750f..a1d9113cada2 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -240,6 +240,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyUpsertNodeEvent(buf[1:], log.Index) case structs.JobBatchDeregisterRequestType: return n.applyBatchDeregisterJob(buf[1:], log.Index) + case structs.AllocUpdateDesiredTransistionRequestType: + return n.applyAllocUpdateDesiredTransition(buf[1:], log.Index) } // Check enterprise only message types. @@ -651,6 +653,22 @@ func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} return nil } +// applyAllocUpdateDesiredTransition is used to update the desired transistions +// of a set of allocations. +func (n *nomadFSM) applyAllocUpdateDesiredTransition(buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update_desired_transistion"}, time.Now()) + var req structs.AllocUpdateDesiredTransistionRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.UpdateAllocsDesiredTransistions(index, req.Allocs); err != nil { + n.logger.Printf("[ERR] nomad.fsm: UpdateAllocsDesiredTransistions failed: %v", err) + return err + } + return nil +} + // applyReconcileSummaries reconciles summaries for all the jobs func (n *nomadFSM) applyReconcileSummaries(buf []byte, index uint64) interface{} { if err := n.state.ReconcileJobSummaries(index); err != nil { diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 5c2ed08cb112..a04f1cd2f1c1 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -1241,6 +1241,48 @@ func TestFSM_UpdateAllocFromClient(t *testing.T) { require.Equal(eval, res) } +func TestFSM_UpdateAllocDesiredTransistion(t *testing.T) { + t.Parallel() + fsm := testFSM(t) + state := fsm.State() + require := require.New(t) + + alloc := mock.Alloc() + alloc2 := mock.Alloc() + alloc2.Job = alloc.Job + alloc2.JobID = alloc.JobID + state.UpsertJobSummary(9, mock.JobSummary(alloc.JobID)) + state.UpsertAllocs(10, []*structs.Allocation{alloc, alloc2}) + + t1 := &structs.DesiredTransistion{ + Migrate: helper.BoolToPtr(true), + } + + req := structs.AllocUpdateDesiredTransistionRequest{ + Allocs: map[string]*structs.DesiredTransistion{ + alloc.ID: t1, + alloc2.ID: t1, + }, + } + buf, err := structs.Encode(structs.AllocUpdateDesiredTransistionRequestType, req) + require.Nil(err) + + resp := fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Verify we are registered + ws := memdb.NewWatchSet() + out1, err := fsm.State().AllocByID(ws, alloc.ID) + require.Nil(err) + out2, err := fsm.State().AllocByID(ws, alloc2.ID) + require.Nil(err) + + require.NotNil(out1.DesiredTransistion.Migrate) + require.NotNil(out2.DesiredTransistion.Migrate) + require.True(*out1.DesiredTransistion.Migrate) + 
require.True(*out2.DesiredTransistion.Migrate) +} + func TestFSM_UpsertVaultAccessor(t *testing.T) { t.Parallel() fsm := testFSM(t) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 1d96e556b39b..6c2a3f42e0a3 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -54,8 +54,9 @@ func Node() *structs.Node { "database": "mysql", "version": "5.6", }, - NodeClass: "linux-medium-pci", - Status: structs.NodeStatusReady, + NodeClass: "linux-medium-pci", + Status: structs.NodeStatusReady, + SchedulingEligibility: structs.NodeSchedulingEligible, } node.ComputeClass() return node diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 3ef43ccf6903..182817392bdf 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -822,7 +822,7 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene // Ensure that evals aren't set from client RPCs // We create them here before the raft update if len(args.Evals) != 0 { - return fmt.Errorf("evals field must not be set ") + return fmt.Errorf("evals field must not be set") } // Update modified timestamp for client initiated allocation updates diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 67a02f348976..1c67327ae4ca 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -2008,6 +2008,63 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation return nil } +// UpdateAllocsDesiredTransistions is used to update a set of allocations +// desired transistions. +func (s *StateStore) UpdateAllocsDesiredTransistions(index uint64, allocs map[string]*structs.DesiredTransistion) error { + txn := s.db.Txn(true) + defer txn.Abort() + + // Handle each of the updated allocations + for id, transistion := range allocs { + if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transistion); err != nil { + return err + } + } + + // Update the indexes + if err := txn.Insert("index", &IndexEntry{"allocs", index}); err != nil { + return fmt.Errorf("index update failed: %v", err) + } + + txn.Commit() + return nil +} + +// nestedUpdateAllocDesiredTransition is used to nest an update of an +// allocations desired transistion +func (s *StateStore) nestedUpdateAllocDesiredTransition( + txn *memdb.Txn, index uint64, allocID string, + transistion *structs.DesiredTransistion) error { + + // Look for existing alloc + existing, err := txn.First("allocs", "id", allocID) + if err != nil { + return fmt.Errorf("alloc lookup failed: %v", err) + } + + // Nothing to do if this does not exist + if existing == nil { + return nil + } + exist := existing.(*structs.Allocation) + + // Copy everything from the existing allocation + copyAlloc := exist.Copy() + + // Merge the desired transistions + copyAlloc.DesiredTransistion.Merge(transistion) + + // Update the modify index + copyAlloc.ModifyIndex = index + + // Update the allocation + if err := txn.Insert("allocs", copyAlloc); err != nil { + return fmt.Errorf("alloc insert failed: %v", err) + } + + return nil +} + // AllocByID is used to lookup an allocation by its ID func (s *StateStore) AllocByID(ws memdb.WatchSet, id string) (*structs.Allocation, error) { txn := s.db.Txn(false) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index d176e178b9a9..4fd2173f94cf 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -3823,6 +3823,58 @@ func TestStateStore_UpdateAlloc_NoJob(t *testing.T) { } } +func TestStateStore_UpdateAllocDesiredTransistion(t *testing.T) { + 
t.Parallel() + require := require.New(t) + + state := testStateStore(t) + alloc := mock.Alloc() + + require.Nil(state.UpsertJob(999, alloc.Job)) + require.Nil(state.UpsertAllocs(1000, []*structs.Allocation{alloc})) + + t1 := &structs.DesiredTransistion{ + Migrate: helper.BoolToPtr(true), + } + t2 := &structs.DesiredTransistion{ + Migrate: helper.BoolToPtr(false), + } + + m := map[string]*structs.DesiredTransistion{alloc.ID: t1} + require.Nil(state.UpdateAllocsDesiredTransistions(1001, m)) + + ws := memdb.NewWatchSet() + out, err := state.AllocByID(ws, alloc.ID) + require.Nil(err) + require.NotNil(out.DesiredTransistion.Migrate) + require.True(*out.DesiredTransistion.Migrate) + require.EqualValues(1000, out.CreateIndex) + require.EqualValues(1001, out.ModifyIndex) + + index, err := state.Index("allocs") + require.Nil(err) + require.EqualValues(1001, index) + + m = map[string]*structs.DesiredTransistion{alloc.ID: t2} + require.Nil(state.UpdateAllocsDesiredTransistions(1002, m)) + + ws = memdb.NewWatchSet() + out, err = state.AllocByID(ws, alloc.ID) + require.Nil(err) + require.NotNil(out.DesiredTransistion.Migrate) + require.False(*out.DesiredTransistion.Migrate) + require.EqualValues(1000, out.CreateIndex) + require.EqualValues(1002, out.ModifyIndex) + + index, err = state.Index("allocs") + require.Nil(err) + require.EqualValues(1002, index) + + // Try with a bogus alloc id + m = map[string]*structs.DesiredTransistion{uuid.Generate(): t2} + require.Nil(state.UpdateAllocsDesiredTransistions(1003, m)) +} + func TestStateStore_JobSummary(t *testing.T) { state := testStateStore(t) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 68975ec69963..e50921c27cb5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -78,6 +78,7 @@ const ( AutopilotRequestType UpsertNodeEventsType JobBatchDeregisterRequestType + AllocUpdateDesiredTransistionRequestType ) const ( @@ -573,6 +574,16 @@ type AllocUpdateRequest struct { WriteRequest } +// AllocUpdateDesiredTransistionRequest is used to submit changes to allocations +// desired transistion state. +type AllocUpdateDesiredTransistionRequest struct { + // Allocs is the mapping of allocation ids to their desired state + // transistion + Allocs map[string]*DesiredTransistion + + WriteRequest +} + // AllocListRequest is used to request a list of allocations type AllocListRequest struct { QueryOptions @@ -5338,6 +5349,28 @@ func (re *RescheduleEvent) Copy() *RescheduleEvent { return copy } +// DesiredTransistion is used to mark an allocation as having a desired state +// transistion. This information can be used by the scheduler to make the +// correct decision. +type DesiredTransistion struct { + // Migrate is used to indicate that this allocation should be stopped and + // migrated to another node. + Migrate *bool +} + +// Merge merges the two desired transitions, preferring the values from the +// passed in object. +func (d *DesiredTransistion) Merge(o *DesiredTransistion) { + if o.Migrate != nil { + d.Migrate = o.Migrate + } +} + +// ShouldMigrate returns whether the transistion object dictates a migration. 
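To make the merge semantics concrete: Merge only copies fields that are non-nil on the incoming transition, and ShouldMigrate (defined just below) treats a nil Migrate pointer as false. A minimal sketch inside a test body, using helper.BoolToPtr as the tests in this change already do (the identifier is still spelled DesiredTransistion at this point in the series; a later patch renames it):

    d := &structs.DesiredTransistion{}
    d.Merge(&structs.DesiredTransistion{Migrate: helper.BoolToPtr(true)})
    _ = d.ShouldMigrate() // true: Migrate was set by the merge

    d.Merge(&structs.DesiredTransistion{}) // nil Migrate, so the earlier value is kept
    _ = d.ShouldMigrate() // still true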
+func (d *DesiredTransistion) ShouldMigrate() bool { + return d.Migrate != nil && *d.Migrate +} + const ( AllocDesiredStatusRun = "run" // Allocation should run AllocDesiredStatusStop = "stop" // Allocation should stop @@ -5399,6 +5432,10 @@ type Allocation struct { // DesiredStatusDescription is meant to provide more human useful information DesiredDescription string + // DesiredTransistion is used to indicate that a state transistion + // is desired for a given reason. + DesiredTransistion DesiredTransistion + // Status of the allocation on the client ClientStatus string diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 5b21034eb9cb..d1bbf4710334 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -2211,6 +2211,7 @@ func TestServiceSched_NodeDown(t *testing.T) { // Register a node node := mock.Node() + node.Status = structs.NodeStatusDown noErr(t, h.State.UpsertNode(h.NextIndex(), node)) // Generate a fake job with allocations and an update policy. @@ -2235,18 +2236,19 @@ func TestServiceSched_NodeDown(t *testing.T) { allocs[9].DesiredStatus = structs.AllocDesiredStatusRun allocs[9].ClientStatus = structs.AllocClientStatusComplete - noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) - // Mark some allocs as running - ws := memdb.NewWatchSet() for i := 0; i < 4; i++ { - out, _ := h.State.AllocByID(ws, allocs[i].ID) + out := allocs[i] out.ClientStatus = structs.AllocClientStatusRunning - noErr(t, h.State.UpdateAllocsFromClient(h.NextIndex(), []*structs.Allocation{out})) } - // Mark the node as down - noErr(t, h.State.UpdateNodeStatus(h.NextIndex(), node.ID, structs.NodeStatusDown)) + // Mark appropriate allocs for migration + for i := 0; i < 7; i++ { + out := allocs[i] + out.DesiredTransistion.Migrate = helper.BoolToPtr(true) + } + + noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) // Create a mock evaluation to deal with drain eval := &structs.Evaluation{ @@ -2365,6 +2367,7 @@ func TestServiceSched_NodeDrain(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = fmt.Sprintf("my-job.web[%d]", i) + alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) allocs = append(allocs, alloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) @@ -2447,9 +2450,10 @@ func TestServiceSched_NodeDrain_Down(t *testing.T) { // Set the desired state of the allocs to stop var stop []*structs.Allocation - for i := 0; i < 10; i++ { + for i := 0; i < 6; i++ { newAlloc := allocs[i].Copy() newAlloc.ClientStatus = structs.AllocDesiredStatusStop + newAlloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) stop = append(stop, newAlloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), stop)) @@ -2466,7 +2470,7 @@ func TestServiceSched_NodeDrain_Down(t *testing.T) { // Mark some of the allocations as complete var complete []*structs.Allocation for i := 6; i < 10; i++ { - newAlloc := stop[i].Copy() + newAlloc := allocs[i].Copy() newAlloc.TaskStates = make(map[string]*structs.TaskState) newAlloc.TaskStates["web"] = &structs.TaskState{ State: structs.TaskStateDead, @@ -2552,6 +2556,7 @@ func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = fmt.Sprintf("my-job.web[%d]", i) + alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) allocs = append(allocs, alloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) @@ -2583,88 +2588,6 @@ func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) { } } -func 
TestServiceSched_NodeDrain_UpdateStrategy(t *testing.T) { - h := NewHarness(t) - - // Register a draining node - node := mock.Node() - node.Drain = true - noErr(t, h.State.UpsertNode(h.NextIndex(), node)) - - // Create some nodes - for i := 0; i < 10; i++ { - node := mock.Node() - noErr(t, h.State.UpsertNode(h.NextIndex(), node)) - } - - // Generate a fake job with allocations and an update policy. - job := mock.Job() - mp := 5 - u := structs.DefaultUpdateStrategy.Copy() - u.MaxParallel = mp - u.Stagger = time.Second - job.TaskGroups[0].Update = u - - noErr(t, h.State.UpsertJob(h.NextIndex(), job)) - - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = node.ID - alloc.Name = fmt.Sprintf("my-job.web[%d]", i) - allocs = append(allocs, alloc) - } - noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) - - // Create a mock evaluation to deal with drain - eval := &structs.Evaluation{ - Namespace: structs.DefaultNamespace, - ID: uuid.Generate(), - Priority: 50, - TriggeredBy: structs.EvalTriggerNodeUpdate, - JobID: job.ID, - NodeID: node.ID, - Status: structs.EvalStatusPending, - } - noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) - - // Process the evaluation - err := h.Process(NewServiceScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } - - // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } - plan := h.Plans[0] - - // Ensure the plan evicted all allocs - if len(plan.NodeUpdate[node.ID]) != mp { - t.Fatalf("bad: %#v", plan) - } - - // Ensure the plan allocated - var planned []*structs.Allocation - for _, allocList := range plan.NodeAllocation { - planned = append(planned, allocList...) - } - if len(planned) != mp { - t.Fatalf("bad: %#v", plan) - } - - // Ensure there is a followup eval. - if len(h.CreateEvals) != 1 || - h.CreateEvals[0].TriggeredBy != structs.EvalTriggerRollingUpdate { - t.Fatalf("bad: %#v", h.CreateEvals) - } - - h.AssertEvalStatus(t, structs.EvalStatusComplete) -} - func TestServiceSched_RetryLimit(t *testing.T) { h := NewHarness(t) h.Planner = &RejectPlan{h} @@ -3755,6 +3678,7 @@ func TestBatchSched_NodeDrain_Running_OldJob(t *testing.T) { // Create an update job job2 := job.Copy() job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"} + job2.Version++ noErr(t, h.State.UpsertJob(h.NextIndex(), job2)) // Create a mock evaluation to register the job @@ -4021,10 +3945,10 @@ func TestServiceSched_NodeDrain_Sticky(t *testing.T) { // Create an alloc on the draining node alloc := mock.Alloc() alloc.Name = "my-job.web[0]" - alloc.DesiredStatus = structs.AllocDesiredStatusStop alloc.NodeID = node.ID alloc.Job.TaskGroups[0].Count = 1 alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true + alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) noErr(t, h.State.UpsertJob(h.NextIndex(), alloc.Job)) noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 3bfd1a89e14d..cdc375510750 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -499,6 +499,7 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { }) } + // TODO Deprecate // We need to create a followup evaluation. 
if followup && strategy != nil && a.result.followupEvalWait < strategy.Stagger { a.result.followupEvalWait = strategy.Stagger diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 34f6eddbfa0c..a9188fa42ee5 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -927,6 +927,7 @@ func TestReconciler_DrainNode(t *testing.T) { for i := 0; i < 2; i++ { n := mock.Node() n.ID = allocs[i].NodeID + allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -979,6 +980,7 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { for i := 0; i < 2; i++ { n := mock.Node() n.ID = allocs[i].NodeID + allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -1032,6 +1034,7 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) { for i := 0; i < 3; i++ { n := mock.Node() n.ID = allocs[i].NodeID + allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -2213,6 +2216,7 @@ func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) { for i := 0; i < 3; i++ { n := mock.Node() n.ID = allocs[i].NodeID + allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -2286,6 +2290,7 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { tainted := make(map[string]*structs.Node, 1) n := mock.Node() n.ID = allocs[11].NodeID + allocs[11].DesiredTransistion.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n @@ -3025,6 +3030,7 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { n.Status = structs.NodeStatusDown } else { n.Drain = true + allocs[2+i].DesiredTransistion.Migrate = helper.BoolToPtr(true) } tainted[n.ID] = n } @@ -3110,6 +3116,7 @@ func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) { n.Status = structs.NodeStatusDown } else { n.Drain = true + allocs[6+i].DesiredTransistion.Migrate = helper.BoolToPtr(true) } tainted[n.ID] = n } @@ -3435,6 +3442,7 @@ func TestReconciler_TaintedNode_MultiGroups(t *testing.T) { for i := 0; i < 15; i++ { n := mock.Node() n.ID = allocs[i].NodeID + allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index db3a5ff1e3d5..fc8d619fb661 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -214,11 +214,14 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi untainted[alloc.ID] = alloc continue } + if !alloc.TerminalStatus() { if n == nil || n.TerminalStatus() { lost[alloc.ID] = alloc - } else { + } else if alloc.DesiredTransistion.ShouldMigrate() { migrate[alloc.ID] = alloc + } else { + untainted[alloc.ID] = alloc } } else { untainted[alloc.ID] = alloc diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go index d30608c8b724..4fa2d20f673a 100644 --- a/scheduler/system_sched.go +++ b/scheduler/system_sched.go @@ -62,7 +62,7 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error { switch eval.TriggeredBy { case structs.EvalTriggerJobRegister, structs.EvalTriggerNodeUpdate, structs.EvalTriggerJobDeregister, structs.EvalTriggerRollingUpdate, - structs.EvalTriggerDeploymentWatcher: + structs.EvalTriggerDeploymentWatcher, structs.EvalTriggerNodeDrain: default: desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) diff --git a/scheduler/system_sched_test.go b/scheduler/system_sched_test.go index 
8cd1a0c6474a..7303ea1708df 100644 --- a/scheduler/system_sched_test.go +++ b/scheduler/system_sched_test.go @@ -7,6 +7,7 @@ import ( "time" memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" @@ -971,6 +972,7 @@ func TestSystemSched_NodeDown(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" + alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) // Create a mock evaluation to deal with drain @@ -1099,6 +1101,7 @@ func TestSystemSched_NodeDrain(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" + alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) // Create a mock evaluation to deal with drain @@ -1412,6 +1415,7 @@ func TestSystemSched_PlanWithDrainedNode(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" + alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) alloc.TaskGroup = "web" alloc2 := mock.Alloc() diff --git a/scheduler/testing.go b/scheduler/testing.go index a04b99ce860c..47a6caaeb004 100644 --- a/scheduler/testing.go +++ b/scheduler/testing.go @@ -2,12 +2,11 @@ package scheduler import ( "fmt" - "log" - "os" "sync" "time" memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/mitchellh/go-testing-interface" @@ -40,6 +39,7 @@ func (r *RejectPlan) ReblockEval(*structs.Evaluation) error { // store copy and provides the planner interface. It can be extended for various // testing uses or for invoking the scheduler without side effects. type Harness struct { + t testing.T State *state.StateStore Planner Planner @@ -58,6 +58,7 @@ type Harness struct { func NewHarness(t testing.T) *Harness { state := state.TestStateStore(t) h := &Harness{ + t: t, State: state, nextIndex: 1, } @@ -68,6 +69,7 @@ func NewHarness(t testing.T) *Harness { // purposes. func NewHarnessWithState(t testing.T, state *state.StateStore) *Harness { return &Harness{ + t: t, State: state, nextIndex: 1, } @@ -201,7 +203,7 @@ func (h *Harness) Snapshot() State { // Scheduler is used to return a new scheduler from // a snapshot of current state using the harness for planning. 
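Further down in this patch the test harness switches its scheduler logger to testlog, and scheduler/util.go reworks diffAllocs so that an allocation on a tainted node is only migrated when the drainer has explicitly marked it. The new classification can be restated as a small sketch (the function name is illustrative; the real code uses gotos inside diffAllocs, and node is the entry from taintedNodes, nil when the node record no longer exists):

    func classify(alloc *structs.Allocation, node *structs.Node) string {
        switch {
        case alloc.TerminalStatus():
            return "ignore" // already terminal, nothing to move
        case node == nil || node.TerminalStatus():
            return "lost" // node is gone or down
        case alloc.DesiredTransistion.ShouldMigrate():
            return "migrate" // drainer asked for this alloc to move
        default:
            return "ignore" // on a draining node but not yet told to move
        }
    }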
func (h *Harness) Scheduler(factory Factory) Scheduler { - logger := log.New(os.Stderr, "", log.LstdFlags) + logger := testlog.Logger(h.t) return factory(logger, h.Snapshot(), h) } diff --git a/scheduler/util.go b/scheduler/util.go index 3417356014b6..fcac79d1c87e 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -104,20 +104,26 @@ func diffAllocs(job *structs.Job, taintedNodes map[string]*structs.Node, goto IGNORE } - if node == nil || node.TerminalStatus() { - result.lost = append(result.lost, allocTuple{ - Name: name, - TaskGroup: tg, - Alloc: exist, - }) + if !exist.TerminalStatus() { + if node == nil || node.TerminalStatus() { + result.lost = append(result.lost, allocTuple{ + Name: name, + TaskGroup: tg, + Alloc: exist, + }) + } else if exist.DesiredTransistion.ShouldMigrate() { + result.migrate = append(result.migrate, allocTuple{ + Name: name, + TaskGroup: tg, + Alloc: exist, + }) + } else { + goto IGNORE + } } else { - // This is the drain case - result.migrate = append(result.migrate, allocTuple{ - Name: name, - TaskGroup: tg, - Alloc: exist, - }) + goto IGNORE } + continue } @@ -318,10 +324,9 @@ func taintedNodes(state State, allocs []*structs.Allocation) (map[string]*struct out[alloc.NodeID] = nil continue } - //FIXME is this right? - //if structs.ShouldDrainNode(node.Status) || node.Drain { - // out[alloc.NodeID] = node - //} + if structs.ShouldDrainNode(node.Status) || node.Drain { + out[alloc.NodeID] = node + } } return out, nil } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index cb96e83ea283..f2b339d38eff 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -7,6 +7,7 @@ import ( "reflect" "testing" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/state" @@ -90,6 +91,9 @@ func TestDiffAllocs(t *testing.T) { NodeID: "drainNode", Name: "my-job.web[2]", Job: oldJob, + DesiredTransistion: structs.DesiredTransistion{ + Migrate: helper.BoolToPtr(true), + }, }, // Mark the 4th lost { @@ -219,6 +223,9 @@ func TestDiffSystemAllocs(t *testing.T) { NodeID: drainNode.ID, Name: "my-job.web[0]", Job: oldJob, + DesiredTransistion: structs.DesiredTransistion{ + Migrate: helper.BoolToPtr(true), + }, }, // Mark as lost on a dead node { From 7deabe958d43720009bba6223db2e2b4f50dc39f Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 21 Feb 2018 17:22:06 -0800 Subject: [PATCH 05/79] drainer: switch to job based watching --- nomad/drain.go | 455 +++++++++++++++++++++----------------------- nomad/drain_test.go | 6 +- 2 files changed, 223 insertions(+), 238 deletions(-) diff --git a/nomad/drain.go b/nomad/drain.go index a1dc99972029..01732db1448a 100644 --- a/nomad/drain.go +++ b/nomad/drain.go @@ -13,6 +13,12 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +// jobKey is a tuple of namespace+jobid for use as a map key by job +type jobKey struct { + ns string + jobid string +} + // drainingJob contains the Job and allocations for that job meant to be used // when collecting all allocations for a job with at least one allocation on a // draining node. 
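The jobKey type above is what lets this patch drop the nested namespace -> job -> alloc maps: it is a small comparable struct, so it can be used directly as a map key and compared with ==. A minimal sketch of the intended shape (the job ID is illustrative):

    drainable := map[jobKey]*drainingJob{}
    k := jobKey{ns: structs.DefaultNamespace, jobid: "example"}
    if _, ok := drainable[k]; !ok {
        drainable[k] = &drainingJob{ /* job plus its current allocs */ }
    }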
@@ -48,19 +54,14 @@ func makeTaskGroupKey(a *structs.Allocation) string { // stopAllocs tracks allocs to drain by a unique TG key type stopAllocs struct { - perTaskGroup map[string]int - allocBatch []*structs.Allocation + allocBatch []*structs.Allocation // namespace+jobid -> Job - jobBatch map[string]*structs.Job + jobBatch map[jobKey]*structs.Job } //FIXME this method does an awful lot func (s *stopAllocs) add(j *structs.Job, a *structs.Allocation) { - // Increment the counter for how many allocs in this task group are being stopped - tgKey := makeTaskGroupKey(a) - s.perTaskGroup[tgKey]++ - // Update the allocation a.ModifyTime = time.Now().UnixNano() a.DesiredStatus = structs.AllocDesiredStatusStop @@ -69,8 +70,7 @@ func (s *stopAllocs) add(j *structs.Job, a *structs.Allocation) { s.allocBatch = append(s.allocBatch, a) // Add job to the job batch - jobKey := strings.Join([]string{j.Namespace, j.ID}, "-") - s.jobBatch[jobKey] = j + s.jobBatch[jobKey{a.Namespace, a.JobID}] = j } // startNodeDrainer should be called in establishLeadership by the leader. @@ -87,7 +87,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { } }() - nodes, nodesIndex, drainingAllocs, allocsIndex := initDrainer(s.logger, state) + nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(s.logger, state) // Wait for a node's drain deadline to expire var nextDeadline time.Time @@ -108,8 +108,9 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { go nodeWatcher.run(ctx) // Watch for drained allocations to be replaced - prevAllocs := newPrevAllocWatcher(s.logger, drainingAllocs, allocsIndex, state) - go prevAllocs.run(ctx) + // Watch for changes in allocs for jobs with allocs on draining nodes + jobWatcher := newJobWatcher(s.logger, drainingJobs, allocsIndex, state) + go jobWatcher.run(ctx) for { //TODO this method of async node updates means we could make @@ -117,16 +118,43 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { //possible outcome of this is that an allocation could be //stopped on a node that recently had its drain cancelled which //doesn't seem like that bad of a pathological case + s.logger.Printf("[TRACE] nomad.drain: LOOP next deadline: %s (%s)", nextDeadline, time.Until(nextDeadline)) select { case nodes = <-nodeWatcher.nodesCh: // update draining nodes - //TODO remove allocs from draining list with node ids not in this map s.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes)) - case drainedID := <-prevAllocs.allocsCh: - // drained alloc has been replaced - //TODO instead of modifying a view of draining allocs here created a shared map like prevallocs - delete(drainingAllocs, drainedID) - s.logger.Printf("[TRACE] nomad.drain: running due to alloc change (%s replaced)", drainedID) + + // update deadline timer + changed := false + for _, n := range nodes { + if nextDeadline.IsZero() { + nextDeadline = n.DrainStrategy.DeadlineTime() + changed = true + continue + } + + if deadline := n.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) { + nextDeadline = deadline + changed = true + } + } + + // if changed reset the timer + if changed { + s.logger.Printf("[TRACE] nomad.drain: new node deadline: %s", nextDeadline) + if !deadlineTimer.Stop() { + // timer may have been recv'd in a + // previous loop, so don't block + select { + case <-deadlineTimer.C: + default: + } + } + deadlineTimer.Reset(time.Until(nextDeadline)) + } + + case jobs := <-jobWatcher.WaitCh(): + s.logger.Printf("[TRACE] nomad.drain: running due to alloc 
change (%d jobs updated)", len(jobs)) case when := <-deadlineTimer.C: // deadline for a node was reached s.logger.Printf("[TRACE] nomad.drain: running due to deadline reached (at %s)", when) @@ -148,7 +176,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { } now := time.Now() // for determing deadlines in a consistent way - // namespace -> job id -> {job, allocs} + // job key -> {job, allocs} // Collect all allocs for all jobs with at least one // alloc on a draining node. // Invariants: @@ -156,7 +184,15 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // - No batch jobs unless their node's deadline is reached // - No entries with 0 allocs //TODO could this be a helper method on prevAllocWatcher - drainable := map[string]map[string]*drainingJob{} + drainable := map[jobKey]*drainingJob{} + + // track jobs we've looked up before and know we shouldn't + // consider for draining eg system jobs + skipJob := map[jobKey]struct{}{} + + // track number of "up" allocs per task group (not terminal and + // have a deployment status) + upPerTG := map[string]int{} // Collect all drainable jobs for nodeID, node := range nodes { @@ -169,37 +205,45 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // track number of allocs left on this node to be drained allocsLeft := false for _, alloc := range allocs { - if _, ok := drainable[alloc.Namespace]; !ok { - // namespace does not exist - drainable[alloc.Namespace] = make(map[string]*drainingJob) - } + jobkey := jobKey{alloc.Namespace, alloc.JobID} - if _, ok := drainable[alloc.Namespace][alloc.JobID]; ok { + if _, ok := drainable[jobkey]; ok { // already found continue } + if _, ok := skipJob[jobkey]; ok { + // already looked up and skipped + continue + } + // job does not found yet job, err := snapshot.JobByID(nil, alloc.Namespace, alloc.JobID) if err != nil { //FIXME panic(err) } - //TODO check for job == nil? // Don't bother collecting system jobs if job.Type == structs.JobTypeSystem { + skipJob[jobkey] = struct{}{} + s.logger.Printf("[TRACE] nomad.drain: skipping system job %s", job.Name) continue } - // If a drainable alloc isn't yet stopping this - // node has allocs left to be drained + // If alloc isn't yet terminal this node has + // allocs left to be drained if !alloc.TerminalStatus() { - allocsLeft = true + if !allocsLeft { + s.logger.Printf("[TRACE] nomad.drain: node %s has allocs left to drain", nodeID[:6]) + allocsLeft = true + } } // Don't bother collecting batch jobs for nodes that haven't hit their deadline if job.Type == structs.JobTypeBatch && node.DrainStrategy.DeadlineTime().After(now) { + s.logger.Printf("[TRACE] nomad.drain: not draining batch job %s because deadline isn't for %s", job.Name, node.DrainStrategy.DeadlineTime().Sub(now)) + skipJob[jobkey] = struct{}{} continue } @@ -209,100 +253,109 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { panic(err) } - drainable[alloc.Namespace][alloc.JobID] = &drainingJob{ + // Count the number of down (terminal or nil deployment status) per task group + if job.Type == structs.JobTypeService { + n := 0 + for _, a := range jobAllocs { + if !a.TerminalStatus() && a.DeploymentStatus != nil { + upPerTG[makeTaskGroupKey(a)]++ + n++ + } + } + s.logger.Printf("[TRACE] nomad.drain: job %s has %d task groups running", job.Name, n) + } + + drainable[jobkey] = &drainingJob{ job: job, allocs: jobAllocs, } + + jobWatcher.watch(jobkey, nodeID) } // if node has no allocs, it's done draining! 
if !allocsLeft { + s.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain", nodeID) + jobWatcher.nodeDone(nodeID) delete(nodes, nodeID) doneNodes[nodeID] = node } } - // Initialize stoplist with a count of allocs already draining per task group - //TODO wrap this up in a new func + // stoplist are the allocations to stop and their jobs to emit + // evaluations for stoplist := &stopAllocs{ - perTaskGroup: make(map[string]int, len(drainingAllocs)), - allocBatch: make([]*structs.Allocation, len(drainingAllocs)), - jobBatch: make(map[string]*structs.Job), - } - // initialize perTaskGroup to be the number of total *currently draining* allocations per task group - for _, a := range drainingAllocs { - stoplist.perTaskGroup[a.tgKey]++ + allocBatch: make([]*structs.Allocation, 0, len(drainable)), + jobBatch: make(map[jobKey]*structs.Job), } // deadlineNodes is a map of node IDs that have reached their // deadline and allocs that will be stopped due to deadline deadlineNodes := map[string]int{} - //TODO build drain list considering deadline & max_parallel - for _, drainingJobs := range drainable { - for _, drainingJob := range drainingJobs { - for _, alloc := range drainingJob.allocs { - // Already draining/dead allocs don't need to be drained - if alloc.TerminalStatus() { - continue - } + // build drain list considering deadline & max_parallel + for _, drainingJob := range drainable { + for _, alloc := range drainingJob.allocs { + // Already draining/dead allocs don't need to be drained + if alloc.TerminalStatus() { + continue + } - node, ok := nodes[alloc.NodeID] - if !ok { - // Alloc's node is not draining so not elligible for draining! - continue - } + node, ok := nodes[alloc.NodeID] + if !ok { + // Alloc's node is not draining so not elligible for draining! + continue + } - if node.DrainStrategy.DeadlineTime().Before(now) { - s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) - // Alloc's Node has reached its deadline - stoplist.add(drainingJob.job, alloc) + tgKey := makeTaskGroupKey(alloc) - deadlineNodes[node.ID]++ + if node.DrainStrategy.DeadlineTime().Before(now) { + s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + // Alloc's Node has reached its deadline + stoplist.add(drainingJob.job, alloc) + upPerTG[tgKey]-- - //FIXME purge from watchlist? - continue - } + deadlineNodes[node.ID]++ + continue + } - // Batch jobs are only stopped when the node - // deadline is reached which has already been - // done. - if drainingJob.job.Type == structs.JobTypeBatch { - continue - } + // Batch jobs are only stopped when the node + // deadline is reached which has already been + // done. 
+ if drainingJob.job.Type == structs.JobTypeBatch { + continue + } - // Stop allocs with count=1, max_parallel==0, or draining how many allocs are - // already draining for this task - // group, drain and track this alloc - tgKey := makeTaskGroupKey(alloc) + s.logger.Printf("[TRACE] nomad.drain: considering job %s alloc %s count %d maxp %d up %d", + drainingJob.job.Name, alloc.ID[:6], tg.Count, tg.Migrate.MaxParallel, upPerTG[tgKey]) - //FIXME change this to be based off of the sum(deploymentstatus!=nil && clientstatus==running) for this task group - if tg.Migrate.MaxParallel > stoplist.perTaskGroup[tgKey] { - s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to max parallel", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) - // More migrations are allowed, add to stoplist - stoplist.add(drainingJob.job, alloc) + // Count - MaxParalell = minimum number of allocations that must be "up" + minUp := (tg.Count - tg.Migrate.MaxParallel) - // Also add to prevAllocWatcher - prevAllocs.watch(alloc.ID) - } + // If minimum is < the current number up it is safe to stop one. + if minUp < upPerTG[tgKey] { + s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to max parallel", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + // More migrations are allowed, add to stoplist + stoplist.add(drainingJob.job, alloc) + upPerTG[tgKey]-- } } } @@ -310,6 +363,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // log drains due to node deadlines for nodeID, remaining := range deadlineNodes { s.logger.Printf("[DEBUG] nomad.drain: node %s drain deadline reached; stopping %d remaining allocs", nodeID, remaining) + jobWatcher.nodeDone(nodeID) } if len(stoplist.allocBatch) > 0 { @@ -425,17 +479,16 @@ func (n *nodeWatcher) run(ctx context.Context) { newNodes := resp.([]*structs.Node) n.logger.Printf("[TRACE] nomad.drain: %d nodes to consider", len(newNodes)) //FIXME remove for _, newNode := range newNodes { - if _, ok := n.nodes[newNode.ID]; ok { - // Node was draining + if existingNode, ok := n.nodes[newNode.ID]; ok { + // Node was draining, see if it has changed if !newNode.Drain { // Node stopped draining delete(n.nodes, newNode.ID) changed = true - } else { + } else if !newNode.DrainStrategy.DeadlineTime().Equal(existingNode.DrainStrategy.DeadlineTime()) { // Update deadline n.nodes[newNode.ID] = newNode - //FIXME set changed if it changed? - //changed = true + changed = true } } else { // Node was not draining @@ -492,73 +545,78 @@ func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) return resp, index, nil } -// prevAllocWatcher monitors allocation updates for allocations which replace -// draining allocations. -type prevAllocWatcher struct { - // watchList is a map of alloc ids to look for in PreviousAllocation - // fields of new allocs - watchList map[string]struct{} - watchListMu sync.Mutex +type jobWatcher struct { + // allocsIndex to start watching from + allocsIndex uint64 - state *state.StateStore + // job -> node.ID + jobs map[jobKey]string + jobsMu sync.Mutex - // allocIndex to start watching from - allocIndex uint64 + jobsCh chan map[jobKey]struct{} - // allocsCh is sent Allocation.IDs as they're removed from the watchList - allocsCh chan string + state *state.StateStore logger *log.Logger } -// newPrevAllocWatcher creates a new prevAllocWatcher watching drainingAllocs -// from allocIndex in the state store. Must call run to start watching. 
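The minUp arithmetic above is the core of max_parallel enforcement during a drain: Count minus Migrate.MaxParallel is the number of allocations in a task group that must stay up, and an alloc is only added to the stoplist while the up count is still above that floor. With assumed, illustrative numbers:

    count, maxParallel, up := 3, 1, 3 // one task group
    minUp := count - maxParallel      // 2 allocations must remain up
    for minUp < up {                  // same per-alloc check as the drain loop
        up--                          // stoplist.add(...) then upPerTG[tgKey]--
    }
    // after one pass up == 2, so no further stops are scheduled until
    // replacements report healthy and the count climbs back above minUp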
-func newPrevAllocWatcher(logger *log.Logger, drainingAllocs map[string]drainingAlloc, allocIndex uint64, - state *state.StateStore) *prevAllocWatcher { - - watchList := make(map[string]struct{}, len(drainingAllocs)) - for allocID := range drainingAllocs { - watchList[allocID] = struct{}{} +func newJobWatcher(logger *log.Logger, jobs map[jobKey]string, allocsIndex uint64, state *state.StateStore) *jobWatcher { + return &jobWatcher{ + allocsIndex: allocsIndex, + logger: logger, + jobs: jobs, + jobsCh: make(chan map[jobKey]struct{}), + state: state, } +} + +func (j *jobWatcher) watch(k jobKey, nodeID string) { + j.logger.Printf("[TRACE] nomad.drain: watching job %s on draining node %s", k.jobid, nodeID[:6]) + j.jobsMu.Lock() + j.jobs[k] = nodeID + j.jobsMu.Unlock() +} - return &prevAllocWatcher{ - watchList: watchList, - state: state, - allocIndex: allocIndex, - allocsCh: make(chan string, 8), //FIXME 8? really? what should this be - logger: logger, +func (j *jobWatcher) nodeDone(nodeID string) { + j.jobsMu.Lock() + defer j.jobsMu.Unlock() + for k, v := range j.jobs { + if v == nodeID { + j.logger.Printf("[TRACE] nomad.drain: UNwatching job %s on done draining node %s", k.jobid, nodeID[:6]) + delete(j.jobs, k) + } } } -// watch for an allocation ID to be replaced. -func (p *prevAllocWatcher) watch(allocID string) { - p.watchListMu.Lock() - defer p.watchListMu.Unlock() - p.watchList[allocID] = struct{}{} +func (j *jobWatcher) WaitCh() <-chan map[jobKey]struct{} { + return j.jobsCh } -// run the prevAllocWatcher and send replaced draining alloc IDs on allocsCh. -func (p *prevAllocWatcher) run(ctx context.Context) { - // index to watch from +func (j *jobWatcher) run(ctx context.Context) { var resp interface{} var err error for { + //FIXME have watchAllocs create a closure and give it a copy of j.jobs to remove locking? //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? - resp, p.allocIndex, err = p.state.BlockingQuery(p.queryPrevAlloc, p.allocIndex, ctx) + var newIndex uint64 + resp, newIndex, err = j.state.BlockingQuery(j.watchAllocs, j.allocsIndex, ctx) if err != nil { if err == context.Canceled { - p.logger.Printf("[TRACE] nomad.drain: previous allocation watcher shutting down") + j.logger.Printf("[TRACE] nomad.drain: job watcher shutting down") return } - p.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) + j.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) return } - allocIDs := resp.([]string) - for _, id := range allocIDs { + j.logger.Printf("[TRACE] nomad.drain: job watcher old index: %d new index: %d", j.allocsIndex, newIndex) + j.allocsIndex = newIndex + + changedJobs := resp.(map[jobKey]struct{}) + if len(changedJobs) > 0 { select { - case p.allocsCh <- id: + case j.jobsCh <- changedJobs: case <-ctx.Done(): return } @@ -566,8 +624,7 @@ func (p *prevAllocWatcher) run(ctx context.Context) { } } -// queryPrevAlloc is the BlockingQuery func for scanning for replacement allocs -func (p *prevAllocWatcher) queryPrevAlloc(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { +func (j *jobWatcher) watchAllocs(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { iter, err := state.Allocs(ws) if err != nil { return nil, 0, err @@ -578,11 +635,10 @@ func (p *prevAllocWatcher) queryPrevAlloc(ws memdb.WatchSet, state *state.StateS return nil, 0, err } - //FIXME do fine grained locking around watclist mutations? 
- p.watchListMu.Lock() - defer p.watchListMu.Unlock() + skipped := 0 - resp := make([]string, 0, len(p.watchList)) + // job ids + resp := map[jobKey]struct{}{} for { raw := iter.Next() @@ -591,26 +647,35 @@ func (p *prevAllocWatcher) queryPrevAlloc(ws memdb.WatchSet, state *state.StateS } alloc := raw.(*structs.Allocation) - _, ok := p.watchList[alloc.PreviousAllocation] + + j.jobsMu.Lock() + _, ok := j.jobs[jobKey{alloc.Namespace, alloc.JobID}] + j.jobsMu.Unlock() + if !ok { - // PreviousAllocation not in watchList, skip it + // alloc is not part of a draining job + skipped++ continue } - // If the migration health is set on the replacement alloc we can stop watching the drained alloc + // don't wake drain loop if alloc hasn't updated its health if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { - delete(p.watchList, alloc.PreviousAllocation) - resp = append(resp, alloc.PreviousAllocation) + j.logger.Printf("[TRACE] nomad.drain: job watcher found alloc %s - deployment status: %t", alloc.ID[:6], *alloc.DeploymentStatus.Healthy) + resp[jobKey{alloc.Namespace, alloc.JobID}] = struct{}{} + } else { + j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring alloc %s - no deployment status", alloc.ID[:6]) } } + j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring %d allocs - not part of draining job at index %d", skipped, index) + return resp, index, nil } // initDrainer initializes the node drainer state and returns a list of // draining nodes as well as allocs that are draining that should be watched // for a replacement. -func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*structs.Node, uint64, map[string]drainingAlloc, uint64) { +func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*structs.Node, uint64, map[jobKey]string, uint64) { // StateStore.Snapshot never returns an error so don't bother checking it snapshot, _ := state.Snapshot() now := time.Now() @@ -624,9 +689,8 @@ func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*struc // map of draining nodes keyed by node ID nodes := map[string]*structs.Node{} - //FIXME rollup by composite namespace+job.ID+tg key? - // List of draining allocs by namespace and job: namespace -> job.ID -> alloc.ID -> *Allocation - allocsByNS := map[string]map[string]map[string]*structs.Allocation{} + // map of draining job IDs keyed by {namespace, job id} -> node.ID + jobs := map[jobKey]string{} for { raw := iter.Next() @@ -655,88 +719,7 @@ func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*struc } for _, alloc := range allocs { - //FIXME is it safe to assume the drainer set the desired status to stop? 
- if alloc.DesiredStatus == structs.AllocDesiredStatusStop { - if allocsByJob, ok := allocsByNS[alloc.Namespace]; ok { - if allocs, ok := allocsByJob[alloc.JobID]; ok { - allocs[alloc.ID] = alloc - } else { - // First alloc for job - allocsByJob[alloc.JobID] = map[string]*structs.Allocation{alloc.ID: alloc} - } - } else { - // First alloc in namespace - allocsByNS[alloc.Namespace] = map[string]map[string]*structs.Allocation{ - alloc.JobID: map[string]*structs.Allocation{alloc.ID: alloc}, - } - } - } - } - } - - // drainingAllocs is the list of all allocations that are currently - // draining and waiting for a replacement - drainingAllocs := map[string]drainingAlloc{} - - for ns, allocsByJobs := range allocsByNS { - for jobID, allocs := range allocsByJobs { - for allocID, alloc := range allocs { - job, err := snapshot.JobByID(nil, ns, jobID) - if err != nil { - logger.Printf("[ERR] nomad.drain: error getting job %q for alloc %q: %v", alloc.JobID, allocID, err) - //FIXME - panic(err) - } - - // Don't track drains for stopped or gc'd jobs - if job == nil || job.Status == structs.JobStatusDead { - continue - } - - jobAllocs, err := snapshot.AllocsByJob(nil, ns, jobID, true) - if err != nil { - //FIXME - panic(err) - } - - // Remove drained allocs for replacement allocs - for _, alloc := range jobAllocs { - if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { - delete(allocs, alloc.PreviousAllocation) - } - } - - //FIXME why are we doing a nested loop over allocs? - // Any remaining allocs need to be tracked - for allocID, alloc := range allocs { - tg := job.LookupTaskGroup(alloc.TaskGroup) - if tg == nil { - logger.Printf("[DEBUG] nomad.drain: unable to find task group %q for alloc %q", alloc.TaskGroup, allocID) - continue - } - - if tg.Migrate == nil { - // No migrate strategy so don't track - continue - } - - //FIXME Remove this? ModifyTime is not updated as expected - - // alloc.ModifyTime + HealthyDeadline is >= the - // healthy deadline for the allocation, so we - // can stop tracking it at that time. - deadline := time.Unix(0, alloc.ModifyTime).Add(tg.Migrate.HealthyDeadline) - - if deadline.After(now) { - // deadline already reached; don't bother tracking - continue - } - - // Draining allocation hasn't been replaced or - // reached its deadline; track it! 
- drainingAllocs[allocID] = newDrainingAlloc(alloc, deadline) - } - } + jobs[jobKey{alloc.Namespace, alloc.JobID}] = node.ID } } @@ -748,5 +731,5 @@ func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*struc if allocsIndex == 0 { allocsIndex = 1 } - return nodes, nodesIndex, drainingAllocs, allocsIndex + return nodes, nodesIndex, jobs, allocsIndex } diff --git a/nomad/drain_test.go b/nomad/drain_test.go index bf1ec875de3a..e611fbdee2cb 100644 --- a/nomad/drain_test.go +++ b/nomad/drain_test.go @@ -62,6 +62,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { systemJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() systemJob.TaskGroups[0].Tasks[0].Services = nil + // Batch job will run until the node's drain deadline is reached batchJob := mock.Job() batchJob.Name = "batch-job" batchJob.Type = structs.JobTypeBatch @@ -134,6 +135,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) } } + server.logger.Println("----------------------------------------------------------------------quitting--------------------------------------------------------") t.Fatalf("failed waiting for all allocs to start: %v", err) }) @@ -182,10 +184,10 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Logf("%d alloc %s job %s status %s prev %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus, alloc.PreviousAllocation) } } - t.Fatalf("failed waiting for all allocs to start: %v", err) + server.logger.Println("----------------------------------------------------------------------quitting--------------------------------------------------------") + t.Errorf("failed waiting for all allocs to migrate: %v", err) }) - // Wait for all service allocs to be replaced jobs, err := rpc.JobList() require.Nil(err) t.Logf("%d jobs", len(jobs.Jobs)) From 832b1d5694465f86fbdcafeba4e4ccfd0f749ad5 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 22 Feb 2018 17:38:44 -0800 Subject: [PATCH 06/79] switch to new raft DesiredTransition message --- api/allocations.go | 8 +-- nomad/alloc_endpoint.go | 12 ++--- nomad/alloc_endpoint_test.go | 48 +++++++++++++---- nomad/drain.go | 92 +++++++++++---------------------- nomad/drain_test.go | 16 ++++-- nomad/fsm.go | 17 +++--- nomad/fsm_test.go | 32 ++++++++---- nomad/state/state_store.go | 23 +++++---- nomad/state/state_store_test.go | 37 ++++++++----- nomad/structs/structs.go | 31 ++++++----- scheduler/generic_sched_test.go | 10 ++-- scheduler/reconcile_test.go | 16 +++--- scheduler/reconcile_util.go | 2 +- scheduler/system_sched_test.go | 6 +-- scheduler/util.go | 2 +- scheduler/util_test.go | 4 +- testutil/rpcapi/rcpapi.go | 28 ++++++++++ 17 files changed, 228 insertions(+), 156 deletions(-) diff --git a/api/allocations.go b/api/allocations.go index 89206dadee0b..c3759806741f 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -81,7 +81,7 @@ type Allocation struct { Metrics *AllocationMetric DesiredStatus string DesiredDescription string - DesiredTransistion DesiredTransistion + DesiredTransition DesiredTransition ClientStatus string ClientDescription string TaskStates map[string]*TaskState @@ -207,10 +207,10 @@ type RescheduleEvent struct { PrevNodeID string } -// DesiredTransistion is used to mark an allocation as having a desired state -// transistion. This information can be used by the scheduler to make the +// DesiredTransition is used to mark an allocation as having a desired state +// transition. 
This information can be used by the scheduler to make the // correct decision. -type DesiredTransistion struct { +type DesiredTransition struct { // Migrate is used to indicate that this allocation should be stopped and // migrated to another node. Migrate *bool diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index a7f5e3bdc2ac..405136ca8cc1 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -202,13 +202,13 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest, return a.srv.blockingRPC(&opts) } -// UpdateDesiredTransistion is used to update the desired transistions of an +// UpdateDesiredTransition is used to update the desired transitions of an // allocation. -func (a *Alloc) UpdateDesiredTransistion(args *structs.AllocUpdateDesiredTransistionRequest, reply *structs.GenericResponse) error { - if done, err := a.srv.forward("Alloc.UpdateDesiredTransistion", args, args, reply); done { +func (a *Alloc) UpdateDesiredTransition(args *structs.AllocUpdateDesiredTransitionRequest, reply *structs.GenericResponse) error { + if done, err := a.srv.forward("Alloc.UpdateDesiredTransition", args, args, reply); done { return err } - defer metrics.MeasureSince([]string{"nomad", "alloc", "update_desired_transistion"}, time.Now()) + defer metrics.MeasureSince([]string{"nomad", "alloc", "update_desired_transition"}, time.Now()) // Check that it is a management token. if aclObj, err := a.srv.ResolveToken(args.AuthToken); err != nil { @@ -223,9 +223,9 @@ func (a *Alloc) UpdateDesiredTransistion(args *structs.AllocUpdateDesiredTransis } // Commit this update via Raft - _, index, err := a.srv.raftApply(structs.AllocUpdateDesiredTransistionRequestType, args) + _, index, err := a.srv.raftApply(structs.AllocUpdateDesiredTransitionRequestType, args) if err != nil { - a.srv.logger.Printf("[ERR] nomad.allocs: AllocUpdateDesiredTransistionRequest failed: %v", err) + a.srv.logger.Printf("[ERR] nomad.allocs: AllocUpdateDesiredTransitionRequest failed: %v", err) return err } diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index f898f2b7dd9f..5d309d7c3b96 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -484,7 +484,7 @@ func TestAllocEndpoint_GetAllocs_Blocking(t *testing.T) { } } -func TestAllocEndpoint_UpdateDesiredTransistion(t *testing.T) { +func TestAllocEndpoint_UpdateDesiredTransition(t *testing.T) { t.Parallel() require := require.New(t) @@ -501,16 +501,38 @@ func TestAllocEndpoint_UpdateDesiredTransistion(t *testing.T) { require.Nil(state.UpsertJobSummary(999, mock.JobSummary(alloc2.JobID))) require.Nil(state.UpsertAllocs(1000, []*structs.Allocation{alloc, alloc2})) - t1 := &structs.DesiredTransistion{ + t1 := &structs.DesiredTransition{ Migrate: helper.BoolToPtr(true), } // Update the allocs desired status - get := &structs.AllocUpdateDesiredTransistionRequest{ - Allocs: map[string]*structs.DesiredTransistion{ + get := &structs.AllocUpdateDesiredTransitionRequest{ + Allocs: map[string]*structs.DesiredTransition{ alloc.ID: t1, alloc2.ID: t1, }, + Evals: []*structs.Evaluation{ + { + ID: uuid.Generate(), + Namespace: alloc.Namespace, + Priority: alloc.Job.Priority, + Type: alloc.Job.Type, + TriggeredBy: structs.EvalTriggerNodeDrain, + JobID: alloc.Job.ID, + JobModifyIndex: alloc.Job.ModifyIndex, + Status: structs.EvalStatusPending, + }, + { + ID: uuid.Generate(), + Namespace: alloc2.Namespace, + Priority: alloc2.Job.Priority, + Type: alloc2.Job.Type, + TriggeredBy: structs.EvalTriggerNodeDrain, + JobID: alloc2.Job.ID, + 
JobModifyIndex: alloc2.Job.ModifyIndex, + Status: structs.EvalStatusPending, + }, + }, WriteRequest: structs.WriteRequest{ Region: "global", }, @@ -518,14 +540,14 @@ func TestAllocEndpoint_UpdateDesiredTransistion(t *testing.T) { // Try without permissions var resp structs.GenericResponse - err := msgpackrpc.CallWithCodec(codec, "Alloc.UpdateDesiredTransistion", get, &resp) + err := msgpackrpc.CallWithCodec(codec, "Alloc.UpdateDesiredTransition", get, &resp) require.NotNil(err) require.True(structs.IsErrPermissionDenied(err)) // Try with permissions get.WriteRequest.AuthToken = s1.getLeaderAcl() var resp2 structs.GenericResponse - require.Nil(msgpackrpc.CallWithCodec(codec, "Alloc.UpdateDesiredTransistion", get, &resp2)) + require.Nil(msgpackrpc.CallWithCodec(codec, "Alloc.UpdateDesiredTransition", get, &resp2)) require.NotZero(resp2.Index) // Look up the allocations @@ -533,9 +555,15 @@ func TestAllocEndpoint_UpdateDesiredTransistion(t *testing.T) { require.Nil(err) out2, err := state.AllocByID(nil, alloc.ID) require.Nil(err) + e1, err := state.EvalByID(nil, get.Evals[0].ID) + require.Nil(err) + e2, err := state.EvalByID(nil, get.Evals[1].ID) + require.Nil(err) - require.NotNil(out1.DesiredTransistion.Migrate) - require.NotNil(out2.DesiredTransistion.Migrate) - require.True(*out1.DesiredTransistion.Migrate) - require.True(*out2.DesiredTransistion.Migrate) + require.NotNil(out1.DesiredTransition.Migrate) + require.NotNil(out2.DesiredTransition.Migrate) + require.NotNil(e1) + require.NotNil(e2) + require.True(*out1.DesiredTransition.Migrate) + require.True(*out2.DesiredTransition.Migrate) } diff --git a/nomad/drain.go b/nomad/drain.go index 01732db1448a..f0e1dd59b89f 100644 --- a/nomad/drain.go +++ b/nomad/drain.go @@ -8,6 +8,7 @@ import ( "time" memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" @@ -54,20 +55,17 @@ func makeTaskGroupKey(a *structs.Allocation) string { // stopAllocs tracks allocs to drain by a unique TG key type stopAllocs struct { - allocBatch []*structs.Allocation + allocBatch map[string]*structs.DesiredTransition // namespace+jobid -> Job jobBatch map[jobKey]*structs.Job } -//FIXME this method does an awful lot func (s *stopAllocs) add(j *structs.Job, a *structs.Allocation) { - // Update the allocation - a.ModifyTime = time.Now().UnixNano() - a.DesiredStatus = structs.AllocDesiredStatusStop - - // Add alloc to the allocation batch - s.allocBatch = append(s.allocBatch, a) + // Add the desired migration transition to the batch + s.allocBatch[a.ID] = &structs.DesiredTransition{ + Migrate: helper.BoolToPtr(true), + } // Add job to the job batch s.jobBatch[jobKey{a.Namespace, a.JobID}] = j @@ -204,6 +202,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // track number of allocs left on this node to be drained allocsLeft := false + deadlineReached := node.DrainStrategy.DeadlineTime().Before(now) for _, alloc := range allocs { jobkey := jobKey{alloc.Namespace, alloc.JobID} @@ -224,13 +223,6 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { panic(err) } - // Don't bother collecting system jobs - if job.Type == structs.JobTypeSystem { - skipJob[jobkey] = struct{}{} - s.logger.Printf("[TRACE] nomad.drain: skipping system job %s", job.Name) - continue - } - // If alloc isn't yet terminal this node has // allocs left to be drained if !alloc.TerminalStatus() { @@ -240,9 +232,10 @@ func (s *Server) 
startNodeDrainer(stopCh chan struct{}) { } } - // Don't bother collecting batch jobs for nodes that haven't hit their deadline - if job.Type == structs.JobTypeBatch && node.DrainStrategy.DeadlineTime().After(now) { - s.logger.Printf("[TRACE] nomad.drain: not draining batch job %s because deadline isn't for %s", job.Name, node.DrainStrategy.DeadlineTime().Sub(now)) + // Don't bother collecting system/batch jobs for nodes that haven't hit their deadline + if job.Type != structs.JobTypeService && !deadlineReached { + s.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s", + job.Type, job.Name, node.DrainStrategy.DeadlineTime().Sub(now)) skipJob[jobkey] = struct{}{} continue } @@ -273,26 +266,21 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { jobWatcher.watch(jobkey, nodeID) } - // if node has no allocs, it's done draining! - if !allocsLeft { - s.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain", nodeID) + // if node has no allocs or has hit its deadline, it's done draining! + if !allocsLeft || deadlineReached { + s.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain or has reached deadline", nodeID) jobWatcher.nodeDone(nodeID) - delete(nodes, nodeID) doneNodes[nodeID] = node } } - // stoplist are the allocations to stop and their jobs to emit + // stoplist are the allocations to migrate and their jobs to emit // evaluations for stoplist := &stopAllocs{ - allocBatch: make([]*structs.Allocation, 0, len(drainable)), + allocBatch: make(map[string]*structs.DesiredTransition), jobBatch: make(map[jobKey]*structs.Job), } - // deadlineNodes is a map of node IDs that have reached their - // deadline and allocs that will be stopped due to deadline - deadlineNodes := map[string]int{} - // build drain list considering deadline & max_parallel for _, drainingJob := range drainable { for _, alloc := range drainingJob.allocs { @@ -315,14 +303,13 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { stoplist.add(drainingJob.job, alloc) upPerTG[tgKey]-- - deadlineNodes[node.ID]++ continue } - // Batch jobs are only stopped when the node - // deadline is reached which has already been - // done. - if drainingJob.job.Type == structs.JobTypeBatch { + // Batch/System jobs are only stopped when the + // node deadline is reached which has already + // been done. 
+ if drainingJob.job.Type != structs.JobTypeService { continue } @@ -360,32 +347,9 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { } } - // log drains due to node deadlines - for nodeID, remaining := range deadlineNodes { - s.logger.Printf("[DEBUG] nomad.drain: node %s drain deadline reached; stopping %d remaining allocs", nodeID, remaining) - jobWatcher.nodeDone(nodeID) - } - if len(stoplist.allocBatch) > 0 { s.logger.Printf("[DEBUG] nomad.drain: stopping %d alloc(s) for %d job(s)", len(stoplist.allocBatch), len(stoplist.jobBatch)) - // Stop allocs in stoplist and add them to drainingAllocs + prevAllocWatcher - batch := &structs.AllocUpdateRequest{ - Alloc: stoplist.allocBatch, - WriteRequest: structs.WriteRequest{Region: s.config.Region}, - } - - // Commit this update via Raft - //TODO Not the right request - _, index, err := s.raftApply(structs.AllocClientUpdateRequestType, batch) - if err != nil { - //FIXME - panic(err) - } - - //TODO i bet there's something useful to do with this index - _ = index - // Reevaluate affected jobs evals := make([]*structs.Evaluation, 0, len(stoplist.jobBatch)) for _, job := range stoplist.jobBatch { @@ -401,17 +365,23 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { }) } - evalUpdate := &structs.EvalUpdateRequest{ + // Send raft request + batch := &structs.AllocUpdateDesiredTransitionRequest{ + Allocs: stoplist.allocBatch, Evals: evals, WriteRequest: structs.WriteRequest{Region: s.config.Region}, } - // Commit this evaluation via Raft - _, _, err = s.raftApply(structs.EvalUpdateRequestType, evalUpdate) + // Commit this update via Raft + //TODO Not the right request + _, index, err := s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, batch) if err != nil { //FIXME panic(err) } + + //TODO i bet there's something useful to do with this index + _ = index } // Unset drain for nodes done draining @@ -429,6 +399,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { panic(err) } s.logger.Printf("[INFO] nomad.drain: node %s (%s) completed draining", nodeID, node.Name) + delete(nodes, nodeID) } } } @@ -529,8 +500,7 @@ func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) return nil, 0, err } - //FIXME initial cap? 
- resp := make([]*structs.Node, 0, 1) + resp := make([]*structs.Node, 0, 8) for { raw := iter.Next() diff --git a/nomad/drain_test.go b/nomad/drain_test.go index e611fbdee2cb..0b343549e7ce 100644 --- a/nomad/drain_test.go +++ b/nomad/drain_test.go @@ -15,6 +15,8 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" "github.com/hashicorp/nomad/testutil/rpcapi" + "github.com/kr/pretty" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -188,9 +190,16 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Errorf("failed waiting for all allocs to migrate: %v", err) }) + node1, err := rpc.NodeGet(c1.NodeID()) + assert := assert.New(t) + require.Nil(err) + assert.False(node1.Node.Drain) + assert.Nil(node1.Node.DrainStrategy) + assert.Equal(structs.NodeSchedulingIneligible, node1.Node.SchedulingEligibility) + jobs, err := rpc.JobList() require.Nil(err) - t.Logf("%d jobs", len(jobs.Jobs)) + t.Logf("--> %d jobs", len(jobs.Jobs)) for _, job := range jobs.Jobs { t.Logf("job: %s status: %s %s", job.Name, job.Status, job.StatusDescription) } @@ -211,8 +220,9 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { panic("unreachable") }) - t.Logf("%d allocs", len(allocs)) + t.Logf("--> %d allocs", len(allocs)) for _, alloc := range allocs { - t.Logf("job: %s node: %s alloc: %s desired: %s actual: %s replaces: %s", alloc.Job.Name, alloc.NodeID[:6], alloc.ID, alloc.DesiredStatus, alloc.ClientStatus, alloc.PreviousAllocation) + t.Logf("job: %s node: %s alloc: %s desired_status: %s desired_transition: %s actual: %s replaces: %s", + alloc.Job.Name, alloc.NodeID[:6], alloc.ID[:6], alloc.DesiredStatus, pretty.Sprint(alloc.DesiredTransition.Migrate), alloc.ClientStatus, alloc.PreviousAllocation) } } diff --git a/nomad/fsm.go b/nomad/fsm.go index a1d9113cada2..c8babc50ddb2 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -240,7 +240,7 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyUpsertNodeEvent(buf[1:], log.Index) case structs.JobBatchDeregisterRequestType: return n.applyBatchDeregisterJob(buf[1:], log.Index) - case structs.AllocUpdateDesiredTransistionRequestType: + case structs.AllocUpdateDesiredTransitionRequestType: return n.applyAllocUpdateDesiredTransition(buf[1:], log.Index) } @@ -653,17 +653,22 @@ func (n *nomadFSM) applyAllocClientUpdate(buf []byte, index uint64) interface{} return nil } -// applyAllocUpdateDesiredTransition is used to update the desired transistions +// applyAllocUpdateDesiredTransition is used to update the desired transitions // of a set of allocations. 
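As a rough illustration of the request path wired up above, the following hypothetical sketch shows how a caller in the nomad package (such as the drainer) could mark one allocation for migration and enqueue its drain evaluation in a single Raft entry using the request type added in this patch. markAllocForMigration is not part of the change; the job and allocID arguments are assumed to come from the drainer's state snapshot.

package nomad

import (
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
)

// markAllocForMigration (hypothetical) batches a DesiredTransition update with
// the evaluation that will reschedule the work, then commits both via Raft so
// the FSM handler that follows persists them in one apply.
func markAllocForMigration(s *Server, job *structs.Job, allocID string) error {
	req := &structs.AllocUpdateDesiredTransitionRequest{
		Allocs: map[string]*structs.DesiredTransition{
			allocID: {Migrate: helper.BoolToPtr(true)},
		},
		Evals: []*structs.Evaluation{{
			ID:          uuid.Generate(),
			Namespace:   job.Namespace,
			Priority:    job.Priority,
			Type:        job.Type,
			TriggeredBy: structs.EvalTriggerNodeDrain,
			JobID:       job.ID,
			Status:      structs.EvalStatusPending,
		}},
		WriteRequest: structs.WriteRequest{Region: s.config.Region},
	}
	// Commit the update; the index could be used for blocking queries.
	_, _, err := s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, req)
	return err
}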
func (n *nomadFSM) applyAllocUpdateDesiredTransition(buf []byte, index uint64) interface{} { - defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update_desired_transistion"}, time.Now()) - var req structs.AllocUpdateDesiredTransistionRequest + defer metrics.MeasureSince([]string{"nomad", "fsm", "alloc_update_desired_transition"}, time.Now()) + var req structs.AllocUpdateDesiredTransitionRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpdateAllocsDesiredTransistions(index, req.Allocs); err != nil { - n.logger.Printf("[ERR] nomad.fsm: UpdateAllocsDesiredTransistions failed: %v", err) + if err := n.state.UpdateAllocsDesiredTransitions(index, req.Allocs, req.Evals); err != nil { + n.logger.Printf("[ERR] nomad.fsm: UpdateAllocsDesiredTransitions failed: %v", err) + return err + } + + if err := n.upsertEvals(index, req.Evals); err != nil { + n.logger.Printf("[ERR] nomad.fsm: AllocUpdateDesiredTransition failed to upsert %d eval(s): %v", len(req.Evals), err) return err } return nil diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index a04f1cd2f1c1..a61a9e84fa49 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -12,6 +12,7 @@ import ( "github.com/google/go-cmp/cmp" memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" @@ -1241,7 +1242,7 @@ func TestFSM_UpdateAllocFromClient(t *testing.T) { require.Equal(eval, res) } -func TestFSM_UpdateAllocDesiredTransistion(t *testing.T) { +func TestFSM_UpdateAllocDesiredTransition(t *testing.T) { t.Parallel() fsm := testFSM(t) state := fsm.State() @@ -1254,17 +1255,28 @@ func TestFSM_UpdateAllocDesiredTransistion(t *testing.T) { state.UpsertJobSummary(9, mock.JobSummary(alloc.JobID)) state.UpsertAllocs(10, []*structs.Allocation{alloc, alloc2}) - t1 := &structs.DesiredTransistion{ + t1 := &structs.DesiredTransition{ Migrate: helper.BoolToPtr(true), } - req := structs.AllocUpdateDesiredTransistionRequest{ - Allocs: map[string]*structs.DesiredTransistion{ + eval := &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: alloc.Namespace, + Priority: alloc.Job.Priority, + Type: alloc.Job.Type, + TriggeredBy: structs.EvalTriggerNodeDrain, + JobID: alloc.Job.ID, + JobModifyIndex: alloc.Job.ModifyIndex, + Status: structs.EvalStatusPending, + } + req := structs.AllocUpdateDesiredTransitionRequest{ + Allocs: map[string]*structs.DesiredTransition{ alloc.ID: t1, alloc2.ID: t1, }, + Evals: []*structs.Evaluation{eval}, } - buf, err := structs.Encode(structs.AllocUpdateDesiredTransistionRequestType, req) + buf, err := structs.Encode(structs.AllocUpdateDesiredTransitionRequestType, req) require.Nil(err) resp := fsm.Apply(makeLog(buf)) @@ -1276,11 +1288,13 @@ func TestFSM_UpdateAllocDesiredTransistion(t *testing.T) { require.Nil(err) out2, err := fsm.State().AllocByID(ws, alloc2.ID) require.Nil(err) + _, err = fsm.State().EvalByID(ws, eval.ID) + require.Nil(err) - require.NotNil(out1.DesiredTransistion.Migrate) - require.NotNil(out2.DesiredTransistion.Migrate) - require.True(*out1.DesiredTransistion.Migrate) - require.True(*out2.DesiredTransistion.Migrate) + require.NotNil(out1.DesiredTransition.Migrate) + require.NotNil(out2.DesiredTransition.Migrate) + require.True(*out1.DesiredTransition.Migrate) + require.True(*out2.DesiredTransition.Migrate) } func TestFSM_UpsertVaultAccessor(t 
*testing.T) { diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 1c67327ae4ca..90af315012f5 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -644,8 +644,9 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er } copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible } else { + // When stopping a drain unset the strategy but leave the node + // ineligible for scheduling copyNode.DrainStrategy = nil - copyNode.SchedulingEligibility = structs.NodeSchedulingEligible } copyNode.ModifyIndex = index @@ -2008,15 +2009,17 @@ func (s *StateStore) upsertAllocsImpl(index uint64, allocs []*structs.Allocation return nil } -// UpdateAllocsDesiredTransistions is used to update a set of allocations -// desired transistions. -func (s *StateStore) UpdateAllocsDesiredTransistions(index uint64, allocs map[string]*structs.DesiredTransistion) error { +// UpdateAllocsDesiredTransitions is used to update a set of allocations +// desired transitions. +func (s *StateStore) UpdateAllocsDesiredTransitions(index uint64, allocs map[string]*structs.DesiredTransition, + evals []*structs.Evaluation) error { + txn := s.db.Txn(true) defer txn.Abort() // Handle each of the updated allocations - for id, transistion := range allocs { - if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transistion); err != nil { + for id, transition := range allocs { + if err := s.nestedUpdateAllocDesiredTransition(txn, index, id, transition); err != nil { return err } } @@ -2031,10 +2034,10 @@ func (s *StateStore) UpdateAllocsDesiredTransistions(index uint64, allocs map[st } // nestedUpdateAllocDesiredTransition is used to nest an update of an -// allocations desired transistion +// allocations desired transition func (s *StateStore) nestedUpdateAllocDesiredTransition( txn *memdb.Txn, index uint64, allocID string, - transistion *structs.DesiredTransistion) error { + transition *structs.DesiredTransition) error { // Look for existing alloc existing, err := txn.First("allocs", "id", allocID) @@ -2051,8 +2054,8 @@ func (s *StateStore) nestedUpdateAllocDesiredTransition( // Copy everything from the existing allocation copyAlloc := exist.Copy() - // Merge the desired transistions - copyAlloc.DesiredTransistion.Merge(transistion) + // Merge the desired transitions + copyAlloc.DesiredTransition.Merge(transition) // Update the modify index copyAlloc.ModifyIndex = index diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 4fd2173f94cf..bac9839c298f 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -3823,7 +3823,7 @@ func TestStateStore_UpdateAlloc_NoJob(t *testing.T) { } } -func TestStateStore_UpdateAllocDesiredTransistion(t *testing.T) { +func TestStateStore_UpdateAllocDesiredTransition(t *testing.T) { t.Parallel() require := require.New(t) @@ -3833,21 +3833,32 @@ func TestStateStore_UpdateAllocDesiredTransistion(t *testing.T) { require.Nil(state.UpsertJob(999, alloc.Job)) require.Nil(state.UpsertAllocs(1000, []*structs.Allocation{alloc})) - t1 := &structs.DesiredTransistion{ + t1 := &structs.DesiredTransition{ Migrate: helper.BoolToPtr(true), } - t2 := &structs.DesiredTransistion{ + t2 := &structs.DesiredTransition{ Migrate: helper.BoolToPtr(false), } + eval := &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: alloc.Namespace, + Priority: alloc.Job.Priority, + Type: alloc.Job.Type, + TriggeredBy: structs.EvalTriggerNodeDrain, + JobID: alloc.Job.ID, + JobModifyIndex: 
alloc.Job.ModifyIndex, + Status: structs.EvalStatusPending, + } + evals := []*structs.Evaluation{eval} - m := map[string]*structs.DesiredTransistion{alloc.ID: t1} - require.Nil(state.UpdateAllocsDesiredTransistions(1001, m)) + m := map[string]*structs.DesiredTransition{alloc.ID: t1} + require.Nil(state.UpdateAllocsDesiredTransitions(1001, m, evals)) ws := memdb.NewWatchSet() out, err := state.AllocByID(ws, alloc.ID) require.Nil(err) - require.NotNil(out.DesiredTransistion.Migrate) - require.True(*out.DesiredTransistion.Migrate) + require.NotNil(out.DesiredTransition.Migrate) + require.True(*out.DesiredTransition.Migrate) require.EqualValues(1000, out.CreateIndex) require.EqualValues(1001, out.ModifyIndex) @@ -3855,14 +3866,14 @@ func TestStateStore_UpdateAllocDesiredTransistion(t *testing.T) { require.Nil(err) require.EqualValues(1001, index) - m = map[string]*structs.DesiredTransistion{alloc.ID: t2} - require.Nil(state.UpdateAllocsDesiredTransistions(1002, m)) + m = map[string]*structs.DesiredTransition{alloc.ID: t2} + require.Nil(state.UpdateAllocsDesiredTransitions(1002, m, evals)) ws = memdb.NewWatchSet() out, err = state.AllocByID(ws, alloc.ID) require.Nil(err) - require.NotNil(out.DesiredTransistion.Migrate) - require.False(*out.DesiredTransistion.Migrate) + require.NotNil(out.DesiredTransition.Migrate) + require.False(*out.DesiredTransition.Migrate) require.EqualValues(1000, out.CreateIndex) require.EqualValues(1002, out.ModifyIndex) @@ -3871,8 +3882,8 @@ func TestStateStore_UpdateAllocDesiredTransistion(t *testing.T) { require.EqualValues(1002, index) // Try with a bogus alloc id - m = map[string]*structs.DesiredTransistion{uuid.Generate(): t2} - require.Nil(state.UpdateAllocsDesiredTransistions(1003, m)) + m = map[string]*structs.DesiredTransition{uuid.Generate(): t2} + require.Nil(state.UpdateAllocsDesiredTransitions(1003, m, evals)) } func TestStateStore_JobSummary(t *testing.T) { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index e50921c27cb5..6f6a98a6fb70 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -78,7 +78,7 @@ const ( AutopilotRequestType UpsertNodeEventsType JobBatchDeregisterRequestType - AllocUpdateDesiredTransistionRequestType + AllocUpdateDesiredTransitionRequestType ) const ( @@ -574,12 +574,15 @@ type AllocUpdateRequest struct { WriteRequest } -// AllocUpdateDesiredTransistionRequest is used to submit changes to allocations -// desired transistion state. -type AllocUpdateDesiredTransistionRequest struct { +// AllocUpdateDesiredTransitionRequest is used to submit changes to allocations +// desired transition state. +type AllocUpdateDesiredTransitionRequest struct { // Allocs is the mapping of allocation ids to their desired state - // transistion - Allocs map[string]*DesiredTransistion + // transition + Allocs map[string]*DesiredTransition + + // Evals is the set of evaluations to create + Evals []*Evaluation WriteRequest } @@ -5349,10 +5352,10 @@ func (re *RescheduleEvent) Copy() *RescheduleEvent { return copy } -// DesiredTransistion is used to mark an allocation as having a desired state -// transistion. This information can be used by the scheduler to make the +// DesiredTransition is used to mark an allocation as having a desired state +// transition. This information can be used by the scheduler to make the // correct decision. -type DesiredTransistion struct { +type DesiredTransition struct { // Migrate is used to indicate that this allocation should be stopped and // migrated to another node. 
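A small self-contained sketch of the intended semantics of this type, assuming the Merge and ShouldMigrate helpers defined just below: merging prefers fields set on the incoming transition and leaves unset fields untouched.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// No transition requested yet.
	tr := structs.DesiredTransition{}

	// A drain marks the alloc for migration; Merge prefers the incoming value.
	tr.Merge(&structs.DesiredTransition{Migrate: helper.BoolToPtr(true)})
	fmt.Println(tr.ShouldMigrate()) // true

	// Merging a transition with Migrate unset leaves the existing flag intact.
	tr.Merge(&structs.DesiredTransition{})
	fmt.Println(tr.ShouldMigrate()) // still true
}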
Migrate *bool @@ -5360,14 +5363,14 @@ type DesiredTransistion struct { // Merge merges the two desired transitions, preferring the values from the // passed in object. -func (d *DesiredTransistion) Merge(o *DesiredTransistion) { +func (d *DesiredTransition) Merge(o *DesiredTransition) { if o.Migrate != nil { d.Migrate = o.Migrate } } -// ShouldMigrate returns whether the transistion object dictates a migration. -func (d *DesiredTransistion) ShouldMigrate() bool { +// ShouldMigrate returns whether the transition object dictates a migration. +func (d *DesiredTransition) ShouldMigrate() bool { return d.Migrate != nil && *d.Migrate } @@ -5432,9 +5435,9 @@ type Allocation struct { // DesiredStatusDescription is meant to provide more human useful information DesiredDescription string - // DesiredTransistion is used to indicate that a state transistion + // DesiredTransition is used to indicate that a state transition // is desired for a given reason. - DesiredTransistion DesiredTransistion + DesiredTransition DesiredTransition // Status of the allocation on the client ClientStatus string diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index d1bbf4710334..fd677f952db3 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -2245,7 +2245,7 @@ func TestServiceSched_NodeDown(t *testing.T) { // Mark appropriate allocs for migration for i := 0; i < 7; i++ { out := allocs[i] - out.DesiredTransistion.Migrate = helper.BoolToPtr(true) + out.DesiredTransition.Migrate = helper.BoolToPtr(true) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) @@ -2367,7 +2367,7 @@ func TestServiceSched_NodeDrain(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = fmt.Sprintf("my-job.web[%d]", i) - alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) + alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) allocs = append(allocs, alloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) @@ -2453,7 +2453,7 @@ func TestServiceSched_NodeDrain_Down(t *testing.T) { for i := 0; i < 6; i++ { newAlloc := allocs[i].Copy() newAlloc.ClientStatus = structs.AllocDesiredStatusStop - newAlloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) + newAlloc.DesiredTransition.Migrate = helper.BoolToPtr(true) stop = append(stop, newAlloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), stop)) @@ -2556,7 +2556,7 @@ func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = fmt.Sprintf("my-job.web[%d]", i) - alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) + alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) allocs = append(allocs, alloc) } noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) @@ -3948,7 +3948,7 @@ func TestServiceSched_NodeDrain_Sticky(t *testing.T) { alloc.NodeID = node.ID alloc.Job.TaskGroups[0].Count = 1 alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true - alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) + alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) noErr(t, h.State.UpsertJob(h.NextIndex(), alloc.Job)) noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index a9188fa42ee5..a00471fba603 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -927,7 +927,7 @@ func TestReconciler_DrainNode(t *testing.T) { for i := 0; i < 2; i++ { n := mock.Node() n.ID = allocs[i].NodeID - allocs[i].DesiredTransistion.Migrate = 
helper.BoolToPtr(true) + allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -980,7 +980,7 @@ func TestReconciler_DrainNode_ScaleUp(t *testing.T) { for i := 0; i < 2; i++ { n := mock.Node() n.ID = allocs[i].NodeID - allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) + allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -1034,7 +1034,7 @@ func TestReconciler_DrainNode_ScaleDown(t *testing.T) { for i := 0; i < 3; i++ { n := mock.Node() n.ID = allocs[i].NodeID - allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) + allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -2216,7 +2216,7 @@ func TestReconciler_PausedOrFailedDeployment_Migrations(t *testing.T) { for i := 0; i < 3; i++ { n := mock.Node() n.ID = allocs[i].NodeID - allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) + allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } @@ -2290,7 +2290,7 @@ func TestReconciler_DrainNode_Canary(t *testing.T) { tainted := make(map[string]*structs.Node, 1) n := mock.Node() n.ID = allocs[11].NodeID - allocs[11].DesiredTransistion.Migrate = helper.BoolToPtr(true) + allocs[11].DesiredTransition.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n @@ -3030,7 +3030,7 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { n.Status = structs.NodeStatusDown } else { n.Drain = true - allocs[2+i].DesiredTransistion.Migrate = helper.BoolToPtr(true) + allocs[2+i].DesiredTransition.Migrate = helper.BoolToPtr(true) } tainted[n.ID] = n } @@ -3116,7 +3116,7 @@ func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) { n.Status = structs.NodeStatusDown } else { n.Drain = true - allocs[6+i].DesiredTransistion.Migrate = helper.BoolToPtr(true) + allocs[6+i].DesiredTransition.Migrate = helper.BoolToPtr(true) } tainted[n.ID] = n } @@ -3442,7 +3442,7 @@ func TestReconciler_TaintedNode_MultiGroups(t *testing.T) { for i := 0; i < 15; i++ { n := mock.Node() n.ID = allocs[i].NodeID - allocs[i].DesiredTransistion.Migrate = helper.BoolToPtr(true) + allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) n.Drain = true tainted[n.ID] = n } diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index fc8d619fb661..5527aecb4ffc 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -218,7 +218,7 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi if !alloc.TerminalStatus() { if n == nil || n.TerminalStatus() { lost[alloc.ID] = alloc - } else if alloc.DesiredTransistion.ShouldMigrate() { + } else if alloc.DesiredTransition.ShouldMigrate() { migrate[alloc.ID] = alloc } else { untainted[alloc.ID] = alloc diff --git a/scheduler/system_sched_test.go b/scheduler/system_sched_test.go index 7303ea1708df..3d78b7061366 100644 --- a/scheduler/system_sched_test.go +++ b/scheduler/system_sched_test.go @@ -972,7 +972,7 @@ func TestSystemSched_NodeDown(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" - alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) + alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) // Create a mock evaluation to deal with drain @@ -1101,7 +1101,7 @@ func TestSystemSched_NodeDrain(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" - 
alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) + alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) // Create a mock evaluation to deal with drain @@ -1415,7 +1415,7 @@ func TestSystemSched_PlanWithDrainedNode(t *testing.T) { alloc.JobID = job.ID alloc.NodeID = node.ID alloc.Name = "my-job.web[0]" - alloc.DesiredTransistion.Migrate = helper.BoolToPtr(true) + alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) alloc.TaskGroup = "web" alloc2 := mock.Alloc() diff --git a/scheduler/util.go b/scheduler/util.go index fcac79d1c87e..c0943e126380 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -111,7 +111,7 @@ func diffAllocs(job *structs.Job, taintedNodes map[string]*structs.Node, TaskGroup: tg, Alloc: exist, }) - } else if exist.DesiredTransistion.ShouldMigrate() { + } else if exist.DesiredTransition.ShouldMigrate() { result.migrate = append(result.migrate, allocTuple{ Name: name, TaskGroup: tg, diff --git a/scheduler/util_test.go b/scheduler/util_test.go index f2b339d38eff..7fde4fa65718 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -91,7 +91,7 @@ func TestDiffAllocs(t *testing.T) { NodeID: "drainNode", Name: "my-job.web[2]", Job: oldJob, - DesiredTransistion: structs.DesiredTransistion{ + DesiredTransition: structs.DesiredTransition{ Migrate: helper.BoolToPtr(true), }, }, @@ -223,7 +223,7 @@ func TestDiffSystemAllocs(t *testing.T) { NodeID: drainNode.ID, Name: "my-job.web[0]", Job: oldJob, - DesiredTransistion: structs.DesiredTransistion{ + DesiredTransition: structs.DesiredTransition{ Migrate: helper.BoolToPtr(true), }, }, diff --git a/testutil/rpcapi/rcpapi.go b/testutil/rpcapi/rcpapi.go index 71e5be057ea0..795123fdabcc 100644 --- a/testutil/rpcapi/rcpapi.go +++ b/testutil/rpcapi/rcpapi.go @@ -72,6 +72,21 @@ func (r *RPC) AllocGetAllocs(ids []string) (*structs.AllocsGetResponse, error) { return &resp, nil } +// Eval.List RPC +func (r *RPC) EvalList() (*structs.EvalListResponse, error) { + get := &structs.EvalListRequest{ + QueryOptions: structs.QueryOptions{ + Region: r.Region, + Namespace: r.Namespace, + }, + } + var resp structs.EvalListResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Eval.List", get, &resp); err != nil { + return nil, err + } + return &resp, nil +} + // Job.List RPC func (r *RPC) JobList() (*structs.JobListResponse, error) { get := &structs.JobListRequest{ @@ -112,3 +127,16 @@ func (r *RPC) NodeGetAllocs(nodeID string) (*structs.NodeAllocsResponse, error) } return &resp, nil } + +// Node.GetNode RPC +func (r *RPC) NodeGet(nodeID string) (*structs.SingleNodeResponse, error) { + get := &structs.NodeSpecificRequest{ + NodeID: nodeID, + QueryOptions: structs.QueryOptions{Region: r.Region}, + } + var resp structs.SingleNodeResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Node.GetNode", get, &resp); err != nil { + return nil, err + } + return &resp, nil +} From a466f97cbafce2c720c1732b49d3301848a1ad1d Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 23 Feb 2018 16:45:57 -0800 Subject: [PATCH 07/79] scheduler: migrate non-terminal migrating allocs filterByTainted node should always migrate non-terminal migrating allocs --- scheduler/reconcile_util.go | 36 ++++++------ scheduler/reconcile_util_test.go | 99 ++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 18 deletions(-) diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 5527aecb4ffc..a7b0b814120f 100644 --- 
a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -199,33 +199,33 @@ func (a allocSet) filterByTainted(nodes map[string]*structs.Node) (untainted, mi migrate = make(map[string]*structs.Allocation) lost = make(map[string]*structs.Allocation) for _, alloc := range a { - n, ok := nodes[alloc.NodeID] - if !ok { + // Terminal allocs are always untainted as they should never be migrated + if alloc.TerminalStatus() { untainted[alloc.ID] = alloc continue } - // If the job is batch and finished successfully, the fact that the - // node is tainted does not mean it should be migrated or marked as - // lost as the work was already successfully finished. However for - // service/system jobs, tasks should never complete. The check of - // batch type, defends against client bugs. - if alloc.Job.Type == structs.JobTypeBatch && alloc.RanSuccessfully() { - untainted[alloc.ID] = alloc + // Non-terminal allocs that should migrate should always migrate + if alloc.DesiredTransition.ShouldMigrate() { + migrate[alloc.ID] = alloc continue } - if !alloc.TerminalStatus() { - if n == nil || n.TerminalStatus() { - lost[alloc.ID] = alloc - } else if alloc.DesiredTransition.ShouldMigrate() { - migrate[alloc.ID] = alloc - } else { - untainted[alloc.ID] = alloc - } - } else { + n, ok := nodes[alloc.NodeID] + if !ok { + // Node is untainted so alloc is untainted untainted[alloc.ID] = alloc + continue + } + + // Allocs on GC'd (nil) or lost nodes are Lost + if n == nil || n.TerminalStatus() { + lost[alloc.ID] = alloc + continue } + + // All other allocs are untainted + untainted[alloc.ID] = alloc } return } diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 3b45a55ed6d5..6d85dfb811ed 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -3,7 +3,9 @@ package scheduler import ( "testing" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" + "github.com/stretchr/testify/require" ) // Test that we properly create the bitmap even when the alloc set includes an @@ -29,3 +31,100 @@ func TestBitmapFrom(t *testing.T) { t.Fatalf("got %d; want %d", act, exp) } } + +func TestAllocSet_filterByTainted(t *testing.T) { + require := require.New(t) + + nodes := map[string]*structs.Node{ + "draining": &structs.Node{ + ID: "draining", + Drain: true, + }, + "lost": &structs.Node{ + ID: "lost", + Status: structs.NodeStatusDown, + }, + "nil": nil, + "normal": &structs.Node{ + ID: "normal", + Status: structs.NodeStatusReady, + }, + } + + batchJob := &structs.Job{ + Type: structs.JobTypeBatch, + } + + allocs := allocSet{ + // Non-terminal alloc with migrate=true should migrate on a draining node + "migrating1": { + ID: "migrating1", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: structs.DesiredTransition{helper.BoolToPtr(true)}, + Job: batchJob, + NodeID: "draining", + }, + // Non-terminal alloc with migrate=true should migrate on an unknown node + "migrating2": { + ID: "migrating2", + ClientStatus: structs.AllocClientStatusRunning, + DesiredTransition: structs.DesiredTransition{helper.BoolToPtr(true)}, + Job: batchJob, + NodeID: "nil", + }, + "untainted1": { + ID: "untainted1", + ClientStatus: structs.AllocClientStatusRunning, + Job: batchJob, + NodeID: "normal", + }, + // Terminal allocs are always untainted + "untainted2": { + ID: "untainted2", + ClientStatus: structs.AllocClientStatusComplete, + Job: batchJob, + NodeID: "normal", + }, + // Terminal allocs are always untainted, even on draining nodes + 
"untainted3": { + ID: "untainted3", + ClientStatus: structs.AllocClientStatusComplete, + Job: batchJob, + NodeID: "draining", + }, + // Terminal allocs are always untainted, even on lost nodes + "untainted4": { + ID: "untainted4", + ClientStatus: structs.AllocClientStatusComplete, + Job: batchJob, + NodeID: "lost", + }, + // Non-terminal allocs on lost nodes are lost + "lost1": { + ID: "lost1", + ClientStatus: structs.AllocClientStatusPending, + Job: batchJob, + NodeID: "lost", + }, + // Non-terminal allocs on lost nodes are lost + "lost2": { + ID: "lost2", + ClientStatus: structs.AllocClientStatusRunning, + Job: batchJob, + NodeID: "lost", + }, + } + + untainted, migrate, lost := allocs.filterByTainted(nodes) + require.Len(untainted, 4) + require.Contains(untainted, "untainted1") + require.Contains(untainted, "untainted2") + require.Contains(untainted, "untainted3") + require.Contains(untainted, "untainted4") + require.Len(migrate, 2) + require.Contains(migrate, "migrating1") + require.Contains(migrate, "migrating2") + require.Len(lost, 2) + require.Contains(lost, "lost1") + require.Contains(lost, "lost2") +} From 116c28c77c42dcb036fce2610c5e4b56dcf3ef35 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 26 Feb 2018 15:01:27 -0800 Subject: [PATCH 08/79] improve drain fsm/statestore tests --- nomad/fsm_test.go | 4 +- nomad/state/state_store_test.go | 81 ++++++++++++++++++++------------- 2 files changed, 52 insertions(+), 33 deletions(-) diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index a61a9e84fa49..90ba21f14e8e 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -1288,8 +1288,10 @@ func TestFSM_UpdateAllocDesiredTransition(t *testing.T) { require.Nil(err) out2, err := fsm.State().AllocByID(ws, alloc2.ID) require.Nil(err) - _, err = fsm.State().EvalByID(ws, eval.ID) + evalOut, err := fsm.State().EvalByID(ws, eval.ID) require.Nil(err) + require.NotNil(evalOut) + require.Equal(eval.ID, evalOut.ID) require.NotNil(out1.DesiredTransition.Migrate) require.NotNil(out2.DesiredTransition.Migrate) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index bac9839c298f..73c86bbd9f59 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -699,52 +699,69 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { } func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { + require := require.New(t) state := testStateStore(t) node := mock.Node() - err := state.UpsertNode(1000, node) - if err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(state.UpsertNode(1000, node)) // Create a watchset so we can test that update node drain fires the watch ws := memdb.NewWatchSet() - if _, err := state.NodeByID(ws, node.ID); err != nil { - t.Fatalf("bad: %v", err) - } - err = state.UpdateNodeDrain(1001, node.ID, true) - if err != nil { - t.Fatalf("err: %v", err) - } + // Assert initial node state + { + out, err := state.NodeByID(ws, node.ID) + require.Nil(err) - if !watchFired(ws) { - t.Fatalf("bad") + require.False(out.Drain) + require.Nil(out.DrainStrategy) + require.Equal(structs.NodeSchedulingEligible, out.SchedulingEligibility) + if out.ModifyIndex != 1000 { + t.Fatalf("expected ModifyIndex=1000, found %d", out.ModifyIndex) + } } - ws = memdb.NewWatchSet() - out, err := state.NodeByID(ws, node.ID) - if err != nil { - t.Fatalf("err: %v", err) - } + // Start draining + { + require.Nil(state.UpdateNodeDrain(1001, node.ID, true)) + require.True(watchFired(ws)) - if !out.Drain { - t.Fatalf("bad: %#v", out) - } - if out.ModifyIndex != 
1001 { - t.Fatalf("bad: %#v", out) - } + ws = memdb.NewWatchSet() + out, err := state.NodeByID(ws, node.ID) + require.Nil(err) - index, err := state.Index("nodes") - if err != nil { - t.Fatalf("err: %v", err) - } - if index != 1001 { - t.Fatalf("bad: %d", index) + require.True(out.Drain) + require.NotNil(out.DrainStrategy) + require.Equal(structs.NodeSchedulingIneligible, out.SchedulingEligibility) + if out.ModifyIndex != 1001 { + t.Fatalf("expected ModifyIndex=1001, found %d", out.ModifyIndex) + } + + index, err := state.Index("nodes") + require.Nil(err) + if index != 1001 { + t.Fatalf("expected index=1001, found %d", index) + } + + require.False(watchFired(ws)) } - if watchFired(ws) { - t.Fatalf("bad") + // Stop draining (no need to retest watch behavior) + { + require.Nil(state.UpdateNodeDrain(1002, node.ID, false)) + + out, err := state.NodeByID(nil, node.ID) + require.Nil(err) + + require.False(out.Drain) + require.Nil(out.DrainStrategy) + if out.ModifyIndex != 1002 { + t.Fatalf("expected ModifyIndex=1002, found %d", out.ModifyIndex) + } + + // Scheduling eligibility should *not* flip back to eligible after + // draining stops. + require.Equal(structs.NodeSchedulingIneligible, out.SchedulingEligibility) } } From 1773de9e30b6ac9be6e3a839154b27097c8b655f Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 23 Feb 2018 10:42:43 -0800 Subject: [PATCH 09/79] Node.Drain takes strategy --- nomad/drain_test.go | 5 +- nomad/fsm.go | 2 +- nomad/fsm_test.go | 34 +++++----- nomad/mock/mock.go | 5 ++ nomad/node_endpoint.go | 21 ++----- nomad/node_endpoint_test.go | 107 +++++++++++++++----------------- nomad/state/state_store.go | 21 +++---- nomad/state/state_store_test.go | 73 ++++++---------------- nomad/structs/structs.go | 33 +++++++--- 9 files changed, 131 insertions(+), 170 deletions(-) diff --git a/nomad/drain_test.go b/nomad/drain_test.go index 0b343549e7ce..c47e0d401548 100644 --- a/nomad/drain_test.go +++ b/nomad/drain_test.go @@ -143,9 +143,12 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { // Start draining node 1 //FIXME update drain rpc to skip fsm manipulation and use api + strategy := &structs.DrainStrategy{ + Deadline: -1 * time.Second, + } node, err := state.NodeByID(nil, c1.NodeID()) require.Nil(err) - require.Nil(state.UpdateNodeDrain(node.ModifyIndex+1, node.ID, true)) + require.Nil(state.UpdateNodeDrain(node.ModifyIndex+1, node.ID, strategy, 101)) // Start node 2 c2 := client.TestClient(t, func(conf *config.Config) { diff --git a/nomad/fsm.go b/nomad/fsm.go index c8babc50ddb2..58d1527514a1 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -328,7 +328,7 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpdateNodeDrain(index, req.NodeID, req.Drain); err != nil { + if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.UpdateTime); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) return err } diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 90ba21f14e8e..b9fc4845da82 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -280,6 +280,7 @@ func TestFSM_UpdateNodeStatus(t *testing.T) { func TestFSM_UpdateNodeDrain(t *testing.T) { t.Parallel() + require := require.New(t) fsm := testFSM(t) node := mock.Node() @@ -287,38 +288,31 @@ func TestFSM_UpdateNodeDrain(t *testing.T) { Node: node, } buf, err := structs.Encode(structs.NodeRegisterRequestType, req) - if err != nil { - t.Fatalf("err: %v", err) - } + 
require.Nil(err) resp := fsm.Apply(makeLog(buf)) - if resp != nil { - t.Fatalf("resp: %v", resp) - } + require.Nil(resp) + strategy := &structs.DrainStrategy{ + Deadline: 10 * time.Second, + } req2 := structs.NodeUpdateDrainRequest{ - NodeID: node.ID, - Drain: true, + NodeID: node.ID, + DrainStrategy: strategy, + UpdateTime: 101, } buf, err = structs.Encode(structs.NodeUpdateDrainRequestType, req2) - if err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(err) resp = fsm.Apply(makeLog(buf)) - if resp != nil { - t.Fatalf("resp: %v", resp) - } + require.Nil(resp) // Verify we are NOT registered ws := memdb.NewWatchSet() node, err = fsm.State().NodeByID(ws, req.Node.ID) - if err != nil { - t.Fatalf("err: %v", err) - } - if !node.Drain { - t.Fatalf("bad node: %#v", node) - } + require.Nil(err) + require.True(node.Drain) + require.Equal(node.DrainStrategy, strategy) } func TestFSM_RegisterJob(t *testing.T) { diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 6c2a3f42e0a3..aef9c475f011 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -196,6 +196,10 @@ func SystemJob() *structs.Job { Delay: 1 * time.Minute, Mode: structs.RestartPolicyModeDelay, }, + ReschedulePolicy: &structs.ReschedulePolicy{ + Attempts: 2, + Interval: 10 * time.Minute, + }, EphemeralDisk: structs.DefaultEphemeralDisk(), Tasks: []*structs.Task{ { @@ -240,6 +244,7 @@ func PeriodicJob() *structs.Job { Spec: "*/30 * * * *", } job.Status = structs.JobStatusRunning + job.TaskGroups[0].Migrate = nil return job } diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 182817392bdf..2631939ad13d 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -434,28 +434,15 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, } // Update the timestamp to - node.StatusUpdatedAt = time.Now().Unix() + args.UpdateTime = time.Now().Unix() // Commit this update via Raft - var index uint64 - if node.Drain != args.Drain { - _, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) - if err != nil { - n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err) - return err - } - reply.NodeModifyIndex = index - } - - // Always attempt to create Node evaluations because there may be a System - // job registered that should be evaluated. 
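For context, a minimal client-side sketch of the reworked endpoint, mirroring the tests later in this patch: the request now carries a DrainStrategy rather than a bare Drain boolean, and the server no longer creates node evaluations inline. The codec argument and the ten-minute deadline are assumptions for illustration.

package drainexample

import (
	"net/rpc"
	"time"

	msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
	"github.com/hashicorp/nomad/nomad/structs"
)

// drainNode (hypothetical) asks the servers to drain nodeID with a deadline.
func drainNode(codec rpc.ClientCodec, nodeID string) error {
	req := &structs.NodeUpdateDrainRequest{
		NodeID: nodeID,
		DrainStrategy: &structs.DrainStrategy{
			Deadline: 10 * time.Minute, // assumed deadline
		},
		WriteRequest: structs.WriteRequest{Region: "global"},
	}
	var resp structs.NodeDrainUpdateResponse
	return msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", req, &resp)
}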
- evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) + _, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) if err != nil { - n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) + n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err) return err } - reply.EvalIDs = evalIDs - reply.EvalCreateIndex = evalIndex + reply.NodeModifyIndex = index // Set the reply index reply.Index = index diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 375ca8731cb3..0de46ed22c61 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -747,6 +747,7 @@ func TestClientEndpoint_UpdateStatus_HeartbeatOnly_Advertise(t *testing.T) { func TestClientEndpoint_UpdateDrain(t *testing.T) { t.Parallel() + require := require.New(t) s1 := TestServer(t, nil) defer s1.Shutdown() codec := rpcClient(t, s1) @@ -761,34 +762,29 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { // Fetch the response var resp structs.NodeUpdateResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp); err != nil { - t.Fatalf("err: %v", err) + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) + + strategy := &structs.DrainStrategy{ + Deadline: 10 * time.Second, } // Update the status dereg := &structs.NodeUpdateDrainRequest{ - NodeID: node.ID, - Drain: true, - WriteRequest: structs.WriteRequest{Region: "global"}, + NodeID: node.ID, + DrainStrategy: strategy, + WriteRequest: structs.WriteRequest{Region: "global"}, } var resp2 structs.NodeDrainUpdateResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2); err != nil { - t.Fatalf("err: %v", err) - } - if resp2.Index == 0 { - t.Fatalf("bad index: %d", resp2.Index) - } + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2)) + require.NotZero(resp2.Index) // Check for the node in the FSM state := s1.fsm.State() ws := memdb.NewWatchSet() out, err := state.NodeByID(ws, node.ID) - if err != nil { - t.Fatalf("err: %v", err) - } - if !out.Drain { - t.Fatalf("bad: %#v", out) - } + require.Nil(err) + require.True(out.Drain) + require.Equal(strategy, out.DrainStrategy) } func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { @@ -797,13 +793,13 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { defer s1.Shutdown() codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) - assert := assert.New(t) + require := require.New(t) // Create the node node := mock.Node() state := s1.fsm.State() - assert.Nil(state.UpsertNode(1, node), "UpsertNode") + require.Nil(state.UpsertNode(1, node), "UpsertNode") // Create the policy and tokens validToken := mock.CreatePolicyAndToken(t, state, 1001, "test-valid", mock.NodePolicy(acl.PolicyWrite)) @@ -811,22 +807,24 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { // Update the status without a token and expect failure dereg := &structs.NodeUpdateDrainRequest{ - NodeID: node.ID, - Drain: true, + NodeID: node.ID, + DrainStrategy: &structs.DrainStrategy{ + Deadline: 10 * time.Second, + }, WriteRequest: structs.WriteRequest{Region: "global"}, } { var resp structs.NodeDrainUpdateResponse err := msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp) - assert.NotNil(err, "RPC") - assert.Equal(err.Error(), structs.ErrPermissionDenied.Error()) + require.NotNil(err, "RPC") + require.Equal(err.Error(), structs.ErrPermissionDenied.Error()) } // Try with a valid token dereg.AuthToken = validToken.SecretID { var resp 
structs.NodeDrainUpdateResponse - assert.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") } // Try with a invalid token @@ -834,15 +832,15 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { { var resp structs.NodeDrainUpdateResponse err := msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp) - assert.NotNil(err, "RPC") - assert.Equal(err.Error(), structs.ErrPermissionDenied.Error()) + require.NotNil(err, "RPC") + require.Equal(err.Error(), structs.ErrPermissionDenied.Error()) } // Try with a root token dereg.AuthToken = root.SecretID { var resp structs.NodeDrainUpdateResponse - assert.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp), "RPC") } } @@ -854,6 +852,7 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { defer s1.Shutdown() codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) + require := require.New(t) // Register a node node := mock.Node() @@ -863,9 +862,7 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { } // Fetch the response var resp structs.NodeUpdateResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp); err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) // Register a service job var jobResp structs.JobRegisterResponse @@ -878,15 +875,12 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { Namespace: job.Namespace, }, } - if err := msgpackrpc.CallWithCodec(codec, "Job.Register", jobReq, &jobResp); err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Register", jobReq, &jobResp)) // Register a system job var jobResp1 structs.JobRegisterResponse - job1 := mock.Job() + job1 := mock.SystemJob() job1.TaskGroups[0].Count = 1 - job1.Type = structs.JobTypeSystem jobReq1 := &structs.JobRegisterRequest{ Job: job1, WriteRequest: structs.WriteRequest{ @@ -894,9 +888,7 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { Namespace: job1.Namespace, }, } - if err := msgpackrpc.CallWithCodec(codec, "Job.Register", jobReq1, &jobResp1); err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Register", jobReq1, &jobResp1)) // Wait for the scheduler to create an allocation testutil.WaitForResult(func() (bool, error) { @@ -916,14 +908,14 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { // Drain the node dereg := &structs.NodeUpdateDrainRequest{ - NodeID: node.ID, - Drain: true, + NodeID: node.ID, + DrainStrategy: &structs.DrainStrategy{ + Deadline: -1 * time.Second, + }, WriteRequest: structs.WriteRequest{Region: "global"}, } var resp2 structs.NodeDrainUpdateResponse - if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2); err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", dereg, &resp2)) // Mark the node as down node.Status = structs.NodeStatusDown @@ -931,9 +923,7 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { Node: node, WriteRequest: structs.WriteRequest{Region: "global"}, } - if err := msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp); err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) // Ensure that the allocation has transitioned to lost testutil.WaitForResult(func() (bool, error) { @@ 
-956,7 +946,7 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { ModifyIndex: summary.ModifyIndex, } if !reflect.DeepEqual(summary, expectedSummary) { - return false, fmt.Errorf("expected: %#v, actual: %#v", expectedSummary, summary) + return false, fmt.Errorf("Service: expected: %#v, actual: %#v", expectedSummary, summary) } summary1, err := s1.fsm.state.JobSummaryByID(ws, job1.Namespace, job1.ID) @@ -976,7 +966,7 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { ModifyIndex: summary1.ModifyIndex, } if !reflect.DeepEqual(summary1, expectedSummary1) { - return false, fmt.Errorf("expected: %#v, actual: %#v", expectedSummary1, summary1) + return false, fmt.Errorf("System: expected: %#v, actual: %#v", expectedSummary1, summary1) } return true, nil }, func(err error) { @@ -2378,7 +2368,10 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { // Node drain updates trigger watches. time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpdateNodeDrain(3, node.ID, true); err != nil { + s := &structs.DrainStrategy{ + Deadline: 10 * time.Second, + } + if err := state.UpdateNodeDrain(3, node.ID, s, 101); err != nil { t.Fatalf("err: %v", err) } }) @@ -2402,12 +2395,12 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { // Node status update triggers watches time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpdateNodeStatus(4, node.ID, structs.NodeStatusDown); err != nil { + if err := state.UpdateNodeStatus(40, node.ID, structs.NodeStatusDown); err != nil { t.Fatalf("err: %v", err) } }) - req.MinQueryIndex = 3 + req.MinQueryIndex = 38 var resp3 structs.NodeListResponse start = time.Now() if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp3); err != nil { @@ -2417,8 +2410,8 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp3) } - if resp3.Index != 4 { - t.Fatalf("Bad index: %d %d", resp3.Index, 4) + if resp3.Index != 40 { + t.Fatalf("Bad index: %d %d", resp3.Index, 40) } if len(resp3.Nodes) != 1 || resp3.Nodes[0].Status != structs.NodeStatusDown { t.Fatalf("bad: %#v", resp3.Nodes) @@ -2426,12 +2419,12 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { // Node delete triggers watches. 
time.AfterFunc(100*time.Millisecond, func() { - if err := state.DeleteNode(5, node.ID); err != nil { + if err := state.DeleteNode(50, node.ID); err != nil { t.Fatalf("err: %v", err) } }) - req.MinQueryIndex = 4 + req.MinQueryIndex = 45 var resp4 structs.NodeListResponse start = time.Now() if err := msgpackrpc.CallWithCodec(codec, "Node.List", req, &resp4); err != nil { @@ -2441,8 +2434,8 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp4) } - if resp4.Index != 5 { - t.Fatalf("Bad index: %d %d", resp4.Index, 5) + if resp4.Index != 50 { + t.Fatalf("Bad index: %d %d", resp4.Index, 50) } if len(resp4.Nodes) != 0 { t.Fatalf("bad: %#v", resp4.Nodes) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 90af315012f5..45c595d04b36 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -6,7 +6,6 @@ import ( "io" "log" "sort" - "time" "github.com/hashicorp/go-memdb" multierror "github.com/hashicorp/go-multierror" @@ -617,7 +616,9 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error } // UpdateNodeDrain is used to update the drain of a node -func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) error { +func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, + drain *structs.DrainStrategy, updateTime int64) error { + txn := s.db.Txn(true) defer txn.Abort() @@ -635,20 +636,18 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain bool) er copyNode := existingNode.Copy() // Update the drain in the copy - copyNode.Drain = drain - //FIXME - if drain { - copyNode.DrainStrategy = &structs.DrainStrategy{ - StartTime: time.Now().UnixNano(), - Deadline: 10 * time.Second, - } - copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible - } else { + copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.9 + copyNode.DrainStrategy = drain + if drain == nil { // When stopping a drain unset the strategy but leave the node // ineligible for scheduling copyNode.DrainStrategy = nil + } else { + copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible } + copyNode.ModifyIndex = index + copyNode.StatusUpdatedAt = updateTime // Insert the node if err := txn.Insert("nodes", copyNode); err != nil { diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 73c86bbd9f59..af2e8cceb9dd 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -705,64 +705,31 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { require.Nil(state.UpsertNode(1000, node)) - // Create a watchset so we can test that update node drain fires the watch - ws := memdb.NewWatchSet() - - // Assert initial node state - { - out, err := state.NodeByID(ws, node.ID) - require.Nil(err) - - require.False(out.Drain) - require.Nil(out.DrainStrategy) - require.Equal(structs.NodeSchedulingEligible, out.SchedulingEligibility) - if out.ModifyIndex != 1000 { - t.Fatalf("expected ModifyIndex=1000, found %d", out.ModifyIndex) - } - } - - // Start draining - { - require.Nil(state.UpdateNodeDrain(1001, node.ID, true)) - require.True(watchFired(ws)) - - ws = memdb.NewWatchSet() - out, err := state.NodeByID(ws, node.ID) - require.Nil(err) - - require.True(out.Drain) - require.NotNil(out.DrainStrategy) - require.Equal(structs.NodeSchedulingIneligible, out.SchedulingEligibility) - if out.ModifyIndex != 1001 { - t.Fatalf("expected 
ModifyIndex=1001, found %d", out.ModifyIndex) - } - - index, err := state.Index("nodes") - require.Nil(err) - if index != 1001 { - t.Fatalf("expected index=1001, found %d", index) - } - - require.False(watchFired(ws)) + expectedTime := int64(101) + expectedDrain := &structs.DrainStrategy{ + Deadline: 10 * time.Second, } - // Stop draining (no need to retest watch behavior) - { - require.Nil(state.UpdateNodeDrain(1002, node.ID, false)) + // Create a watchset so we can test that update node drain fires the watch + ws := memdb.NewWatchSet() + _, err := state.NodeByID(ws, node.ID) + require.Nil(err) - out, err := state.NodeByID(nil, node.ID) - require.Nil(err) + require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain, expectedTime)) + require.True(watchFired(ws)) - require.False(out.Drain) - require.Nil(out.DrainStrategy) - if out.ModifyIndex != 1002 { - t.Fatalf("expected ModifyIndex=1002, found %d", out.ModifyIndex) - } + ws = memdb.NewWatchSet() + out, err := state.NodeByID(ws, node.ID) + require.Nil(err) + require.True(out.Drain) + require.NotNil(out.DrainStrategy) + require.Equal(out.DrainStrategy, expectedDrain) + require.EqualValues(1001, out.ModifyIndex) - // Scheduling eligibility should *not* flip back to eligible after - // draining stops. - require.Equal(structs.NodeSchedulingIneligible, out.SchedulingEligibility) - } + index, err := state.Index("nodes") + require.Nil(err) + require.EqualValues(1001, index) + require.False(watchFired(ws)) } func TestStateStore_AddSingleNodeEvent(t *testing.T) { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 6f6a98a6fb70..12c279e561f6 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -302,10 +302,12 @@ type NodeUpdateStatusRequest struct { WriteRequest } -// NodeUpdateDrainRequest is used for updating the drain status +// NodeUpdateDrainRequest is used for updating the drain strategy type NodeUpdateDrainRequest struct { - NodeID string - Drain bool + NodeID string + Drain bool // TODO Deprecate + DrainStrategy *DrainStrategy + UpdateTime int64 WriteRequest } @@ -871,10 +873,13 @@ type NodeUpdateResponse struct { // NodeDrainUpdateResponse is used to respond to a node drain update type NodeDrainUpdateResponse struct { - EvalIDs []string - EvalCreateIndex uint64 NodeModifyIndex uint64 QueryMeta + + // Deprecated in Nomad 0.8 as an evaluation is not immediately created but + // is instead handled by the drainer. + EvalIDs []string + EvalCreateIndex uint64 } // NodeAllocsResponse is used to return allocs for a single node @@ -1179,6 +1184,9 @@ func ValidNodeStatus(status string) bool { } const ( + // NodeSchedulingEligible and Ineligible marks the node as eligible or not, + // respectively, for receiving allocations. This is orthoginal to the node + // status being ready. NodeSchedulingEligible = "eligbile" NodeSchedulingIneligible = "ineligible" ) @@ -1192,6 +1200,10 @@ type DrainStrategy struct { // Deadline is the duration after StartTime when the remaining // allocations on a draining Node should be told to stop. Deadline time.Duration + + // IgnoreSystemJobs allows systems jobs to remain on the node even though it + // has been marked for draining. + IgnoreSystemJobs bool } func (d *DrainStrategy) Copy() *DrainStrategy { @@ -1275,6 +1287,7 @@ type Node struct { // attributes and capabilities. ComputedClass string + // COMPAT: Remove in Nomad 0.9 // Drain is controlled by the servers, and not the client. 
// If true, no jobs will be scheduled to this node, and existing // allocations will be drained. Superceded by DrainStrategy in Nomad @@ -1324,12 +1337,12 @@ func (n *Node) Copy() *Node { nn := new(Node) *nn = *n nn.Attributes = helper.CopyMapStringString(nn.Attributes) - nn.DrainStrategy = nn.DrainStrategy.Copy() nn.Resources = nn.Resources.Copy() nn.Reserved = nn.Reserved.Copy() nn.Links = helper.CopyMapStringString(nn.Links) nn.Meta = helper.CopyMapStringString(nn.Meta) nn.Events = copyNodeEvents(n.Events) + nn.DrainStrategy = nn.DrainStrategy.Copy() return nn } @@ -3189,10 +3202,10 @@ func (tg *TaskGroup) Validate(j *Job) error { // Validate the migration strategy switch j.Type { case JobTypeService: - if tg.Count == 1 && tg.Migrate != nil { - mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should not have a migration strategy with a count = 1", tg.Name)) - } else if err := tg.Migrate.Validate(); err != nil { - mErr.Errors = append(mErr.Errors, err) + if tg.Migrate != nil { + if err := tg.Migrate.Validate(); err != nil { + mErr.Errors = append(mErr.Errors, err) + } } default: if tg.Migrate != nil { From 2bdeacebffc4e9c3aab8706799512ee67b2aff2d Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 23 Feb 2018 15:56:36 -0800 Subject: [PATCH 10/79] Drain cli, api, http --- api/nodes.go | 80 ++++++++++++++++------- api/nodes_test.go | 14 +++-- command/agent/node_endpoint.go | 22 ++++--- command/agent/node_endpoint_test.go | 68 ++++++++++++-------- command/node.go | 19 ++++++ command/node_drain.go | 98 ++++++++++++++++++++++++++--- command/node_drain_test.go | 43 +++++++++++++ command/node_status.go | 2 +- commands.go | 16 ++++- main.go | 1 + nomad/structs/structs.go | 18 ++++-- 11 files changed, 298 insertions(+), 83 deletions(-) create mode 100644 command/node.go diff --git a/api/nodes.go b/api/nodes.go index 549eeea66639..4868fef7cfd4 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -3,7 +3,6 @@ package api import ( "fmt" "sort" - "strconv" "time" ) @@ -42,10 +41,24 @@ func (n *Nodes) Info(nodeID string, q *QueryOptions) (*Node, *QueryMeta, error) return &resp, qm, nil } -// ToggleDrain is used to toggle drain mode on/off for a given node. -func (n *Nodes) ToggleDrain(nodeID string, drain bool, q *WriteOptions) (*WriteMeta, error) { - drainArg := strconv.FormatBool(drain) - wm, err := n.client.write("/v1/node/"+nodeID+"/drain?enable="+drainArg, nil, nil, q) +// NodeUpdateDrainRequest is used to update the drain specification for a node. +type NodeUpdateDrainRequest struct { + // NodeID is the node to update the drain specification for. + NodeID string + + // DrainSpec is the drain specification to set for the node. A nil DrainSpec + // will disable draining. + DrainSpec *DrainSpec +} + +// UpdateDrain is used to update the drain strategy for a given node. +func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, q *WriteOptions) (*WriteMeta, error) { + req := &NodeUpdateDrainRequest{ + NodeID: nodeID, + DrainSpec: spec, + } + + wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, nil, q) if err != nil { return nil, err } @@ -108,25 +121,44 @@ type DriverInfo struct { // Node is used to deserialize a node entry. 
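// Drain state is exposed through the Drain flag together with the
// DrainStrategy and SchedulingEligibility fields below.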
type Node struct { - ID string - Datacenter string - Name string - HTTPAddr string - TLSEnabled bool - Attributes map[string]string - Resources *Resources - Reserved *Resources - Links map[string]string - Meta map[string]string - NodeClass string - Drain bool - Status string - StatusDescription string - StatusUpdatedAt int64 - Events []*NodeEvent - Drivers map[string]*DriverInfo - CreateIndex uint64 - ModifyIndex uint64 + ID string + Datacenter string + Name string + HTTPAddr string + TLSEnabled bool + Attributes map[string]string + Resources *Resources + Reserved *Resources + Links map[string]string + Meta map[string]string + NodeClass string + Drain bool + DrainStrategy *DrainStrategy + SchedulingEligibility string + Status string + StatusDescription string + StatusUpdatedAt int64 + Events []*NodeEvent + Drivers map[string]*DriverInfo + CreateIndex uint64 + ModifyIndex uint64 +} + +// DrainStrategy describes a Node's drain behavior. +type DrainStrategy struct { + // DrainSpec is the user declared drain specification + DrainSpec +} + +// DrainSpec describes a Node's drain behavior. +type DrainSpec struct { + // Deadline is the duration after StartTime when the remaining + // allocations on a draining Node should be told to stop. + Deadline time.Duration + + // IgnoreSystemJobs allows systems jobs to remain on the node even though it + // has been marked for draining. + IgnoreSystemJobs bool } const ( diff --git a/api/nodes_test.go b/api/nodes_test.go index 06b960746942..e2c0a3c78136 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -174,7 +174,10 @@ func TestNodes_ToggleDrain(t *testing.T) { } // Toggle it on - wm, err := nodes.ToggleDrain(nodeID, true, nil) + spec := &DrainSpec{ + Deadline: 10 * time.Second, + } + wm, err := nodes.UpdateDrain(nodeID, spec, nil) if err != nil { t.Fatalf("err: %s", err) } @@ -185,12 +188,12 @@ func TestNodes_ToggleDrain(t *testing.T) { if err != nil { t.Fatalf("err: %s", err) } - if !out.Drain { - t.Fatalf("drain mode should be on") + if out.SchedulingEligibility != structs.NodeSchedulingIneligible { + t.Fatalf("bad eligibility: %v vs %v", out.SchedulingEligibility, structs.NodeSchedulingIneligible) } // Toggle off again - wm, err = nodes.ToggleDrain(nodeID, false, nil) + wm, err = nodes.UpdateDrain(nodeID, nil, nil) if err != nil { t.Fatalf("err: %s", err) } @@ -204,6 +207,9 @@ func TestNodes_ToggleDrain(t *testing.T) { if out.Drain { t.Fatalf("drain mode should be off") } + if out.DrainStrategy != nil { + t.Fatalf("drain strategy should be unset") + } } func TestNodes_Allocations(t *testing.T) { diff --git a/command/agent/node_endpoint.go b/command/agent/node_endpoint.go index fd396a67c40f..b22850873a56 100644 --- a/command/agent/node_endpoint.go +++ b/command/agent/node_endpoint.go @@ -2,9 +2,9 @@ package agent import ( "net/http" - "strconv" "strings" + "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/nomad/structs" ) @@ -101,19 +101,21 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request return nil, CodedError(405, ErrInvalidMethod) } - // Get the enable value - enableRaw := req.URL.Query().Get("enable") - if enableRaw == "" { - return nil, CodedError(400, "missing enable value") - } - enable, err := strconv.ParseBool(enableRaw) - if err != nil { - return nil, CodedError(400, "invalid enable value") + var drainRequest api.NodeUpdateDrainRequest + if err := decodeBody(req, &drainRequest); err != nil { + return nil, CodedError(400, err.Error()) } args := structs.NodeUpdateDrainRequest{ NodeID: nodeID, - 
Drain: enable, + } + if drainRequest.DrainSpec != nil { + args.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: drainRequest.DrainSpec.Deadline, + IgnoreSystemJobs: drainRequest.DrainSpec.IgnoreSystemJobs, + }, + } } s.parseWriteRequest(req, &args.WriteRequest) diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index a5566adc19fb..ac1bd00b7286 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -4,10 +4,13 @@ import ( "net/http" "net/http/httptest" "testing" + "time" + "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestHTTP_NodesList(t *testing.T) { @@ -238,6 +241,7 @@ func TestHTTP_NodeAllocations(t *testing.T) { func TestHTTP_NodeDrain(t *testing.T) { t.Parallel() + require := require.New(t) httpTest(t, nil, func(s *TestAgent) { // Create the node node := mock.Node() @@ -246,45 +250,55 @@ func TestHTTP_NodeDrain(t *testing.T) { WriteRequest: structs.WriteRequest{Region: "global"}, } var resp structs.NodeUpdateResponse - if err := s.Agent.RPC("Node.Register", &args, &resp); err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(s.Agent.RPC("Node.Register", &args, &resp)) - // Directly manipulate the state - state := s.Agent.server.State() - alloc1 := mock.Alloc() - alloc1.NodeID = node.ID - if err := state.UpsertJobSummary(999, mock.JobSummary(alloc1.JobID)); err != nil { - t.Fatal(err) - } - err := state.UpsertAllocs(1000, []*structs.Allocation{alloc1}) - if err != nil { - t.Fatalf("err: %v", err) + drainReq := api.NodeUpdateDrainRequest{ + NodeID: node.ID, + DrainSpec: &api.DrainSpec{ + Deadline: 10 * time.Second, + }, } // Make the HTTP request - req, err := http.NewRequest("POST", "/v1/node/"+node.ID+"/drain?enable=1", nil) - if err != nil { - t.Fatalf("err: %v", err) - } + buf := encodeReq(drainReq) + req, err := http.NewRequest("POST", "/v1/node/"+node.ID+"/drain", buf) + require.Nil(err) respW := httptest.NewRecorder() // Make the request obj, err := s.Server.NodeSpecificRequest(respW, req) - if err != nil { - t.Fatalf("err: %v", err) - } + require.Nil(err) // Check for the index - if respW.HeaderMap.Get("X-Nomad-Index") == "" { - t.Fatalf("missing index") - } + require.NotZero(respW.HeaderMap.Get("X-Nomad-Index")) // Check the response - upd := obj.(structs.NodeDrainUpdateResponse) - if len(upd.EvalIDs) == 0 { - t.Fatalf("bad: %v", upd) - } + _, ok := obj.(structs.NodeDrainUpdateResponse) + require.True(ok) + + // Check that the node has been updated + state := s.Agent.server.State() + out, err := state.NodeByID(nil, node.ID) + require.Nil(err) + require.True(out.Drain) + require.NotNil(out.DrainStrategy) + require.Equal(10*time.Second, out.DrainStrategy.Deadline) + + // Make the HTTP request to unset drain + drainReq.DrainSpec = nil + buf = encodeReq(drainReq) + req, err = http.NewRequest("POST", "/v1/node/"+node.ID+"/drain", buf) + require.Nil(err) + respW = httptest.NewRecorder() + + // Make the request + obj, err = s.Server.NodeSpecificRequest(respW, req) + require.Nil(err) + + out, err = state.NodeByID(nil, node.ID) + require.Nil(err) + require.False(out.Drain) + require.Nil(out.DrainStrategy) }) } diff --git a/command/node.go b/command/node.go new file mode 100644 index 000000000000..36436d9b7868 --- /dev/null +++ b/command/node.go @@ -0,0 +1,19 @@ +package command + +import "github.com/mitchellh/cli" + +type 
NodeCommand struct { + Meta +} + +func (f *NodeCommand) Help() string { + return "This command is accessed by using one of the subcommands below." +} + +func (f *NodeCommand) Synopsis() string { + return "Interact with nodes" +} + +func (f *NodeCommand) Run(args []string) int { + return cli.RunResultHelp +} diff --git a/command/node_drain.go b/command/node_drain.go index b40757b7c90b..c27068b97e59 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -3,18 +3,26 @@ package command import ( "fmt" "strings" + "time" + "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" "github.com/posener/complete" ) +var ( + // defaultDrainDuration is the default drain duration if it is not specified + // explicitly + defaultDrainDuration = 1 * time.Hour +) + type NodeDrainCommand struct { Meta } func (c *NodeDrainCommand) Help() string { helpText := ` -Usage: nomad node-drain [options] +Usage: nomad node drain [options] Toggles node draining on a specified node. It is required that either -enable or -disable is specified, but not both. @@ -32,8 +40,24 @@ Node Drain Options: -enable Enable draining for the specified node. + -deadline + Set the deadline by which all allocations must be moved off the node. + Remaining allocations after the deadline are forced removed from the node. + If unspecified, a default deadline of one hour is applied. + + -force + Force remove allocations off the node immediately. + + -no-deadline + No deadline allows the allocations to drain off the node without being force + stopped after a certain deadline. + + -ignore-system + Ignore system allows the drain to complete without stopping system job + allocations. + -self - Query the status of the local node. + Set the drain status of the local node. -yes Automatic yes to prompts. 
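As a rough sketch only (not part of this patch), the flag combinations above are expected to reduce to an api.DrainSpec along the following lines. buildDrainSpec is a hypothetical helper that mirrors the deadline handling in Run further down and assumes the api import and defaultDrainDuration defined in this file:

// buildDrainSpec shows how the drain flags map to a drain specification:
// -force yields a negative deadline (immediate drain), -no-deadline yields a
// zero deadline, an explicit -deadline is used as given, and the default
// deadline is one hour.
func buildDrainSpec(force, noDeadline, ignoreSystem bool, deadline time.Duration) *api.DrainSpec {
	d := defaultDrainDuration
	switch {
	case force:
		d = -1 * time.Second
	case noDeadline:
		d = 0
	case deadline > 0:
		d = deadline
	}
	return &api.DrainSpec{
		Deadline:         d,
		IgnoreSystemJobs: ignoreSystem,
	}
}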
@@ -48,10 +72,14 @@ func (c *NodeDrainCommand) Synopsis() string { func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-disable": complete.PredictNothing, - "-enable": complete.PredictNothing, - "-self": complete.PredictNothing, - "-yes": complete.PredictNothing, + "-disable": complete.PredictNothing, + "-enable": complete.PredictNothing, + "-deadline": complete.PredictAnything, + "-force": complete.PredictNothing, + "-no-deadline": complete.PredictNothing, + "-ignore-system": complete.PredictNothing, + "-self": complete.PredictNothing, + "-yes": complete.PredictNothing, }) } @@ -71,12 +99,18 @@ func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor { } func (c *NodeDrainCommand) Run(args []string) int { - var enable, disable, self, autoYes bool + var enable, disable, force, + noDeadline, ignoreSystem, self, autoYes bool + var deadline string flags := c.Meta.FlagSet("node-drain", FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.BoolVar(&enable, "enable", false, "Enable drain mode") flags.BoolVar(&disable, "disable", false, "Disable drain mode") + flags.StringVar(&deadline, "deadline", "", "Deadline after which allocations are force stopped") + flags.BoolVar(&force, "force", false, "Force immediate drain") + flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline") + flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node") flags.BoolVar(&self, "self", false, "") flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.") @@ -93,10 +127,46 @@ func (c *NodeDrainCommand) Run(args []string) int { // Check that we got a node ID args = flags.Args() if l := len(args); self && l != 0 || !self && l != 1 { - c.Ui.Error(c.Help()) + c.Ui.Error("Node ID must be specified if -self isn't being used") + return 1 + } + + // Validate a compatible set of flags were set + if disable && (deadline != "" || force || noDeadline || ignoreSystem) { + c.Ui.Error("-disable can't be combined with flags configuring drain strategy") + return 1 + } + if deadline != "" && (force || noDeadline) { + c.Ui.Error("-deadline can't be combined with -force or -no-deadline") + return 1 + } + if force && noDeadline { + c.Ui.Error("-force and -no-deadline are mutually exclusive") return 1 } + // Parse the duration + var d time.Duration + if force { + d = -1 * time.Second + } else if noDeadline { + d = 0 + } else if deadline != "" { + dur, err := time.ParseDuration(deadline) + if err != nil { + c.Ui.Error(fmt.Sprintf("Failed to parse deadline %q: %v", deadline, err)) + return 1 + } + if dur <= 0 { + c.Ui.Error("A positive drain duration must be given") + return 1 + } + + d = dur + } else { + d = defaultDrainDuration + } + // Get the HTTP client client, err := c.Meta.Client() if err != nil { @@ -186,9 +256,17 @@ func (c *NodeDrainCommand) Run(args []string) int { } } + var spec *api.DrainSpec + if enable { + spec = &api.DrainSpec{ + Deadline: d, + IgnoreSystemJobs: ignoreSystem, + } + } + // Toggle node draining - if _, err := client.Nodes().ToggleDrain(node.ID, enable, nil); err != nil { - c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err)) + if _, err := client.Nodes().UpdateDrain(node.ID, spec, nil); err != nil { + c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) return 1 } return 0 diff --git a/command/node_drain_test.go b/command/node_drain_test.go index 241845ab4878..20f63d95f571 100644 --- 
a/command/node_drain_test.go +++ b/command/node_drain_test.go @@ -85,6 +85,49 @@ func TestNodeDrainCommand_Fails(t *testing.T) { if out := ui.ErrorWriter.String(); !strings.Contains(out, "No node(s) with prefix or id") { t.Fatalf("expected not exist error, got: %s", out) } + ui.ErrorWriter.Reset() + + // Fail on disable being used with drain strategy flags + for _, flag := range []string{"-force", "-no-deadline", "-ignore-system"} { + if code := cmd.Run([]string{"-address=" + url, "-disable", flag, "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "combined with flags configuring drain strategy") { + t.Fatalf("got: %s", out) + } + ui.ErrorWriter.Reset() + } + + // Fail on setting a deadline plus deadline modifying flags + for _, flag := range []string{"-force", "-no-deadline"} { + if code := cmd.Run([]string{"-address=" + url, "-enable", "-deadline=10s", flag, "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "deadline can't be combined with") { + t.Fatalf("got: %s", out) + } + ui.ErrorWriter.Reset() + } + + // Fail on setting a force and no deadline + if code := cmd.Run([]string{"-address=" + url, "-enable", "-force", "-no-deadline", "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "mutually exclusive") { + t.Fatalf("got: %s", out) + } + ui.ErrorWriter.Reset() + + // Fail on setting a bad deadline + for _, flag := range []string{"-deadline=0s", "-deadline=-1s"} { + if code := cmd.Run([]string{"-address=" + url, "-enable", flag, "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "positive") { + t.Fatalf("got: %s", out) + } + ui.ErrorWriter.Reset() + } } func TestNodeDrainCommand_AutocompleteArgs(t *testing.T) { diff --git a/command/node_status.go b/command/node_status.go index cbce475346a9..b347b6b78853 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -37,7 +37,7 @@ type NodeStatusCommand struct { func (c *NodeStatusCommand) Help() string { helpText := ` -Usage: nomad node-status [options] +Usage: nomad node status [options] Display status information about a given node. 
The list of nodes returned includes only nodes which jobs may be scheduled to, and diff --git a/commands.go b/commands.go index 75155948bd21..9e27664af896 100644 --- a/commands.go +++ b/commands.go @@ -258,17 +258,31 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory { Meta: meta, }, nil }, + "node": func() (cli.Command, error) { + return &command.NodeCommand{ + Meta: meta, + }, nil + }, "node-drain": func() (cli.Command, error) { return &command.NodeDrainCommand{ Meta: meta, }, nil }, + "node drain": func() (cli.Command, error) { + return &command.NodeDrainCommand{ + Meta: meta, + }, nil + }, "node-status": func() (cli.Command, error) { return &command.NodeStatusCommand{ Meta: meta, }, nil }, - + "node status": func() (cli.Command, error) { + return &command.NodeStatusCommand{ + Meta: meta, + }, nil + }, "operator": func() (cli.Command, error) { return &command.OperatorCommand{ Meta: meta, diff --git a/main.go b/main.go index 4fe38fd6a998..3c178e145709 100644 --- a/main.go +++ b/main.go @@ -37,6 +37,7 @@ func RunCustom(args []string, commands map[string]cli.CommandFactory) int { case "quota list", "quota delete", "quota apply", "quota status", "quota inspect", "quota init": case "operator raft", "operator raft list-peers", "operator raft remove-peer": case "acl policy", "acl policy apply", "acl token", "acl token create": + case "node-drain", "node-status": default: commandsInclude = append(commandsInclude, k) } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 12c279e561f6..eec0964bef6f 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1191,12 +1191,8 @@ const ( NodeSchedulingIneligible = "ineligible" ) -// DrainStrategy describes a Node's drain behavior. -type DrainStrategy struct { - // StartTime as nanoseconds since Unix epoch indicating when a drain - // began for deadline calcuations. - StartTime int64 - +// DrainSpec describes a Node's desired drain behavior. +type DrainSpec struct { // Deadline is the duration after StartTime when the remaining // allocations on a draining Node should be told to stop. Deadline time.Duration @@ -1206,6 +1202,16 @@ type DrainStrategy struct { IgnoreSystemJobs bool } +// DrainStrategy describes a Node's drain behavior. +type DrainStrategy struct { + // DrainSpec is the user declared drain specification + DrainSpec + + // StartTime as nanoseconds since Unix epoch indicating when a drain + // began for deadline calcuations. 
+ StartTime int64 +} + func (d *DrainStrategy) Copy() *DrainStrategy { if d == nil { return nil From 762db7c5d71f199167fa2f7be04963f525ba8a94 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 26 Feb 2018 14:34:32 -0800 Subject: [PATCH 11/79] Fix tests --- nomad/drain_test.go | 4 +++- nomad/fsm_test.go | 4 +++- nomad/node_endpoint_test.go | 16 ++++++++++++---- nomad/state/state_store_test.go | 4 +++- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/nomad/drain_test.go b/nomad/drain_test.go index c47e0d401548..13465ede0763 100644 --- a/nomad/drain_test.go +++ b/nomad/drain_test.go @@ -144,7 +144,9 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { // Start draining node 1 //FIXME update drain rpc to skip fsm manipulation and use api strategy := &structs.DrainStrategy{ - Deadline: -1 * time.Second, + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, + }, } node, err := state.NodeByID(nil, c1.NodeID()) require.Nil(err) diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index b9fc4845da82..87c3a8b44d3c 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -294,7 +294,9 @@ func TestFSM_UpdateNodeDrain(t *testing.T) { require.Nil(resp) strategy := &structs.DrainStrategy{ - Deadline: 10 * time.Second, + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, } req2 := structs.NodeUpdateDrainRequest{ NodeID: node.ID, diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 0de46ed22c61..5ce3cc2728a2 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -765,7 +765,9 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) strategy := &structs.DrainStrategy{ - Deadline: 10 * time.Second, + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, } // Update the status @@ -809,7 +811,9 @@ func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { dereg := &structs.NodeUpdateDrainRequest{ NodeID: node.ID, DrainStrategy: &structs.DrainStrategy{ - Deadline: 10 * time.Second, + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, }, WriteRequest: structs.WriteRequest{Region: "global"}, } @@ -910,7 +914,9 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { dereg := &structs.NodeUpdateDrainRequest{ NodeID: node.ID, DrainStrategy: &structs.DrainStrategy{ - Deadline: -1 * time.Second, + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, + }, }, WriteRequest: structs.WriteRequest{Region: "global"}, } @@ -2369,7 +2375,9 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { // Node drain updates trigger watches. 
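	// The strategy's deadline is carried on its embedded DrainSpec.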
time.AfterFunc(100*time.Millisecond, func() { s := &structs.DrainStrategy{ - Deadline: 10 * time.Second, + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, } if err := state.UpdateNodeDrain(3, node.ID, s, 101); err != nil { t.Fatalf("err: %v", err) diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index af2e8cceb9dd..3d97cf7f541b 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -707,7 +707,9 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { expectedTime := int64(101) expectedDrain := &structs.DrainStrategy{ - Deadline: 10 * time.Second, + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, + }, } // Create a watchset so we can test that update node drain fires the watch From fba20fd58d1784605e46432e9ddfc918a71e853f Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 26 Feb 2018 15:06:01 -0800 Subject: [PATCH 12/79] Remove update time --- nomad/drain_test.go | 2 +- nomad/fsm.go | 2 +- nomad/fsm_test.go | 1 - nomad/node_endpoint.go | 3 --- nomad/node_endpoint_test.go | 2 +- nomad/state/state_store.go | 4 +--- nomad/state/state_store_test.go | 13 ++++++------- nomad/structs/structs.go | 1 - 8 files changed, 10 insertions(+), 18 deletions(-) diff --git a/nomad/drain_test.go b/nomad/drain_test.go index 13465ede0763..9bae27fe38d2 100644 --- a/nomad/drain_test.go +++ b/nomad/drain_test.go @@ -150,7 +150,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { } node, err := state.NodeByID(nil, c1.NodeID()) require.Nil(err) - require.Nil(state.UpdateNodeDrain(node.ModifyIndex+1, node.ID, strategy, 101)) + require.Nil(state.UpdateNodeDrain(node.ModifyIndex+1, node.ID, strategy)) // Start node 2 c2 := client.TestClient(t, func(conf *config.Config) { diff --git a/nomad/fsm.go b/nomad/fsm.go index 58d1527514a1..a946d523fd0d 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -328,7 +328,7 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.UpdateTime); err != nil { + if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) return err } diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 87c3a8b44d3c..922ba2ca61bb 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -301,7 +301,6 @@ func TestFSM_UpdateNodeDrain(t *testing.T) { req2 := structs.NodeUpdateDrainRequest{ NodeID: node.ID, DrainStrategy: strategy, - UpdateTime: 101, } buf, err = structs.Encode(structs.NodeUpdateDrainRequestType, req2) require.Nil(err) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 2631939ad13d..2aa02bfce191 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -433,9 +433,6 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, return fmt.Errorf("node not found") } - // Update the timestamp to - args.UpdateTime = time.Now().Unix() - // Commit this update via Raft _, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) if err != nil { diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 5ce3cc2728a2..7649cc2dbe06 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -2379,7 +2379,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { Deadline: 10 * time.Second, }, } - if err := state.UpdateNodeDrain(3, node.ID, s, 101); err != nil { + if err := 
state.UpdateNodeDrain(3, node.ID, s); err != nil { t.Fatalf("err: %v", err) } }) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 45c595d04b36..cfef5e4831d8 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -616,8 +616,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error } // UpdateNodeDrain is used to update the drain of a node -func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, - drain *structs.DrainStrategy, updateTime int64) error { +func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs.DrainStrategy) error { txn := s.db.Txn(true) defer txn.Abort() @@ -647,7 +646,6 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, } copyNode.ModifyIndex = index - copyNode.StatusUpdatedAt = updateTime // Insert the node if err := txn.Insert("nodes", copyNode); err != nil { diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 3d97cf7f541b..81e17eaabe4f 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -705,19 +705,18 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { require.Nil(state.UpsertNode(1000, node)) - expectedTime := int64(101) + // Create a watchset so we can test that update node drain fires the watch + ws := memdb.NewWatchSet() + _, err := state.NodeByID(ws, node.ID) + require.Nil(err) + expectedDrain := &structs.DrainStrategy{ DrainSpec: structs.DrainSpec{ Deadline: -1 * time.Second, }, } - // Create a watchset so we can test that update node drain fires the watch - ws := memdb.NewWatchSet() - _, err := state.NodeByID(ws, node.ID) - require.Nil(err) - - require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain, expectedTime)) + require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain)) require.True(watchFired(ws)) ws = memdb.NewWatchSet() diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index eec0964bef6f..d28c89fa8985 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -307,7 +307,6 @@ type NodeUpdateDrainRequest struct { NodeID string Drain bool // TODO Deprecate DrainStrategy *DrainStrategy - UpdateTime int64 WriteRequest } From a7833bc609ff110ca215c30e9f927d224c88d757 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 09:40:17 -0800 Subject: [PATCH 13/79] Upgrade path --- command/agent/node_endpoint.go | 26 ++++++++++++++++++++++++-- nomad/node_endpoint.go | 10 ++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/command/agent/node_endpoint.go b/command/agent/node_endpoint.go index b22850873a56..1a937447eb04 100644 --- a/command/agent/node_endpoint.go +++ b/command/agent/node_endpoint.go @@ -2,7 +2,9 @@ package agent import ( "net/http" + "strconv" "strings" + "time" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/nomad/structs" @@ -102,8 +104,28 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request } var drainRequest api.NodeUpdateDrainRequest - if err := decodeBody(req, &drainRequest); err != nil { - return nil, CodedError(400, err.Error()) + + // COMPAT: Remove in 0.9. Allow the old style enable query param. + // Get the enable parameter + enableRaw := req.URL.Query().Get("enable") + var enable bool + if enableRaw != "" { + var err error + enable, err = strconv.ParseBool(enableRaw) + if err != nil { + return nil, CodedError(400, "invalid enable value") + } + + // Use the force drain to have it keep the same behavior as old clients. 
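		// When enable=false the DrainSpec is left nil, which the server
		// applies as ending any active drain; the node's scheduling
		// eligibility is not changed.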
+ if enable { + drainRequest.DrainSpec = &api.DrainSpec{ + Deadline: -1 * time.Second, + } + } + } else { + if err := decodeBody(req, &drainRequest); err != nil { + return nil, CodedError(400, err.Error()) + } } args := structs.NodeUpdateDrainRequest{ diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 2aa02bfce191..082491e89370 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -433,6 +433,16 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, return fmt.Errorf("node not found") } + // COMPAT: Remove in 0.9. Attempt to upgrade the request if it is of the old + // format. + if args.Drain && args.DrainStrategy == nil { + args.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, // Force drain + }, + } + } + // Commit this update via Raft _, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) if err != nil { From 5c101de72581ce6382b87deebc470fddff3fa5af Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 09:41:55 -0800 Subject: [PATCH 14/79] flag comment --- command/node_drain.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/command/node_drain.go b/command/node_drain.go index c27068b97e59..0b92a0a8e990 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -54,7 +54,7 @@ Node Drain Options: -ignore-system Ignore system allows the drain to complete without stopping system job - allocations. + allocations. By default system jobs are stopped last. -self Set the drain status of the local node. From dcafa8b46027178a4bb602462f4b209f5a9fe253 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 26 Feb 2018 16:34:42 -0800 Subject: [PATCH 15/79] RPC/FSM/State Store for Eligibility --- nomad/fsm.go | 16 ++++++ nomad/fsm_test.go | 61 ++++++++++++++++++++++ nomad/node_endpoint.go | 56 ++++++++++++++++++++ nomad/node_endpoint_test.go | 91 +++++++++++++++++++++++++++++++++ nomad/state/state_store.go | 46 ++++++++++++++++- nomad/state/state_store_test.go | 46 +++++++++++++++++ nomad/structs/structs.go | 8 +++ 7 files changed, 322 insertions(+), 2 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index a946d523fd0d..7df2582dbb72 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -242,6 +242,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyBatchDeregisterJob(buf[1:], log.Index) case structs.AllocUpdateDesiredTransitionRequestType: return n.applyAllocUpdateDesiredTransition(buf[1:], log.Index) + case structs.NodeUpdateEligibilityRequestType: + return n.applyNodeEligibilityUpdate(buf[1:], log.Index) } // Check enterprise only message types. 
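	// Note: NodeUpdateEligibilityRequestType is appended after the existing
	// MessageType constants so previously written Raft log entries keep their
	// numeric values.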
@@ -335,6 +337,20 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { return nil } +func (n *nomadFSM) applyNodeEligibilityUpdate(buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "node_eligibility_update"}, time.Now()) + var req structs.NodeUpdateEligibilityRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.UpdateNodeEligibility(index, req.NodeID, req.Eligibility); err != nil { + n.logger.Printf("[ERR] nomad.fsm: UpdateNodeEligibility failed: %v", err) + return err + } + return nil +} + func (n *nomadFSM) applyUpsertJob(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "register_job"}, time.Now()) var req structs.JobRegisterRequest diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 922ba2ca61bb..b834b432f5ff 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -316,6 +316,67 @@ func TestFSM_UpdateNodeDrain(t *testing.T) { require.Equal(node.DrainStrategy, strategy) } +func TestFSM_UpdateNodeEligibility(t *testing.T) { + t.Parallel() + require := require.New(t) + fsm := testFSM(t) + + node := mock.Node() + req := structs.NodeRegisterRequest{ + Node: node, + } + buf, err := structs.Encode(structs.NodeRegisterRequestType, req) + require.Nil(err) + + resp := fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Set the eligibility + req2 := structs.NodeUpdateEligibilityRequest{ + NodeID: node.ID, + Eligibility: structs.NodeSchedulingIneligible, + } + buf, err = structs.Encode(structs.NodeUpdateEligibilityRequestType, req2) + require.Nil(err) + + resp = fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Lookup the node and check + node, err = fsm.State().NodeByID(nil, req.Node.ID) + require.Nil(err) + require.Equal(node.SchedulingEligibility, structs.NodeSchedulingIneligible) + + // Update the drain + strategy := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, + } + req3 := structs.NodeUpdateDrainRequest{ + NodeID: node.ID, + DrainStrategy: strategy, + } + buf, err = structs.Encode(structs.NodeUpdateDrainRequestType, req3) + require.Nil(err) + resp = fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Try forcing eligibility + req4 := structs.NodeUpdateEligibilityRequest{ + NodeID: node.ID, + Eligibility: structs.NodeSchedulingEligible, + } + buf, err = structs.Encode(structs.NodeUpdateEligibilityRequestType, req4) + require.Nil(err) + + resp = fsm.Apply(makeLog(buf)) + require.NotNil(resp) + err, ok := resp.(error) + require.True(ok) + require.Contains(err.Error(), "draining") +} + func TestFSM_RegisterJob(t *testing.T) { t.Parallel() fsm := testFSM(t) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 082491e89370..f46de16618df 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -456,6 +456,62 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, return nil } +// UpdateEligibility is used to update the scheduling eligibility of a node +func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest, + reply *structs.GenericResponse) error { + if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done { + return err + } + defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now()) + + // Check node write permissions + if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { + return err + } else if aclObj != nil && 
!aclObj.AllowNodeWrite() { + return structs.ErrPermissionDenied + } + + // Verify the arguments + if args.NodeID == "" { + return fmt.Errorf("missing node ID for setting scheduling eligibility") + } + + // Look for the node + snap, err := n.srv.fsm.State().Snapshot() + if err != nil { + return err + } + ws := memdb.NewWatchSet() + node, err := snap.NodeByID(ws, args.NodeID) + if err != nil { + return err + } + if node == nil { + return fmt.Errorf("node not found") + } + + if node.DrainStrategy != nil && args.Eligibility == structs.NodeSchedulingEligible { + return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining") + } + + // Commit this update via Raft + outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args) + if err != nil { + n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err) + return err + } + if outErr != nil { + if err, ok := outErr.(error); ok && err != nil { + n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err) + return err + } + } + + // Set the reply index + reply.Index = index + return nil +} + // Evaluate is used to force a re-evaluation of the node func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error { if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done { diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 7649cc2dbe06..87c418d0d8a9 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -980,6 +980,97 @@ func TestClientEndpoint_Drain_Down(t *testing.T) { }) } +func TestClientEndpoint_UpdateEligibility(t *testing.T) { + t.Parallel() + require := require.New(t) + s1 := TestServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create the register request + node := mock.Node() + reg := &structs.NodeRegisterRequest{ + Node: node, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + + // Fetch the response + var resp structs.NodeUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) + + // Update the eligibility + dereg := &structs.NodeUpdateEligibilityRequest{ + NodeID: node.ID, + Eligibility: structs.NodeSchedulingIneligible, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var resp2 structs.GenericResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateEligibility", dereg, &resp2)) + require.NotZero(resp2.Index) + + // Check for the node in the FSM + state := s1.fsm.State() + out, err := state.NodeByID(nil, node.ID) + require.Nil(err) + require.Equal(out.SchedulingEligibility, structs.NodeSchedulingIneligible) +} + +func TestClientEndpoint_UpdateEligibility_ACL(t *testing.T) { + t.Parallel() + s1, root := TestACLServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + require := require.New(t) + + // Create the node + node := mock.Node() + state := s1.fsm.State() + + require.Nil(state.UpsertNode(1, node), "UpsertNode") + + // Create the policy and tokens + validToken := mock.CreatePolicyAndToken(t, state, 1001, "test-valid", mock.NodePolicy(acl.PolicyWrite)) + invalidToken := mock.CreatePolicyAndToken(t, state, 1003, "test-invalid", mock.NodePolicy(acl.PolicyRead)) + + // Update the status without a token and expect failure + dereg := &structs.NodeUpdateEligibilityRequest{ + NodeID: node.ID, + Eligibility: structs.NodeSchedulingIneligible, + WriteRequest: 
structs.WriteRequest{Region: "global"}, + } + { + var resp structs.GenericResponse + err := msgpackrpc.CallWithCodec(codec, "Node.UpdateEligibility", dereg, &resp) + require.NotNil(err, "RPC") + require.Equal(err.Error(), structs.ErrPermissionDenied.Error()) + } + + // Try with a valid token + dereg.AuthToken = validToken.SecretID + { + var resp structs.GenericResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateEligibility", dereg, &resp), "RPC") + } + + // Try with a invalid token + dereg.AuthToken = invalidToken.SecretID + { + var resp structs.GenericResponse + err := msgpackrpc.CallWithCodec(codec, "Node.UpdateEligibility", dereg, &resp) + require.NotNil(err, "RPC") + require.Equal(err.Error(), structs.ErrPermissionDenied.Error()) + } + + // Try with a root token + dereg.AuthToken = root.SecretID + { + var resp structs.GenericResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateEligibility", dereg, &resp), "RPC") + } +} + func TestClientEndpoint_GetNode(t *testing.T) { t.Parallel() s1 := TestServer(t, nil) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index cfef5e4831d8..e48a940e86b5 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -509,7 +509,7 @@ func (s *StateStore) DeleteDeployment(index uint64, deploymentIDs []string) erro // UpsertNode is used to register a node or update a node definition // This is assumed to be triggered by the client, so we retain the value -// of drain which is set by the scheduler. +// of drain/eligibility which is set by the scheduler. func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { txn := s.db.Txn(true) defer txn.Abort() @@ -525,10 +525,12 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { exist := existing.(*structs.Node) node.CreateIndex = exist.CreateIndex node.ModifyIndex = index - node.Drain = exist.Drain // Retain the drain mode // Retain node events that have already been set on the node node.Events = exist.Events + + node.Drain = exist.Drain // Retain the drain mode + node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility } else { // Because this is the first time the node is being registered, we should // also create a node registration event @@ -659,6 +661,46 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs return nil } +// UpdateNodeEligibility is used to update the scheduling eligibility of a node +func (s *StateStore) UpdateNodeEligibility(index uint64, nodeID string, eligibility string) error { + + txn := s.db.Txn(true) + defer txn.Abort() + + // Lookup the node + existing, err := txn.First("nodes", "id", nodeID) + if err != nil { + return fmt.Errorf("node lookup failed: %v", err) + } + if existing == nil { + return fmt.Errorf("node not found") + } + + // Copy the existing node + existingNode := existing.(*structs.Node) + copyNode := existingNode.Copy() + + // Check if this is a valid action + if copyNode.DrainStrategy != nil && eligibility == structs.NodeSchedulingEligible { + return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining") + } + + // Update the eligibility in the copy + copyNode.SchedulingEligibility = eligibility + copyNode.ModifyIndex = index + + // Insert the node + if err := txn.Insert("nodes", copyNode); err != nil { + return fmt.Errorf("node update failed: %v", err) + } + if err := txn.Insert("index", &IndexEntry{"nodes", index}); err != nil { + return fmt.Errorf("index update failed: %v", err) + 
} + + txn.Commit() + return nil +} + // UpsertNodeEvents adds the node events to the nodes, rotating events as // necessary. func (s *StateStore) UpsertNodeEvents(index uint64, nodeEvents map[string][]*structs.NodeEvent) error { diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 81e17eaabe4f..1bf1467deda5 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -822,6 +822,52 @@ func TestStateStore_NodeEvents_RetentionWindow(t *testing.T) { require.Equal(uint64(20), out.Events[len(out.Events)-1].CreateIndex) } +func TestStateStore_UpdateNodeEligibility(t *testing.T) { + require := require.New(t) + state := testStateStore(t) + node := mock.Node() + + err := state.UpsertNode(1000, node) + if err != nil { + t.Fatalf("err: %v", err) + } + + expectedEligibility := structs.NodeSchedulingIneligible + + // Create a watchset so we can test that update node drain fires the watch + ws := memdb.NewWatchSet() + if _, err := state.NodeByID(ws, node.ID); err != nil { + t.Fatalf("bad: %v", err) + } + + require.Nil(state.UpdateNodeEligibility(1001, node.ID, expectedEligibility)) + require.True(watchFired(ws)) + + ws = memdb.NewWatchSet() + out, err := state.NodeByID(ws, node.ID) + require.Nil(err) + require.Equal(out.SchedulingEligibility, expectedEligibility) + require.EqualValues(1001, out.ModifyIndex) + + index, err := state.Index("nodes") + require.Nil(err) + require.EqualValues(1001, index) + require.False(watchFired(ws)) + + // Set a drain strategy + expectedDrain := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, + }, + } + require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain)) + + // Try to set the node to eligible + err = state.UpdateNodeEligibility(1003, node.ID, structs.NodeSchedulingEligible) + require.NotNil(err) + require.Contains(err.Error(), "while it is draining") +} + func TestStateStore_Nodes(t *testing.T) { state := testStateStore(t) var nodes []*structs.Node diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index d28c89fa8985..e1d9b077752d 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -79,6 +79,7 @@ const ( UpsertNodeEventsType JobBatchDeregisterRequestType AllocUpdateDesiredTransitionRequestType + NodeUpdateEligibilityRequestType ) const ( @@ -310,6 +311,13 @@ type NodeUpdateDrainRequest struct { WriteRequest } +// NodeUpdateEligibilityRequest is used for updating the scheduling eligibility +type NodeUpdateEligibilityRequest struct { + NodeID string + Eligibility string + WriteRequest +} + // NodeEvaluateRequest is used to re-evaluate the node type NodeEvaluateRequest struct { NodeID string From 0fb9ba7732742555b91dd68577c50f0109a87f5e Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 12:59:27 -0800 Subject: [PATCH 16/79] HTTP and API --- api/nodes.go | 28 ++++++++++++ api/nodes_test.go | 66 +++++++++++++++++++++++++++++ command/agent/node_endpoint.go | 23 ++++++++++ command/agent/node_endpoint_test.go | 51 ++++++++++++++++++++++ nomad/node_endpoint.go | 6 +++ 5 files changed, 174 insertions(+) diff --git a/api/nodes.go b/api/nodes.go index 4868fef7cfd4..94fc206ce5ee 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -4,6 +4,8 @@ import ( "fmt" "sort" "time" + + "github.com/hashicorp/nomad/nomad/structs" ) // Nodes is used to query node-related API endpoints @@ -65,6 +67,32 @@ func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, q *WriteOptions) (*W return wm, nil } +// NodeUpdateEligibilityRequest is used to update 
the drain specification for a node. +type NodeUpdateEligibilityRequest struct { + // NodeID is the node to update the drain specification for. + NodeID string + Eligibility string +} + +// ToggleEligibility is used to update the scheduling eligibility of the node +func (n *Nodes) ToggleEligibility(nodeID string, eligible bool, q *WriteOptions) (*WriteMeta, error) { + e := structs.NodeSchedulingEligible + if !eligible { + e = structs.NodeSchedulingIneligible + } + + req := &NodeUpdateEligibilityRequest{ + NodeID: nodeID, + Eligibility: e, + } + + wm, err := n.client.write("/v1/node/"+nodeID+"/eligibility", req, nil, q) + if err != nil { + return nil, err + } + return wm, nil +} + // Allocations is used to return the allocations associated with a node. func (n *Nodes) Allocations(nodeID string, q *QueryOptions) ([]*Allocation, *QueryMeta, error) { var resp []*Allocation diff --git a/api/nodes_test.go b/api/nodes_test.go index e2c0a3c78136..22d61c4011af 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -212,6 +212,72 @@ func TestNodes_ToggleDrain(t *testing.T) { } } +func TestNodes_ToggleEligibility(t *testing.T) { + t.Parallel() + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { + c.DevMode = true + }) + defer s.Stop() + nodes := c.Nodes() + + // Wait for node registration and get the ID + var nodeID string + testutil.WaitForResult(func() (bool, error) { + out, _, err := nodes.List(nil) + if err != nil { + return false, err + } + if n := len(out); n != 1 { + return false, fmt.Errorf("expected 1 node, got: %d", n) + } + nodeID = out[0].ID + return true, nil + }, func(err error) { + t.Fatalf("err: %s", err) + }) + + // Check for eligibility + out, _, err := nodes.Info(nodeID, nil) + if err != nil { + t.Fatalf("err: %s", err) + } + if out.SchedulingEligibility != structs.NodeSchedulingEligible { + t.Fatalf("node should be eligible") + } + + // Toggle it off + wm, err := nodes.ToggleEligibility(nodeID, false, nil) + if err != nil { + t.Fatalf("err: %s", err) + } + assertWriteMeta(t, wm) + + // Check again + out, _, err = nodes.Info(nodeID, nil) + if err != nil { + t.Fatalf("err: %s", err) + } + if out.SchedulingEligibility != structs.NodeSchedulingIneligible { + t.Fatalf("bad eligibility: %v vs %v", out.SchedulingEligibility, structs.NodeSchedulingIneligible) + } + + // Toggle on + wm, err = nodes.ToggleEligibility(nodeID, true, nil) + if err != nil { + t.Fatalf("err: %s", err) + } + assertWriteMeta(t, wm) + + // Check again + out, _, err = nodes.Info(nodeID, nil) + if err != nil { + t.Fatalf("err: %s", err) + } + if out.SchedulingEligibility != structs.NodeSchedulingEligible { + t.Fatalf("bad eligibility: %v vs %v", out.SchedulingEligibility, structs.NodeSchedulingEligible) + } +} + func TestNodes_Allocations(t *testing.T) { t.Parallel() c, s := makeClient(t, nil, nil) diff --git a/command/agent/node_endpoint.go b/command/agent/node_endpoint.go index 1a937447eb04..a86df751c1ab 100644 --- a/command/agent/node_endpoint.go +++ b/command/agent/node_endpoint.go @@ -44,6 +44,9 @@ func (s *HTTPServer) NodeSpecificRequest(resp http.ResponseWriter, req *http.Req case strings.HasSuffix(path, "/drain"): nodeName := strings.TrimSuffix(path, "/drain") return s.nodeToggleDrain(resp, req, nodeName) + case strings.HasSuffix(path, "/eligibility"): + nodeName := strings.TrimSuffix(path, "/eligibility") + return s.nodeToggleEligibility(resp, req, nodeName) case strings.HasSuffix(path, "/purge"): nodeName := strings.TrimSuffix(path, "/purge") return s.nodePurge(resp, req, nodeName) @@ -149,6 
+152,26 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request return out, nil } +func (s *HTTPServer) nodeToggleEligibility(resp http.ResponseWriter, req *http.Request, + nodeID string) (interface{}, error) { + if req.Method != "PUT" && req.Method != "POST" { + return nil, CodedError(405, ErrInvalidMethod) + } + + var drainRequest structs.NodeUpdateEligibilityRequest + if err := decodeBody(req, &drainRequest); err != nil { + return nil, CodedError(400, err.Error()) + } + s.parseWriteRequest(req, &drainRequest.WriteRequest) + + var out structs.GenericResponse + if err := s.agent.RPC("Node.UpdateEligibility", &drainRequest, &out); err != nil { + return nil, err + } + setIndex(resp, out.Index) + return nil, nil +} + func (s *HTTPServer) nodeQuery(resp http.ResponseWriter, req *http.Request, nodeID string) (interface{}, error) { if req.Method != "GET" { diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index ac1bd00b7286..e208f59b72e8 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -302,6 +302,57 @@ func TestHTTP_NodeDrain(t *testing.T) { }) } +func TestHTTP_NodeEligble(t *testing.T) { + t.Parallel() + require := require.New(t) + httpTest(t, nil, func(s *TestAgent) { + // Create the node + node := mock.Node() + args := structs.NodeRegisterRequest{ + Node: node, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var resp structs.NodeUpdateResponse + require.Nil(s.Agent.RPC("Node.Register", &args, &resp)) + + drainReq := api.NodeUpdateEligibilityRequest{ + NodeID: node.ID, + Eligibility: structs.NodeSchedulingIneligible, + } + + // Make the HTTP request + buf := encodeReq(drainReq) + req, err := http.NewRequest("POST", "/v1/node/"+node.ID+"/eligibility", buf) + require.Nil(err) + respW := httptest.NewRecorder() + + // Make the request + _, err = s.Server.NodeSpecificRequest(respW, req) + require.Nil(err) + + // Check for the index + require.NotZero(respW.HeaderMap.Get("X-Nomad-Index")) + + // Check that the node has been updated + state := s.Agent.server.State() + out, err := state.NodeByID(nil, node.ID) + require.Nil(err) + require.Equal(structs.NodeSchedulingIneligible, out.SchedulingEligibility) + + // Make the HTTP request to set something invalid + drainReq.Eligibility = "foo" + buf = encodeReq(drainReq) + req, err = http.NewRequest("POST", "/v1/node/"+node.ID+"/eligibility", buf) + require.Nil(err) + respW = httptest.NewRecorder() + + // Make the request + _, err = s.Server.NodeSpecificRequest(respW, req) + require.NotNil(err) + require.Contains(err.Error(), "invalid") + }) +} + func TestHTTP_NodePurge(t *testing.T) { t.Parallel() httpTest(t, nil, func(s *TestAgent) { diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index f46de16618df..5cf5aa587d7e 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -494,6 +494,12 @@ func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest, return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining") } + switch args.Eligibility { + case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible: + default: + return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility) + } + // Commit this update via Raft outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args) if err != nil { From 378c56629405f5c1dcd9dcf1e871a84b109e9ef7 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 13:54:27 -0800 
Subject: [PATCH 17/79] node eligibility command --- command/node_drain.go | 2 + command/node_eligibility.go | 168 +++++++++++++++++++++++++++++++ command/node_eligibility_test.go | 124 +++++++++++++++++++++++ commands.go | 5 + main.go | 2 +- 5 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 command/node_eligibility.go create mode 100644 command/node_eligibility_test.go diff --git a/command/node_drain.go b/command/node_drain.go index 0b92a0a8e990..18bd695c4256 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -269,5 +269,7 @@ func (c *NodeDrainCommand) Run(args []string) int { c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) return 1 } + + c.Ui.Output(fmt.Sprintf("Node %q drain strategy set", node.ID)) return 0 } diff --git a/command/node_eligibility.go b/command/node_eligibility.go new file mode 100644 index 000000000000..2db14ddc29b8 --- /dev/null +++ b/command/node_eligibility.go @@ -0,0 +1,168 @@ +package command + +import ( + "fmt" + "strings" + + "github.com/hashicorp/nomad/api/contexts" + "github.com/posener/complete" +) + +type NodeEligibilityCommand struct { + Meta +} + +func (c *NodeEligibilityCommand) Help() string { + helpText := ` +Usage: nomad node eligibility [options] + + Toggles the nodes scheduling eligibility. When a node is marked as ineligible, + no new allocations will be placed on it but existing allocations will remain. + To remove existing allocations, use the node drain command. + + It is required that either -enable or -disable is specified, but not both. + The -self flag is useful to drain the local node. + +General Options: + + ` + generalOptionsUsage() + ` + +Node Eligibility Options: + + -disable + Mark the specified node as ineligible for new allocations. + + -enable + Mark the specified node as eligible for new allocations. + + -self + Set the eligibility of the local node. +` + return strings.TrimSpace(helpText) +} + +func (c *NodeEligibilityCommand) Synopsis() string { + return "Toggle scheduling eligibility for a given node" +} + +func (c *NodeEligibilityCommand) AutocompleteFlags() complete.Flags { + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-disable": complete.PredictNothing, + "-enable": complete.PredictNothing, + "-self": complete.PredictNothing, + }) +} + +func (c *NodeEligibilityCommand) AutocompleteArgs() complete.Predictor { + return complete.PredictFunc(func(a complete.Args) []string { + client, err := c.Meta.Client() + if err != nil { + return nil + } + + resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Nodes, nil) + if err != nil { + return []string{} + } + return resp.Matches[contexts.Nodes] + }) +} + +func (c *NodeEligibilityCommand) Run(args []string) int { + var enable, disable, self bool + + flags := c.Meta.FlagSet("node-eligibility", FlagSetClient) + flags.Usage = func() { c.Ui.Output(c.Help()) } + flags.BoolVar(&enable, "enable", false, "Mark node as eligibile for scheduling") + flags.BoolVar(&disable, "disable", false, "Mark node as ineligibile for scheduling") + flags.BoolVar(&self, "self", false, "") + + if err := flags.Parse(args); err != nil { + return 1 + } + + // Check that we got either enable or disable, but not both. 
+ if (enable && disable) || (!enable && !disable) { + c.Ui.Error(c.Help()) + return 1 + } + + // Check that we got a node ID + args = flags.Args() + if l := len(args); self && l != 0 || !self && l != 1 { + c.Ui.Error("Node ID must be specified if -self isn't being used") + return 1 + } + + // Get the HTTP client + client, err := c.Meta.Client() + if err != nil { + c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) + return 1 + } + + // If -self flag is set then determine the current node. + var nodeID string + if !self { + nodeID = args[0] + } else { + var err error + if nodeID, err = getLocalNodeID(client); err != nil { + c.Ui.Error(err.Error()) + return 1 + } + } + + // Check if node exists + if len(nodeID) == 1 { + c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters.")) + return 1 + } + + nodeID = sanatizeUUIDPrefix(nodeID) + nodes, _, err := client.Nodes().PrefixList(nodeID) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err)) + return 1 + } + // Return error if no nodes are found + if len(nodes) == 0 { + c.Ui.Error(fmt.Sprintf("No node(s) with prefix or id %q found", nodeID)) + return 1 + } + if len(nodes) > 1 { + // Format the nodes list that matches the prefix so that the user + // can create a more specific request + out := make([]string, len(nodes)+1) + out[0] = "ID|Datacenter|Name|Class|Drain|Status" + for i, node := range nodes { + out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s", + node.ID, + node.Datacenter, + node.Name, + node.NodeClass, + node.Drain, + node.Status) + } + // Dump the output + c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", formatList(out))) + return 1 + } + + // Prefix lookup matched a single node + node, _, err := client.Nodes().Info(nodes[0].ID, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err)) + return 1 + } + + // Toggle node eligibility + if _, err := client.Nodes().ToggleEligibility(node.ID, enable, nil); err != nil { + c.Ui.Error(fmt.Sprintf("Error updating scheduling eligibility: %s", err)) + return 1 + } + + c.Ui.Output(fmt.Sprintf("Node %q scheduling eligibility set", node.ID)) + return 0 +} diff --git a/command/node_eligibility_test.go b/command/node_eligibility_test.go new file mode 100644 index 000000000000..3129fe86a19b --- /dev/null +++ b/command/node_eligibility_test.go @@ -0,0 +1,124 @@ +package command + +import ( + "fmt" + "strings" + "testing" + + "github.com/hashicorp/nomad/testutil" + "github.com/mitchellh/cli" + "github.com/posener/complete" + "github.com/stretchr/testify/assert" +) + +func TestNodeEligibilityCommand_Implements(t *testing.T) { + t.Parallel() + var _ cli.Command = &NodeEligibilityCommand{} +} + +func TestNodeEligibilityCommand_Fails(t *testing.T) { + t.Parallel() + srv, _, url := testServer(t, false, nil) + defer srv.Shutdown() + + ui := new(cli.MockUi) + cmd := &NodeEligibilityCommand{Meta: Meta{Ui: ui}} + + // Fails on misuse + if code := cmd.Run([]string{"some", "bad", "args"}); code != 1 { + t.Fatalf("expected exit code 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, cmd.Help()) { + t.Fatalf("expected help output, got: %s", out) + } + ui.ErrorWriter.Reset() + + // Fails on connection failure + if code := cmd.Run([]string{"-address=nope", "-enable", "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit code 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "Error toggling") { + t.Fatalf("expected failed toggle error, got: 
%s", out) + } + ui.ErrorWriter.Reset() + + // Fails on non-existent node + if code := cmd.Run([]string{"-address=" + url, "-enable", "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "No node(s) with prefix or id") { + t.Fatalf("expected not exist error, got: %s", out) + } + ui.ErrorWriter.Reset() + + // Fails if both enable and disable specified + if code := cmd.Run([]string{"-enable", "-disable", "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, cmd.Help()) { + t.Fatalf("expected help output, got: %s", out) + } + ui.ErrorWriter.Reset() + + // Fails if neither enable or disable specified + if code := cmd.Run([]string{"12345678-abcd-efab-cdef-123456789abc"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, cmd.Help()) { + t.Fatalf("expected help output, got: %s", out) + } + ui.ErrorWriter.Reset() + + // Fail on identifier with too few characters + if code := cmd.Run([]string{"-address=" + url, "-enable", "1"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "must contain at least two characters.") { + t.Fatalf("expected too few characters error, got: %s", out) + } + ui.ErrorWriter.Reset() + + // Identifiers with uneven length should produce a query result + if code := cmd.Run([]string{"-address=" + url, "-enable", "123"}); code != 1 { + t.Fatalf("expected exit 1, got: %d", code) + } + if out := ui.ErrorWriter.String(); !strings.Contains(out, "No node(s) with prefix or id") { + t.Fatalf("expected not exist error, got: %s", out) + } + ui.ErrorWriter.Reset() +} + +func TestNodeEligibilityCommand_AutocompleteArgs(t *testing.T) { + assert := assert.New(t) + t.Parallel() + + srv, client, url := testServer(t, true, nil) + defer srv.Shutdown() + + // Wait for a node to appear + var nodeID string + testutil.WaitForResult(func() (bool, error) { + nodes, _, err := client.Nodes().List(nil) + if err != nil { + return false, err + } + if len(nodes) == 0 { + return false, fmt.Errorf("missing node") + } + nodeID = nodes[0].ID + return true, nil + }, func(err error) { + t.Fatalf("err: %s", err) + }) + + ui := new(cli.MockUi) + cmd := &NodeEligibilityCommand{Meta: Meta{Ui: ui, flagAddress: url}} + + prefix := nodeID[:len(nodeID)-5] + args := complete.Args{Last: prefix} + predictor := cmd.AutocompleteArgs() + + res := predictor.Predict(args) + assert.Equal(1, len(res)) + assert.Equal(nodeID, res[0]) +} diff --git a/commands.go b/commands.go index 9e27664af896..0b3a422f0348 100644 --- a/commands.go +++ b/commands.go @@ -273,6 +273,11 @@ func Commands(metaPtr *command.Meta) map[string]cli.CommandFactory { Meta: meta, }, nil }, + "node eligibility": func() (cli.Command, error) { + return &command.NodeEligibilityCommand{ + Meta: meta, + }, nil + }, "node-status": func() (cli.Command, error) { return &command.NodeStatusCommand{ Meta: meta, diff --git a/main.go b/main.go index 3c178e145709..f482ca2838bd 100644 --- a/main.go +++ b/main.go @@ -47,7 +47,7 @@ func RunCustom(args []string, commands map[string]cli.CommandFactory) int { // users should not be running should be placed here, versus hiding // subcommands from the main help, which should be filtered out of the // commands above. 
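The command above is invoked as "nomad node eligibility -enable|-disable <node-id>" (or with -self for the local node) and ultimately drives the /v1/node/<id>/eligibility endpoint added earlier in this series. For completeness, here is a rough, untested sketch of performing the same toggle directly from Go through the api package; the node ID is hypothetical and error handling is minimal:

package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Connect to the local agent (honors NOMAD_ADDR and related settings).
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Hypothetical node ID; use a real UUID from node status output.
	nodeID := "12345678-abcd-efab-cdef-123456789abc"

	// false marks the node ineligible for new placements; existing
	// allocations keep running until the node is drained.
	if _, err := client.Nodes().ToggleEligibility(nodeID, false, nil); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("node %s is now ineligible for scheduling\n", nodeID)
}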
- hidden := []string{"check", "executor", "syslog"} + hidden := []string{"check", "executor", "syslog", "node-drain", "node-status"} cli := &cli.CLI{ Name: "nomad", From d6399cb733e764cd6fa0e5328868b91e361a85e9 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 14:00:55 -0800 Subject: [PATCH 18/79] Add eligibility to node view --- api/nodes.go | 23 ++++++++++++----------- command/node_status.go | 12 ++++++++---- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/api/nodes.go b/api/nodes.go index 94fc206ce5ee..37adb8fc34e5 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -241,17 +241,18 @@ type HostDiskStats struct { // NodeListStub is a subset of information returned during // node list operations. type NodeListStub struct { - Address string - ID string - Datacenter string - Name string - NodeClass string - Version string - Drain bool - Status string - StatusDescription string - CreateIndex uint64 - ModifyIndex uint64 + Address string + ID string + Datacenter string + Name string + NodeClass string + Version string + Drain bool + SchedulingEligibility string + Status string + StatusDescription string + CreateIndex uint64 + ModifyIndex uint64 } // NodeIndexSort reverse sorts nodes by CreateIndex diff --git a/command/node_status.go b/command/node_status.go index b347b6b78853..c59b8d7e6f8e 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -183,7 +183,7 @@ func (c *NodeStatusCommand) Run(args []string) int { out[0] += "Address|Version|" } - out[0] += "Drain|Status" + out[0] += "Drain|Eligibility|Status" if c.list_allocs { out[0] += "|Running Allocs" @@ -199,9 +199,11 @@ func (c *NodeStatusCommand) Run(args []string) int { out[i+1] += fmt.Sprintf("|%s|%s", node.Address, node.Version) } - out[i+1] += fmt.Sprintf("|%v|%s", + out[i+1] += fmt.Sprintf("|%v|%s|%s", node.Drain, + node.SchedulingEligibility, node.Status) + if c.list_allocs { numAllocs, err := getRunningAllocs(client, node.ID) if err != nil { @@ -249,14 +251,15 @@ func (c *NodeStatusCommand) Run(args []string) int { // Format the nodes list that matches the prefix so that the user // can create a more specific request out := make([]string, len(nodes)+1) - out[0] = "ID|DC|Name|Class|Drain|Status" + out[0] = "ID|DC|Name|Class|Drain|Eligibility|Status" for i, node := range nodes { - out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s", + out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s|%s", limit(node.ID, c.length), node.Datacenter, node.Name, node.NodeClass, node.Drain, + node.SchedulingEligibility, node.Status) } // Dump the output @@ -313,6 +316,7 @@ func (c *NodeStatusCommand) formatNode(client *api.Client, node *api.Node) int { fmt.Sprintf("Class|%s", node.NodeClass), fmt.Sprintf("DC|%s", node.Datacenter), fmt.Sprintf("Drain|%v", node.Drain), + fmt.Sprintf("Eligibility|%s", node.SchedulingEligibility), fmt.Sprintf("Status|%s", node.Status), fmt.Sprintf("Drivers|%s", strings.Join(nodeDrivers(node), ",")), } From 451b77d5d761e58d5c877b2cbbc6b97b1d2bddf1 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 14:08:29 -0800 Subject: [PATCH 19/79] Unblock evals once eligible --- nomad/fsm.go | 15 +++++++++++++ nomad/fsm_test.go | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/nomad/fsm.go b/nomad/fsm.go index 7df2582dbb72..b377f09b3fef 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -344,10 +344,25 @@ func (n *nomadFSM) applyNodeEligibilityUpdate(buf []byte, index uint64) interfac panic(fmt.Errorf("failed to decode request: %v", err)) } + // Lookup 
the existing node + node, err := n.state.NodeByID(nil, req.NodeID) + if err != nil { + n.logger.Printf("[ERR] nomad.fsm: UpdateNodeEligibility failed to lookup node %q: %v", req.NodeID, err) + return err + } + if err := n.state.UpdateNodeEligibility(index, req.NodeID, req.Eligibility); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateNodeEligibility failed: %v", err) return err } + + // Unblock evals for the nodes computed node class if it is in a ready + // state. + if node != nil && node.SchedulingEligibility == structs.NodeSchedulingIneligible && + req.Eligibility == structs.NodeSchedulingEligible { + n.blockedEvals.Unblock(node.ComputedClass, index) + } + return nil } diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index b834b432f5ff..9f8ed205a77e 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -377,6 +377,60 @@ func TestFSM_UpdateNodeEligibility(t *testing.T) { require.Contains(err.Error(), "draining") } +func TestFSM_UpdateNodeEligibility_Unblock(t *testing.T) { + t.Parallel() + require := require.New(t) + fsm := testFSM(t) + + node := mock.Node() + req := structs.NodeRegisterRequest{ + Node: node, + } + buf, err := structs.Encode(structs.NodeRegisterRequestType, req) + require.Nil(err) + + resp := fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Set the eligibility + req2 := structs.NodeUpdateEligibilityRequest{ + NodeID: node.ID, + Eligibility: structs.NodeSchedulingIneligible, + } + buf, err = structs.Encode(structs.NodeUpdateEligibilityRequestType, req2) + require.Nil(err) + + resp = fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Mark an eval as blocked. + eval := mock.Eval() + eval.ClassEligibility = map[string]bool{node.ComputedClass: true} + fsm.blockedEvals.Block(eval) + + // Set eligible + req4 := structs.NodeUpdateEligibilityRequest{ + NodeID: node.ID, + Eligibility: structs.NodeSchedulingEligible, + } + buf, err = structs.Encode(structs.NodeUpdateEligibilityRequestType, req4) + require.Nil(err) + + resp = fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Verify the eval was unblocked. 
+ testutil.WaitForResult(func() (bool, error) { + bStats := fsm.blockedEvals.Stats() + if bStats.TotalBlocked != 0 { + return false, fmt.Errorf("bad: %#v", bStats) + } + return true, nil + }, func(err error) { + t.Fatalf("err: %s", err) + }) +} + func TestFSM_RegisterJob(t *testing.T) { t.Parallel() fsm := testFSM(t) From a96c3374e2abe08022ce716368fd0377977f41d3 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 14:20:28 -0800 Subject: [PATCH 20/79] Fix retaining the drain --- nomad/state/state_store.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index e48a940e86b5..ef6a51754167 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -531,6 +531,7 @@ func (s *StateStore) UpsertNode(index uint64, node *structs.Node) error { node.Drain = exist.Drain // Retain the drain mode node.SchedulingEligibility = exist.SchedulingEligibility // Retain the eligibility + node.DrainStrategy = exist.DrainStrategy // Retain the drain strategy } else { // Because this is the first time the node is being registered, we should // also create a node registration event @@ -598,8 +599,7 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error // Copy the existing node existingNode := existing.(*structs.Node) - copyNode := new(structs.Node) - *copyNode = *existingNode + copyNode := existingNode.Copy() // Update the status in the copy copyNode.Status = status @@ -639,11 +639,7 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs // Update the drain in the copy copyNode.Drain = drain != nil // COMPAT: Remove in Nomad 0.9 copyNode.DrainStrategy = drain - if drain == nil { - // When stopping a drain unset the strategy but leave the node - // ineligible for scheduling - copyNode.DrainStrategy = nil - } else { + if drain != nil { copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible } From d65ae92dfa180a46eafe9f6a44ddf74e2cb61c49 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 14:43:35 -0800 Subject: [PATCH 21/79] Small refactor and cleanups --- command/node_drain.go | 17 ++----------- command/node_eligibility.go | 23 ++++-------------- command/node_status.go | 48 +++++++++++++++++++++++++------------ 3 files changed, 40 insertions(+), 48 deletions(-) diff --git a/command/node_drain.go b/command/node_drain.go index 18bd695c4256..f6475c7bedd4 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -204,21 +204,8 @@ func (c *NodeDrainCommand) Run(args []string) int { return 1 } if len(nodes) > 1 { - // Format the nodes list that matches the prefix so that the user - // can create a more specific request - out := make([]string, len(nodes)+1) - out[0] = "ID|Datacenter|Name|Class|Drain|Status" - for i, node := range nodes { - out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s", - node.ID, - node.Datacenter, - node.Name, - node.NodeClass, - node.Drain, - node.Status) - } - // Dump the output - c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", formatList(out))) + c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", + formatNodeStubList(nodes, true))) return 1 } diff --git a/command/node_eligibility.go b/command/node_eligibility.go index 2db14ddc29b8..b0bcbc35bccf 100644 --- a/command/node_eligibility.go +++ b/command/node_eligibility.go @@ -21,7 +21,7 @@ Usage: nomad node eligibility [options] To remove existing allocations, use the node drain command. 
It is required that either -enable or -disable is specified, but not both. - The -self flag is useful to drain the local node. + The -self flag is useful to set the scheduling eligibility of the local node. General Options: @@ -123,7 +123,7 @@ func (c *NodeEligibilityCommand) Run(args []string) int { nodeID = sanatizeUUIDPrefix(nodeID) nodes, _, err := client.Nodes().PrefixList(nodeID) if err != nil { - c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err)) + c.Ui.Error(fmt.Sprintf("Error updating scheduling eligibility: %s", err)) return 1 } // Return error if no nodes are found @@ -132,28 +132,15 @@ func (c *NodeEligibilityCommand) Run(args []string) int { return 1 } if len(nodes) > 1 { - // Format the nodes list that matches the prefix so that the user - // can create a more specific request - out := make([]string, len(nodes)+1) - out[0] = "ID|Datacenter|Name|Class|Drain|Status" - for i, node := range nodes { - out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s", - node.ID, - node.Datacenter, - node.Name, - node.NodeClass, - node.Drain, - node.Status) - } - // Dump the output - c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", formatList(out))) + c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", + formatNodeStubList(nodes, true))) return 1 } // Prefix lookup matched a single node node, _, err := client.Nodes().Info(nodes[0].ID, nil) if err != nil { - c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err)) + c.Ui.Error(fmt.Sprintf("Error updating scheduling eligibility: %s", err)) return 1 } diff --git a/command/node_status.go b/command/node_status.go index c59b8d7e6f8e..68c72342b11f 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -248,24 +248,12 @@ func (c *NodeStatusCommand) Run(args []string) int { return 1 } if len(nodes) > 1 { - // Format the nodes list that matches the prefix so that the user - // can create a more specific request - out := make([]string, len(nodes)+1) - out[0] = "ID|DC|Name|Class|Drain|Eligibility|Status" - for i, node := range nodes { - out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s|%s", - limit(node.ID, c.length), - node.Datacenter, - node.Name, - node.NodeClass, - node.Drain, - node.SchedulingEligibility, - node.Status) - } // Dump the output - c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", formatList(out))) + c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s", + formatNodeStubList(nodes, c.verbose))) return 1 } + // Prefix lookup matched a single node node, _, err := client.Nodes().Info(nodes[0].ID, nil) if err != nil { @@ -641,3 +629,33 @@ func getHostResources(hostStats *api.HostStats, node *api.Node) ([]string, error } return resources, nil } + +// formatNodeStubList is used to return a table format of a list of node stubs. 
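With the shared helper below, a multi-node prefix match is rendered once as a pipe-delimited table rather than being rebuilt at each call site. As a purely illustrative example of the output (all values hypothetical), the header row is ID|DC|Name|Class|Drain|Eligibility|Status and a data row might read f7476465|dc1|client-1|linux|false|eligible|ready, with the full node ID shown only when verbose output is requested.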
+func formatNodeStubList(nodes []*api.NodeListStub, verbose bool) string { + // Return error if no nodes are found + if len(nodes) == 0 { + return "" + } + // Truncate the id unless full length is requested + length := shortId + if verbose { + length = fullId + } + + // Format the nodes list that matches the prefix so that the user + // can create a more specific request + out := make([]string, len(nodes)+1) + out[0] = "ID|DC|Name|Class|Drain|Eligibility|Status" + for i, node := range nodes { + out[i+1] = fmt.Sprintf("%s|%s|%s|%s|%v|%s|%s", + limit(node.ID, length), + node.Datacenter, + node.Name, + node.NodeClass, + node.Drain, + node.SchedulingEligibility, + node.Status) + } + + return formatList(out) +} From 7d58209927da49d09ad7b27edc87eb70eb6af75d Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 27 Feb 2018 14:46:40 -0800 Subject: [PATCH 22/79] code review --- command/agent/node_endpoint_test.go | 2 +- command/node_eligibility.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index e208f59b72e8..19ff6e64cc1e 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -302,7 +302,7 @@ func TestHTTP_NodeDrain(t *testing.T) { }) } -func TestHTTP_NodeEligble(t *testing.T) { +func TestHTTP_NodeEligible(t *testing.T) { t.Parallel() require := require.New(t) httpTest(t, nil, func(s *TestAgent) { diff --git a/command/node_eligibility.go b/command/node_eligibility.go index b0bcbc35bccf..a3fe5f802cfd 100644 --- a/command/node_eligibility.go +++ b/command/node_eligibility.go @@ -120,7 +120,7 @@ func (c *NodeEligibilityCommand) Run(args []string) int { return 1 } - nodeID = sanatizeUUIDPrefix(nodeID) + nodeID = sanitizeUUIDPrefix(nodeID) nodes, _, err := client.Nodes().PrefixList(nodeID) if err != nil { c.Ui.Error(fmt.Sprintf("Error updating scheduling eligibility: %s", err)) From 5be32632049e4326eaa53b7d9db3a3ed821e5c82 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 26 Feb 2018 16:28:10 -0800 Subject: [PATCH 23/79] refactor drainer into a subpkg --- .../deploymentwatcher/deployments_watcher.go | 4 +- nomad/{ => drainer}/drain.go | 141 +++++++++++------- nomad/{ => drainer}/drain_test.go | 36 ++++- nomad/drainer_shims.go | 30 ++++ nomad/leader.go | 25 +++- nomad/rpc_test.go | 2 +- nomad/server.go | 16 ++ 7 files changed, 176 insertions(+), 78 deletions(-) rename nomad/{ => drainer}/drain.go (85%) rename nomad/{ => drainer}/drain_test.go (89%) create mode 100644 nomad/drainer_shims.go diff --git a/nomad/deploymentwatcher/deployments_watcher.go b/nomad/deploymentwatcher/deployments_watcher.go index d9aab78770fb..a88a1de67f93 100644 --- a/nomad/deploymentwatcher/deployments_watcher.go +++ b/nomad/deploymentwatcher/deployments_watcher.go @@ -102,7 +102,7 @@ func NewDeploymentsWatcher(logger *log.Logger, // SetEnabled is used to control if the watcher is enabled. The watcher // should only be enabled on the active leader. When being enabled the state is // passed in as it is no longer valid once a leader election has taken place. 
-func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore) error { +func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore) { w.l.Lock() defer w.l.Unlock() @@ -120,8 +120,6 @@ func (w *Watcher) SetEnabled(enabled bool, state *state.StateStore) error { if enabled && !wasEnabled { go w.watchDeployments(w.ctx) } - - return nil } // flush is used to clear the state of the watcher diff --git a/nomad/drain.go b/nomad/drainer/drain.go similarity index 85% rename from nomad/drain.go rename to nomad/drainer/drain.go index f0e1dd59b89f..e0c386f4056e 100644 --- a/nomad/drain.go +++ b/nomad/drainer/drain.go @@ -1,4 +1,4 @@ -package nomad +package drainer import ( "context" @@ -71,21 +71,67 @@ func (s *stopAllocs) add(j *structs.Job, a *structs.Allocation) { s.jobBatch[jobKey{a.Namespace, a.JobID}] = j } -// startNodeDrainer should be called in establishLeadership by the leader. -func (s *Server) startNodeDrainer(stopCh chan struct{}) { - state := s.fsm.State() +// RaftApplier contains methods for applying the raft requests required by the +// NodeDrainer. +type RaftApplier interface { + AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error + NodeDrainComplete(nodeID string) error +} - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - go func() { - select { - case <-stopCh: +type nodeDrainerState struct { + enabled bool + state *state.StateStore +} + +type NodeDrainer struct { + enabledCh chan nodeDrainerState + + raft RaftApplier + + logger *log.Logger +} + +func NewNodeDrainer(logger *log.Logger, raft RaftApplier) *NodeDrainer { + return &NodeDrainer{ + enabledCh: make(chan nodeDrainerState), + raft: raft, + logger: logger, + } +} + +func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) { + n.enabledCh <- nodeDrainerState{enabled, state} +} + +//FIXME never exits +func (n *NodeDrainer) Run() { + running := false + var ctx context.Context + cancel := func() {} + for s := range n.enabledCh { + switch { + case s.enabled && running: + // Already running + continue + case !s.enabled && !running: + // Already stopped + continue + case !s.enabled && running: + // Stop running node drainer cancel() - case <-ctx.Done(): + running = false + case s.enabled && !running: + // Start running node drainer + ctx, cancel = context.WithCancel(context.Background()) + go n.nodeDrainer(ctx, s.state) + running = true } - }() + } +} - nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(s.logger, state) +// nodeDrainer should be called in establishLeadership by the leader. 
+func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) { + nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(n.logger, state) // Wait for a node's drain deadline to expire var nextDeadline time.Time @@ -102,12 +148,12 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { deadlineTimer := time.NewTimer(time.Until(nextDeadline)) // Watch for nodes to start or stop draining - nodeWatcher := newNodeWatcher(s.logger, nodes, nodesIndex, state) + nodeWatcher := newNodeWatcher(n.logger, nodes, nodesIndex, state) go nodeWatcher.run(ctx) // Watch for drained allocations to be replaced // Watch for changes in allocs for jobs with allocs on draining nodes - jobWatcher := newJobWatcher(s.logger, drainingJobs, allocsIndex, state) + jobWatcher := newJobWatcher(n.logger, drainingJobs, allocsIndex, state) go jobWatcher.run(ctx) for { @@ -116,11 +162,11 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { //possible outcome of this is that an allocation could be //stopped on a node that recently had its drain cancelled which //doesn't seem like that bad of a pathological case - s.logger.Printf("[TRACE] nomad.drain: LOOP next deadline: %s (%s)", nextDeadline, time.Until(nextDeadline)) + n.logger.Printf("[TRACE] nomad.drain: LOOP next deadline: %s (%s)", nextDeadline, time.Until(nextDeadline)) select { case nodes = <-nodeWatcher.nodesCh: // update draining nodes - s.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes)) + n.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes)) // update deadline timer changed := false @@ -139,7 +185,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // if changed reset the timer if changed { - s.logger.Printf("[TRACE] nomad.drain: new node deadline: %s", nextDeadline) + n.logger.Printf("[TRACE] nomad.drain: new node deadline: %s", nextDeadline) if !deadlineTimer.Stop() { // timer may have been recv'd in a // previous loop, so don't block @@ -152,10 +198,10 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { } case jobs := <-jobWatcher.WaitCh(): - s.logger.Printf("[TRACE] nomad.drain: running due to alloc change (%d jobs updated)", len(jobs)) + n.logger.Printf("[TRACE] nomad.drain: running due to alloc change (%d jobs updated)", len(jobs)) case when := <-deadlineTimer.C: // deadline for a node was reached - s.logger.Printf("[TRACE] nomad.drain: running due to deadline reached (at %s)", when) + n.logger.Printf("[TRACE] nomad.drain: running due to deadline reached (at %s)", when) case <-ctx.Done(): // exit return @@ -164,15 +210,13 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // Tracks nodes that are done draining doneNodes := map[string]*structs.Node{} - //TODO work from a state snapshot? perhaps from a last update - //index? 
I can't think of why this would be beneficial as this - //entire process runs asynchronously with the fsm/scheduler/etc + // Capture state (statestore and time) to do consistent comparisons snapshot, err := state.Snapshot() if err != nil { //FIXME panic(err) } - now := time.Now() // for determing deadlines in a consistent way + now := time.Now() // job key -> {job, allocs} // Collect all allocs for all jobs with at least one @@ -227,14 +271,14 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // allocs left to be drained if !alloc.TerminalStatus() { if !allocsLeft { - s.logger.Printf("[TRACE] nomad.drain: node %s has allocs left to drain", nodeID[:6]) + n.logger.Printf("[TRACE] nomad.drain: node %s has allocs left to drain", nodeID[:6]) allocsLeft = true } } // Don't bother collecting system/batch jobs for nodes that haven't hit their deadline if job.Type != structs.JobTypeService && !deadlineReached { - s.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s", + n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s", job.Type, job.Name, node.DrainStrategy.DeadlineTime().Sub(now)) skipJob[jobkey] = struct{}{} continue @@ -248,14 +292,14 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // Count the number of down (terminal or nil deployment status) per task group if job.Type == structs.JobTypeService { - n := 0 + num := 0 for _, a := range jobAllocs { if !a.TerminalStatus() && a.DeploymentStatus != nil { upPerTG[makeTaskGroupKey(a)]++ - n++ + num++ } } - s.logger.Printf("[TRACE] nomad.drain: job %s has %d task groups running", job.Name, n) + n.logger.Printf("[TRACE] nomad.drain: job %s has %d task groups running", job.Name, num) } drainable[jobkey] = &drainingJob{ @@ -268,7 +312,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // if node has no allocs or has hit its deadline, it's done draining! 
if !allocsLeft || deadlineReached { - s.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain or has reached deadline", nodeID) + n.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain or has reached deadline", nodeID) jobWatcher.nodeDone(nodeID) doneNodes[nodeID] = node } @@ -298,7 +342,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { tgKey := makeTaskGroupKey(alloc) if node.DrainStrategy.DeadlineTime().Before(now) { - s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) // Alloc's Node has reached its deadline stoplist.add(drainingJob.job, alloc) upPerTG[tgKey]-- @@ -319,19 +363,19 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // Only 1, drain if tg.Count == 1 { - s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to count=1", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to count=1", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) stoplist.add(drainingJob.job, alloc) continue } // No migrate strategy or a max parallel of 0 mean force draining if tg.Migrate == nil || tg.Migrate.MaxParallel == 0 { - s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to force drain", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to force drain", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) stoplist.add(drainingJob.job, alloc) continue } - s.logger.Printf("[TRACE] nomad.drain: considering job %s alloc %s count %d maxp %d up %d", + n.logger.Printf("[TRACE] nomad.drain: considering job %s alloc %s count %d maxp %d up %d", drainingJob.job.Name, alloc.ID[:6], tg.Count, tg.Migrate.MaxParallel, upPerTG[tgKey]) // Count - MaxParalell = minimum number of allocations that must be "up" @@ -339,7 +383,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { // If minimum is < the current number up it is safe to stop one. 
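To make that arithmetic concrete with hypothetical numbers: a task group with count 3 and a migrate max_parallel of 1 gives minUp = 3 - 1 = 2, so while 3 allocations are up one more may be stopped, but once only 2 remain the group is at its floor and no further allocations are drained from this node until replacements become healthy and the up count rises again.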
if minUp < upPerTG[tgKey] { - s.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to max parallel", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to max parallel", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) // More migrations are allowed, add to stoplist stoplist.add(drainingJob.job, alloc) upPerTG[tgKey]-- @@ -348,7 +392,7 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { } if len(stoplist.allocBatch) > 0 { - s.logger.Printf("[DEBUG] nomad.drain: stopping %d alloc(s) for %d job(s)", len(stoplist.allocBatch), len(stoplist.jobBatch)) + n.logger.Printf("[DEBUG] nomad.drain: stopping %d alloc(s) for %d job(s)", len(stoplist.allocBatch), len(stoplist.jobBatch)) // Reevaluate affected jobs evals := make([]*structs.Evaluation, 0, len(stoplist.jobBatch)) @@ -365,40 +409,21 @@ func (s *Server) startNodeDrainer(stopCh chan struct{}) { }) } - // Send raft request - batch := &structs.AllocUpdateDesiredTransitionRequest{ - Allocs: stoplist.allocBatch, - Evals: evals, - WriteRequest: structs.WriteRequest{Region: s.config.Region}, - } - // Commit this update via Raft - //TODO Not the right request - _, index, err := s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, batch) - if err != nil { + if err := n.raft.AllocUpdateDesiredTransition(stoplist.allocBatch, evals); err != nil { //FIXME panic(err) } - - //TODO i bet there's something useful to do with this index - _ = index } // Unset drain for nodes done draining for nodeID, node := range doneNodes { - args := structs.NodeUpdateDrainRequest{ - NodeID: nodeID, - Drain: false, - WriteRequest: structs.WriteRequest{Region: s.config.Region}, - } - - _, _, err := s.raftApply(structs.NodeUpdateDrainRequestType, &args) - if err != nil { - s.logger.Printf("[ERR] nomad.drain: failed to unset drain for: %v", err) + if err := n.raft.NodeDrainComplete(nodeID); err != nil { + n.logger.Printf("[ERR] nomad.drain: failed to unset drain for: %v", err) //FIXME panic(err) } - s.logger.Printf("[INFO] nomad.drain: node %s (%s) completed draining", nodeID, node.Name) + n.logger.Printf("[INFO] nomad.drain: node %s (%s) completed draining", nodeID, node.Name) delete(nodes, nodeID) } } diff --git a/nomad/drain_test.go b/nomad/drainer/drain_test.go similarity index 89% rename from nomad/drain_test.go rename to nomad/drainer/drain_test.go index 9bae27fe38d2..1f38a4c293fc 100644 --- a/nomad/drain_test.go +++ b/nomad/drainer/drain_test.go @@ -1,7 +1,9 @@ -package nomad +package drainer_test import ( "fmt" + "net" + "net/rpc" "sort" "strings" "testing" @@ -10,7 +12,9 @@ import ( msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/client" "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/helper/pool" "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" @@ -20,11 +24,29 @@ import ( "github.com/stretchr/testify/require" ) +// rpcClient is a test helper method to return a ClientCodec to use to make rpc +// calls to the passed server. 
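The codec returned by this helper is intended to be driven with msgpackrpc.CallWithCodec from the net-rpc-msgpackrpc package already imported above; a minimal illustration, assuming a codec and request built as in the test below:

var resp structs.JobRegisterResponse
err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp)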
+func rpcClient(t *testing.T, conf *nomad.Config) rpc.ClientCodec { + addr := conf.RPCAddr + conn, err := net.DialTimeout("tcp", addr.String(), time.Second) + if err != nil { + t.Fatalf("err: %v", err) + } + // Write the Nomad RPC byte to set the mode + conn.Write([]byte{byte(pool.RpcNomad)}) + return pool.NewClientCodec(conn) +} + // TestNodeDrainer_SimpleDrain asserts that draining when there are two nodes // moves allocs from the draining node to the other node. func TestNodeDrainer_SimpleDrain(t *testing.T) { require := require.New(t) - server := TestServer(t, nil) + + // Capture test servers config + var serverConfig *nomad.Config + server := nomad.TestServer(t, func(c *nomad.Config) { + serverConfig = c + }) defer server.Shutdown() testutil.WaitForLeader(t, server.RPC) @@ -32,7 +54,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { // Setup 2 Nodes: A & B; A has allocs and is draining // Create mock jobs - state := server.fsm.State() + state := server.State() serviceJob := mock.Job() serviceJob.Name = "service-job" @@ -83,12 +105,12 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { // Start node 1 c1 := client.TestClient(t, func(conf *config.Config) { conf.LogOutput = testlog.NewWriter(t) - conf.Servers = []string{server.config.RPCAddr.String()} + conf.Servers = []string{serverConfig.RPCAddr.String()} }) defer c1.Shutdown() // Start jobs so they all get placed on node 1 - codec := rpcClient(t, server) + codec := rpcClient(t, serverConfig) for _, job := range []*structs.Job{systemJob, serviceJob, batchJob} { req := &structs.JobRegisterRequest{ Job: job.Copy(), @@ -137,7 +159,6 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) } } - server.logger.Println("----------------------------------------------------------------------quitting--------------------------------------------------------") t.Fatalf("failed waiting for all allocs to start: %v", err) }) @@ -155,7 +176,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { // Start node 2 c2 := client.TestClient(t, func(conf *config.Config) { conf.NetworkSpeed = 10000 - conf.Servers = []string{server.config.RPCAddr.String()} + conf.Servers = []string{serverConfig.RPCAddr.String()} }) defer c2.Shutdown() @@ -191,7 +212,6 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Logf("%d alloc %s job %s status %s prev %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus, alloc.PreviousAllocation) } } - server.logger.Println("----------------------------------------------------------------------quitting--------------------------------------------------------") t.Errorf("failed waiting for all allocs to migrate: %v", err) }) diff --git a/nomad/drainer_shims.go b/nomad/drainer_shims.go new file mode 100644 index 000000000000..0ced081f5fd9 --- /dev/null +++ b/nomad/drainer_shims.go @@ -0,0 +1,30 @@ +package nomad + +import "github.com/hashicorp/nomad/nomad/structs" + +// drainerShim implements the drainer.RaftApplier interface required by the +// NodeDrainer. 
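A conventional Go guard that is not part of this patch, but which keeps a shim like the one below from silently drifting away from the interface it is meant to satisfy, is a compile-time assertion next to the type:

var _ drainer.RaftApplier = drainerShim{}

This line fails the build in package nomad if drainerShim ever stops implementing drainer.RaftApplier.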
+type drainerShim struct { + s *Server +} + +func (d drainerShim) NodeDrainComplete(nodeID string) error { + args := &structs.NodeUpdateDrainRequest{ + NodeID: nodeID, + Drain: false, + WriteRequest: structs.WriteRequest{Region: d.s.config.Region}, + } + + _, _, err := d.s.raftApply(structs.NodeUpdateDrainRequestType, args) + return err +} + +func (d drainerShim) AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error { + args := &structs.AllocUpdateDesiredTransitionRequest{ + Allocs: allocs, + Evals: evals, + WriteRequest: structs.WriteRequest{Region: d.s.config.Region}, + } + _, _, err := d.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, args) + return err +} diff --git a/nomad/leader.go b/nomad/leader.go index b81b65d23232..a395c3a91d5b 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -199,9 +199,10 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { s.blockedEvals.SetTimetable(s.fsm.TimeTable()) // Enable the deployment watcher, since we are now the leader - if err := s.deploymentWatcher.SetEnabled(true, s.State()); err != nil { - return err - } + s.deploymentWatcher.SetEnabled(true, s.State()) + + // Enable the NodeDrainer + s.nodeDrainer.SetEnabled(true, s.State()) // Restore the eval broker state if err := s.restoreEvals(); err != nil { @@ -267,8 +268,15 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { go s.replicateACLTokens(stopCh) } - // Start Node Drainer - go s.startNodeDrainer(stopCh) + // Convert stopCh into a Context + ctx, cancel := context.WithCancel(context.Background()) + go func() { + defer cancel() + select { + case <-stopCh: + case <-ctx.Done(): + } + }() // Setup any enterprise systems required. if err := s.establishEnterpriseLeadership(stopCh); err != nil { @@ -676,9 +684,10 @@ func (s *Server) revokeLeadership() error { s.vault.SetActive(false) // Disable the deployment watcher as it is only useful as a leader. - if err := s.deploymentWatcher.SetEnabled(false, nil); err != nil { - return err - } + s.deploymentWatcher.SetEnabled(false, nil) + + // Disable the node drainer + s.nodeDrainer.SetEnabled(false, nil) // Disable any enterprise systems required. if err := s.revokeEnterpriseLeadership(); err != nil { diff --git a/nomad/rpc_test.go b/nomad/rpc_test.go index c876c6adb1df..ec885cc652c8 100644 --- a/nomad/rpc_test.go +++ b/nomad/rpc_test.go @@ -30,7 +30,7 @@ func rpcClient(t *testing.T, s *Server) rpc.ClientCodec { if err != nil { t.Fatalf("err: %v", err) } - // Write the Consul RPC byte to set the mode + // Write the Nomad RPC byte to set the mode conn.Write([]byte{byte(pool.RpcNomad)}) return pool.NewClientCodec(conn) } diff --git a/nomad/server.go b/nomad/server.go index 68789da4a259..f49c62b9cf5a 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -27,6 +27,7 @@ import ( "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/nomad/deploymentwatcher" + "github.com/hashicorp/nomad/nomad/drainer" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" @@ -172,6 +173,9 @@ type Server struct { // make the required calls to continue to transition the deployment. deploymentWatcher *deploymentwatcher.Watcher + // nodeDrainer is used to drain allocations from nodes. 
+ nodeDrainer *drainer.NodeDrainer + // evalBroker is used to manage the in-progress evaluations // that are waiting to be brokered to a sub-scheduler evalBroker *EvalBroker @@ -355,6 +359,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg return nil, fmt.Errorf("failed to create deployment watcher: %v", err) } + // Setup the node drainer. + s.setupNodeDrainer() + // Setup the enterprise state if err := s.setupEnterprise(config); err != nil { return nil, err @@ -880,6 +887,15 @@ func (s *Server) setupDeploymentWatcher() error { return nil } +// setupNodeDrainer creates a node drainer which will be enabled when a server +// becomes a leader. +func (s *Server) setupNodeDrainer() { + // create a shim around raft requests + shim := drainerShim{s} + s.nodeDrainer = drainer.NewNodeDrainer(s.logger, shim) + go s.nodeDrainer.Run() +} + // setupVaultClient is used to set up the Vault API client. func (s *Server) setupVaultClient() error { v, err := NewVaultClient(s.config.VaultConfig, s.logger, s.purgeVaultAccessors) From 9de890899ad4241c0ab3f68ec095366abfbdae6a Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 27 Feb 2018 13:48:13 -0800 Subject: [PATCH 24/79] drainer: drainer should shutdown with server --- nomad/drainer/drain.go | 44 ++++++++++++++++++++++++++++++++++-------- nomad/server.go | 2 +- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go index e0c386f4056e..b795e0eea173 100644 --- a/nomad/drainer/drain.go +++ b/nomad/drainer/drain.go @@ -78,37 +78,64 @@ type RaftApplier interface { NodeDrainComplete(nodeID string) error } +// nodeDrainerState is used to communicate the state set by +// NodeDrainer.SetEnabled to the concurrently executing Run loop. type nodeDrainerState struct { enabled bool state *state.StateStore } +// NodeDrainer migrates allocations off of draining nodes. SetEnabled(true) +// should be called when a server establishes leadership and SetEnabled(false) +// called when leadership is lost. type NodeDrainer struct { enabledCh chan nodeDrainerState raft RaftApplier + shutdownCh <-chan struct{} + logger *log.Logger } -func NewNodeDrainer(logger *log.Logger, raft RaftApplier) *NodeDrainer { +// NewNodeDrainer creates a new NodeDrainer which will exit when shutdownCh is +// closed. A RaftApplier shim must be supplied to allow NodeDrainer access to +// the raft messages it sends. +func NewNodeDrainer(logger *log.Logger, shutdownCh <-chan struct{}, raft RaftApplier) *NodeDrainer { return &NodeDrainer{ - enabledCh: make(chan nodeDrainerState), - raft: raft, - logger: logger, + enabledCh: make(chan nodeDrainerState), + raft: raft, + shutdownCh: shutdownCh, + logger: logger, } } +// SetEnabled will start or stop the node draining goroutine depending on the +// enabled boolean. SetEnabled is meant to be called concurrently with Run. func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) { - n.enabledCh <- nodeDrainerState{enabled, state} + select { + case n.enabledCh <- nodeDrainerState{enabled, state}: + case <-n.shutdownCh: + } } -//FIXME never exits +// Run monitors the shutdown chan as well as SetEnabled calls and starts/stops +// the node draining goroutine appropriately. As it blocks it should be called +// in a goroutine. 
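Outside of a full server, the enable/disable lifecycle that Run implements can be exercised against a stub RaftApplier. The following self-contained sketch is illustrative only: the no-op applier and the shutdown choreography are assumptions, not part of the patch, and a real server supplies the drainerShim and its FSM state store instead.

package main

import (
	"log"
	"os"

	"github.com/hashicorp/nomad/nomad/drainer"
	"github.com/hashicorp/nomad/nomad/structs"
)

// noopRaft stands in for the drainerShim a real server provides; it satisfies
// drainer.RaftApplier but applies nothing.
type noopRaft struct{}

func (noopRaft) AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error {
	return nil
}

func (noopRaft) NodeDrainComplete(nodeID string) error { return nil }

func main() {
	logger := log.New(os.Stderr, "", log.LstdFlags)
	shutdownCh := make(chan struct{})

	nd := drainer.NewNodeDrainer(logger, shutdownCh, noopRaft{})
	go nd.Run()

	// A leader would call nd.SetEnabled(true, s.fsm.State()) after winning an
	// election and nd.SetEnabled(false, nil) after losing one; this disable
	// call is a no-op because the drainer was never enabled.
	nd.SetEnabled(false, nil)

	// Closing shutdownCh causes Run to cancel any running drain loop and return.
	close(shutdownCh)
}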
func (n *NodeDrainer) Run() { running := false + var s nodeDrainerState var ctx context.Context cancel := func() {} - for s := range n.enabledCh { + for { + select { + case s = <-n.enabledCh: + case <-n.shutdownCh: + // Stop drainer and exit + cancel() + return + } + switch { case s.enabled && running: // Already running @@ -129,7 +156,8 @@ func (n *NodeDrainer) Run() { } } -// nodeDrainer should be called in establishLeadership by the leader. +// nodeDrainer is the core node draining main loop and should be started in a +// goroutine when a server establishes leadership. func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) { nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(n.logger, state) diff --git a/nomad/server.go b/nomad/server.go index f49c62b9cf5a..a9984ac34afb 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -892,7 +892,7 @@ func (s *Server) setupDeploymentWatcher() error { func (s *Server) setupNodeDrainer() { // create a shim around raft requests shim := drainerShim{s} - s.nodeDrainer = drainer.NewNodeDrainer(s.logger, shim) + s.nodeDrainer = drainer.NewNodeDrainer(s.logger, s.shutdownCh, shim) go s.nodeDrainer.Run() } From 57c03359409578154fcf6385f7faf3c445b158f3 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 27 Feb 2018 13:51:37 -0800 Subject: [PATCH 25/79] Remove unused context --- nomad/leader.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/nomad/leader.go b/nomad/leader.go index a395c3a91d5b..f65a22477727 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -268,16 +268,6 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { go s.replicateACLTokens(stopCh) } - // Convert stopCh into a Context - ctx, cancel := context.WithCancel(context.Background()) - go func() { - defer cancel() - select { - case <-stopCh: - case <-ctx.Done(): - } - }() - // Setup any enterprise systems required. 
if err := s.establishEnterpriseLeadership(stopCh); err != nil { return err From f2de735cdc341a10643512207740fa4891e236d4 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 27 Feb 2018 14:08:30 -0800 Subject: [PATCH 26/79] Restart every time SetEnabled(true) is called --- nomad/drainer/drain.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go index b795e0eea173..5175f609f55d 100644 --- a/nomad/drainer/drain.go +++ b/nomad/drainer/drain.go @@ -125,8 +125,7 @@ func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) { func (n *NodeDrainer) Run() { running := false var s nodeDrainerState - var ctx context.Context - cancel := func() {} + ctx, cancel := context.WithCancel(context.Background()) for { select { case s = <-n.enabledCh: @@ -138,15 +137,19 @@ func (n *NodeDrainer) Run() { switch { case s.enabled && running: - // Already running - continue + // Already running, must restart to ensure the latest StateStore is used + cancel() + ctx, cancel = context.WithCancel(context.Background()) + go n.nodeDrainer(ctx, s.state) + case !s.enabled && !running: - // Already stopped - continue + // Already stopped; nothing to do + case !s.enabled && running: // Stop running node drainer cancel() running = false + case s.enabled && !running: // Start running node drainer ctx, cancel = context.WithCancel(context.Background()) From 678fbe1755a0bb0d9a54a798768a20686fd7cf92 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 27 Feb 2018 14:50:17 -0800 Subject: [PATCH 27/79] drainer: factor job & node watchers out of drainer.go --- nomad/drainer/drain.go | 240 ----------------------------------- nomad/drainer/jobwatcher.go | 140 ++++++++++++++++++++ nomad/drainer/nodewatcher.go | 121 ++++++++++++++++++ 3 files changed, 261 insertions(+), 240 deletions(-) create mode 100644 nomad/drainer/jobwatcher.go create mode 100644 nomad/drainer/nodewatcher.go diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go index 5175f609f55d..5f35bca6c0b4 100644 --- a/nomad/drainer/drain.go +++ b/nomad/drainer/drain.go @@ -4,10 +4,8 @@ import ( "context" "log" "strings" - "sync" "time" - memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/state" @@ -460,244 +458,6 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) } } -// nodeWatcher watches for nodes to start or stop draining -type nodeWatcher struct { - index uint64 - nodes map[string]*structs.Node - nodesCh chan map[string]*structs.Node - state *state.StateStore - logger *log.Logger -} - -func newNodeWatcher(logger *log.Logger, nodes map[string]*structs.Node, index uint64, state *state.StateStore) *nodeWatcher { - return &nodeWatcher{ - nodes: nodes, - nodesCh: make(chan map[string]*structs.Node), - index: index, - state: state, - logger: logger, - } -} - -func (n *nodeWatcher) run(ctx context.Context) { - // Trigger an initial drain pass if there are already nodes draining - //FIXME this is unneccessary if a node has reached a deadline - n.logger.Printf("[TRACE] nomad.drain: initial draining nodes: %d", len(n.nodes)) - if len(n.nodes) > 0 { - n.nodesCh <- n.nodes - } - - for { - //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? 
- resp, index, err := n.state.BlockingQuery(n.queryNodeDrain, n.index, ctx) - if err != nil { - if err == context.Canceled { - n.logger.Printf("[TRACE] nomad.drain: draining node watcher shutting down") - return - } - n.logger.Printf("[ERR] nomad.drain: error blocking on node updates at index %d: %v", n.index, err) - return - } - - // update index for next run - n.index = index - - changed := false - newNodes := resp.([]*structs.Node) - n.logger.Printf("[TRACE] nomad.drain: %d nodes to consider", len(newNodes)) //FIXME remove - for _, newNode := range newNodes { - if existingNode, ok := n.nodes[newNode.ID]; ok { - // Node was draining, see if it has changed - if !newNode.Drain { - // Node stopped draining - delete(n.nodes, newNode.ID) - changed = true - } else if !newNode.DrainStrategy.DeadlineTime().Equal(existingNode.DrainStrategy.DeadlineTime()) { - // Update deadline - n.nodes[newNode.ID] = newNode - changed = true - } - } else { - // Node was not draining - if newNode.Drain { - // Node started draining - n.nodes[newNode.ID] = newNode - changed = true - } - } - } - - // Send a copy of the draining nodes if there were changes - if !changed { - continue - } - - nodesCopy := make(map[string]*structs.Node, len(n.nodes)) - for k, v := range n.nodes { - nodesCopy[k] = v - } - - select { - case n.nodesCh <- nodesCopy: - case <-ctx.Done(): - return - } - } -} - -func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { - iter, err := state.Nodes(ws) - if err != nil { - return nil, 0, err - } - - index, err := state.Index("nodes") - if err != nil { - return nil, 0, err - } - - resp := make([]*structs.Node, 0, 8) - - for { - raw := iter.Next() - if raw == nil { - break - } - - node := raw.(*structs.Node) - resp = append(resp, node) - } - - return resp, index, nil -} - -type jobWatcher struct { - // allocsIndex to start watching from - allocsIndex uint64 - - // job -> node.ID - jobs map[jobKey]string - jobsMu sync.Mutex - - jobsCh chan map[jobKey]struct{} - - state *state.StateStore - - logger *log.Logger -} - -func newJobWatcher(logger *log.Logger, jobs map[jobKey]string, allocsIndex uint64, state *state.StateStore) *jobWatcher { - return &jobWatcher{ - allocsIndex: allocsIndex, - logger: logger, - jobs: jobs, - jobsCh: make(chan map[jobKey]struct{}), - state: state, - } -} - -func (j *jobWatcher) watch(k jobKey, nodeID string) { - j.logger.Printf("[TRACE] nomad.drain: watching job %s on draining node %s", k.jobid, nodeID[:6]) - j.jobsMu.Lock() - j.jobs[k] = nodeID - j.jobsMu.Unlock() -} - -func (j *jobWatcher) nodeDone(nodeID string) { - j.jobsMu.Lock() - defer j.jobsMu.Unlock() - for k, v := range j.jobs { - if v == nodeID { - j.logger.Printf("[TRACE] nomad.drain: UNwatching job %s on done draining node %s", k.jobid, nodeID[:6]) - delete(j.jobs, k) - } - } -} - -func (j *jobWatcher) WaitCh() <-chan map[jobKey]struct{} { - return j.jobsCh -} - -func (j *jobWatcher) run(ctx context.Context) { - var resp interface{} - var err error - - for { - //FIXME have watchAllocs create a closure and give it a copy of j.jobs to remove locking? - //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? 
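		// The first FIXME above could be addressed roughly like this
		// (illustrative sketch, not part of this patch): snapshot the
		// watched jobs under the lock once per iteration and hand the copy
		// to the query, so watchAllocs never needs j.jobsMu itself:
		//
		//	j.jobsMu.Lock()
		//	watched := make(map[jobKey]string, len(j.jobs))
		//	for k, v := range j.jobs {
		//		watched[k] = v
		//	}
		//	j.jobsMu.Unlock()
		//	// watchAllocsFor is a hypothetical closure builder over `watched`
		//	resp, newIndex, err = j.state.BlockingQuery(j.watchAllocsFor(watched), j.allocsIndex, ctx)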
- var newIndex uint64 - resp, newIndex, err = j.state.BlockingQuery(j.watchAllocs, j.allocsIndex, ctx) - if err != nil { - if err == context.Canceled { - j.logger.Printf("[TRACE] nomad.drain: job watcher shutting down") - return - } - j.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) - return - } - - j.logger.Printf("[TRACE] nomad.drain: job watcher old index: %d new index: %d", j.allocsIndex, newIndex) - j.allocsIndex = newIndex - - changedJobs := resp.(map[jobKey]struct{}) - if len(changedJobs) > 0 { - select { - case j.jobsCh <- changedJobs: - case <-ctx.Done(): - return - } - } - } -} - -func (j *jobWatcher) watchAllocs(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { - iter, err := state.Allocs(ws) - if err != nil { - return nil, 0, err - } - - index, err := state.Index("allocs") - if err != nil { - return nil, 0, err - } - - skipped := 0 - - // job ids - resp := map[jobKey]struct{}{} - - for { - raw := iter.Next() - if raw == nil { - break - } - - alloc := raw.(*structs.Allocation) - - j.jobsMu.Lock() - _, ok := j.jobs[jobKey{alloc.Namespace, alloc.JobID}] - j.jobsMu.Unlock() - - if !ok { - // alloc is not part of a draining job - skipped++ - continue - } - - // don't wake drain loop if alloc hasn't updated its health - if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { - j.logger.Printf("[TRACE] nomad.drain: job watcher found alloc %s - deployment status: %t", alloc.ID[:6], *alloc.DeploymentStatus.Healthy) - resp[jobKey{alloc.Namespace, alloc.JobID}] = struct{}{} - } else { - j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring alloc %s - no deployment status", alloc.ID[:6]) - } - } - - j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring %d allocs - not part of draining job at index %d", skipped, index) - - return resp, index, nil -} - // initDrainer initializes the node drainer state and returns a list of // draining nodes as well as allocs that are draining that should be watched // for a replacement. diff --git a/nomad/drainer/jobwatcher.go b/nomad/drainer/jobwatcher.go new file mode 100644 index 000000000000..95a1be5d157e --- /dev/null +++ b/nomad/drainer/jobwatcher.go @@ -0,0 +1,140 @@ +package drainer + +import ( + "context" + "log" + "sync" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// jobWatcher watches allocation changes for jobs with at least one allocation +// on a draining node. 
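// (Lifecycle, as used by the drainer loop in this patch: the drainer seeds a
// watcher with newJobWatcher and starts run() in a goroutine, registers
// draining jobs via watch(), drops a node's jobs via nodeDone() once that
// node finishes draining, and receives the set of changed jobs on WaitCh().)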
+type jobWatcher struct { + // allocsIndex to start watching from + allocsIndex uint64 + + // job -> node.ID + jobs map[jobKey]string + jobsMu sync.Mutex + + jobsCh chan map[jobKey]struct{} + + state *state.StateStore + + logger *log.Logger +} + +func newJobWatcher(logger *log.Logger, jobs map[jobKey]string, allocsIndex uint64, state *state.StateStore) *jobWatcher { + return &jobWatcher{ + allocsIndex: allocsIndex, + logger: logger, + jobs: jobs, + jobsCh: make(chan map[jobKey]struct{}), + state: state, + } +} + +func (j *jobWatcher) watch(k jobKey, nodeID string) { + j.logger.Printf("[TRACE] nomad.drain: watching job %s on draining node %s", k.jobid, nodeID[:6]) + j.jobsMu.Lock() + j.jobs[k] = nodeID + j.jobsMu.Unlock() +} + +func (j *jobWatcher) nodeDone(nodeID string) { + j.jobsMu.Lock() + defer j.jobsMu.Unlock() + for k, v := range j.jobs { + if v == nodeID { + j.logger.Printf("[TRACE] nomad.drain: UNwatching job %s on done draining node %s", k.jobid, nodeID[:6]) + delete(j.jobs, k) + } + } +} + +func (j *jobWatcher) WaitCh() <-chan map[jobKey]struct{} { + return j.jobsCh +} + +func (j *jobWatcher) run(ctx context.Context) { + var resp interface{} + var err error + + for { + //FIXME have watchAllocs create a closure and give it a copy of j.jobs to remove locking? + //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? + var newIndex uint64 + resp, newIndex, err = j.state.BlockingQuery(j.watchAllocs, j.allocsIndex, ctx) + if err != nil { + if err == context.Canceled { + j.logger.Printf("[TRACE] nomad.drain: job watcher shutting down") + return + } + j.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) + return + } + + j.logger.Printf("[TRACE] nomad.drain: job watcher old index: %d new index: %d", j.allocsIndex, newIndex) + j.allocsIndex = newIndex + + changedJobs := resp.(map[jobKey]struct{}) + if len(changedJobs) > 0 { + select { + case j.jobsCh <- changedJobs: + case <-ctx.Done(): + return + } + } + } +} + +func (j *jobWatcher) watchAllocs(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + iter, err := state.Allocs(ws) + if err != nil { + return nil, 0, err + } + + index, err := state.Index("allocs") + if err != nil { + return nil, 0, err + } + + skipped := 0 + + // job ids + resp := map[jobKey]struct{}{} + + for { + raw := iter.Next() + if raw == nil { + break + } + + alloc := raw.(*structs.Allocation) + + j.jobsMu.Lock() + _, ok := j.jobs[jobKey{alloc.Namespace, alloc.JobID}] + j.jobsMu.Unlock() + + if !ok { + // alloc is not part of a draining job + skipped++ + continue + } + + // don't wake drain loop if alloc hasn't updated its health + if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + j.logger.Printf("[TRACE] nomad.drain: job watcher found alloc %s - deployment status: %t", alloc.ID[:6], *alloc.DeploymentStatus.Healthy) + resp[jobKey{alloc.Namespace, alloc.JobID}] = struct{}{} + } else { + j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring alloc %s - no deployment status", alloc.ID[:6]) + } + } + + j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring %d allocs - not part of draining job at index %d", skipped, index) + + return resp, index, nil +} diff --git a/nomad/drainer/nodewatcher.go b/nomad/drainer/nodewatcher.go new file mode 100644 index 000000000000..eb54e4995842 --- /dev/null +++ b/nomad/drainer/nodewatcher.go @@ -0,0 +1,121 @@ +package drainer + +import ( + "context" + "log" + + memdb "github.com/hashicorp/go-memdb" + 
"github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// nodeWatcher watches for nodes to start or stop draining +type nodeWatcher struct { + index uint64 + nodes map[string]*structs.Node + nodesCh chan map[string]*structs.Node + state *state.StateStore + logger *log.Logger +} + +func newNodeWatcher(logger *log.Logger, nodes map[string]*structs.Node, index uint64, state *state.StateStore) *nodeWatcher { + return &nodeWatcher{ + nodes: nodes, + nodesCh: make(chan map[string]*structs.Node), + index: index, + state: state, + logger: logger, + } +} + +func (n *nodeWatcher) run(ctx context.Context) { + // Trigger an initial drain pass if there are already nodes draining + //FIXME this is unneccessary if a node has reached a deadline + n.logger.Printf("[TRACE] nomad.drain: initial draining nodes: %d", len(n.nodes)) + if len(n.nodes) > 0 { + n.nodesCh <- n.nodes + } + + for { + //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? + resp, index, err := n.state.BlockingQuery(n.queryNodeDrain, n.index, ctx) + if err != nil { + if err == context.Canceled { + n.logger.Printf("[TRACE] nomad.drain: draining node watcher shutting down") + return + } + n.logger.Printf("[ERR] nomad.drain: error blocking on node updates at index %d: %v", n.index, err) + return + } + + // update index for next run + n.index = index + + changed := false + newNodes := resp.([]*structs.Node) + n.logger.Printf("[TRACE] nomad.drain: %d nodes to consider", len(newNodes)) //FIXME remove + for _, newNode := range newNodes { + if existingNode, ok := n.nodes[newNode.ID]; ok { + // Node was draining, see if it has changed + if !newNode.Drain { + // Node stopped draining + delete(n.nodes, newNode.ID) + changed = true + } else if !newNode.DrainStrategy.DeadlineTime().Equal(existingNode.DrainStrategy.DeadlineTime()) { + // Update deadline + n.nodes[newNode.ID] = newNode + changed = true + } + } else { + // Node was not draining + if newNode.Drain { + // Node started draining + n.nodes[newNode.ID] = newNode + changed = true + } + } + } + + // Send a copy of the draining nodes if there were changes + if !changed { + continue + } + + nodesCopy := make(map[string]*structs.Node, len(n.nodes)) + for k, v := range n.nodes { + nodesCopy[k] = v + } + + select { + case n.nodesCh <- nodesCopy: + case <-ctx.Done(): + return + } + } +} + +func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + iter, err := state.Nodes(ws) + if err != nil { + return nil, 0, err + } + + index, err := state.Index("nodes") + if err != nil { + return nil, 0, err + } + + resp := make([]*structs.Node, 0, 8) + + for { + raw := iter.Next() + if raw == nil { + break + } + + node := raw.(*structs.Node) + resp = append(resp, node) + } + + return resp, index, nil +} From 3b25f784bec188264c51381a04083b0361742c7e Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 27 Feb 2018 15:18:32 -0800 Subject: [PATCH 28/79] drainer: convert fsm errors to go errors --- nomad/drainer_shims.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/nomad/drainer_shims.go b/nomad/drainer_shims.go index 0ced081f5fd9..09a1a8f6635c 100644 --- a/nomad/drainer_shims.go +++ b/nomad/drainer_shims.go @@ -15,8 +15,8 @@ func (d drainerShim) NodeDrainComplete(nodeID string) error { WriteRequest: structs.WriteRequest{Region: d.s.config.Region}, } - _, _, err := d.s.raftApply(structs.NodeUpdateDrainRequestType, args) - return err + 
resp, _, err := d.s.raftApply(structs.NodeUpdateDrainRequestType, args) + return d.convertApplyErrors(resp, err) } func (d drainerShim) AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error { @@ -25,6 +25,21 @@ func (d drainerShim) AllocUpdateDesiredTransition(allocs map[string]*structs.Des Evals: evals, WriteRequest: structs.WriteRequest{Region: d.s.config.Region}, } - _, _, err := d.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, args) + resp, _, err := d.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, args) + return d.convertApplyErrors(resp, err) +} + +// convertApplyErrors parses the results of a raftApply and returns the index at +// which it was applied and any error that occurred. Raft Apply returns two +// separate errors, Raft library errors and user returned errors from the FSM. +// This helper, joins the errors by inspecting the applyResponse for an error. +// +// Similar to deployment watcher's convertApplyErrors +func (d drainerShim) convertApplyErrors(applyResp interface{}, err error) error { + if applyResp != nil { + if fsmErr, ok := applyResp.(error); ok && fsmErr != nil { + return fsmErr + } + } return err } From 3fe3c6eff70e3cc7e1df121803bae39a11b66030 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 28 Feb 2018 16:25:56 -0800 Subject: [PATCH 29/79] Improve DeadlineTime helper --- api/nodes.go | 3 ++ client/testing.go | 4 ++ nomad/drainer/drain.go | 84 +++++++++++++++++++----------------- nomad/drainer/drain_test.go | 8 ++++ nomad/drainer/nodewatcher.go | 6 +-- nomad/node_endpoint.go | 5 +++ nomad/structs/structs.go | 51 +++++++++++++++++----- 7 files changed, 108 insertions(+), 53 deletions(-) diff --git a/api/nodes.go b/api/nodes.go index 37adb8fc34e5..a505d9ae369f 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -176,6 +176,9 @@ type Node struct { type DrainStrategy struct { // DrainSpec is the user declared drain specification DrainSpec + + // DeadlineTime is the deadline time for the drain. + DeadlineTime time.Time } // DrainSpec describes a Node's drain behavior. diff --git a/client/testing.go b/client/testing.go index a86728365abe..4043da298738 100644 --- a/client/testing.go +++ b/client/testing.go @@ -21,6 +21,10 @@ func TestClient(t testing.T, cb func(c *config.Config)) *Client { }, } + // Loosen GC threshold + conf.GCDiskUsageThreshold = 98.0 + conf.GCInodeUsageThreshold = 98.0 + // Tighten the fingerprinter timeouts if conf.Options == nil { conf.Options = make(map[string]string) diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go index 5f35bca6c0b4..450c4261f3f6 100644 --- a/nomad/drainer/drain.go +++ b/nomad/drainer/drain.go @@ -157,24 +157,41 @@ func (n *NodeDrainer) Run() { } } +// getNextDeadline is a helper that takes a set of draining nodes and returns the +// next deadline. It also returns a boolean if there is a deadline. +func getNextDeadline(nodes map[string]*structs.Node) (time.Time, bool) { + var nextDeadline time.Time + found := false + for _, node := range nodes { + inf, d := node.DrainStrategy.DeadlineTime() + if !inf && (nextDeadline.IsZero() || d.Before(nextDeadline)) { + nextDeadline = d + found = true + } + } + + return nextDeadline, found +} + // nodeDrainer is the core node draining main loop and should be started in a // goroutine when a server establishes leadership. 
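// (Shape of the loop below, for orientation: it seeds its view of the world
// with initDrainer, starts a nodeWatcher and a jobWatcher in goroutines, and
// then selects on draining-node changes, job changes, the drain-deadline
// timer, and ctx cancellation, recomputing the next deadline with
// getNextDeadline whenever the set of draining nodes changes.)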
func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) { nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(n.logger, state) // Wait for a node's drain deadline to expire - var nextDeadline time.Time - for _, node := range nodes { - if nextDeadline.IsZero() { - nextDeadline = node.DrainStrategy.DeadlineTime() - continue - } - if deadline := node.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) { - nextDeadline = deadline + nextDeadline, ok := getNextDeadline(nodes) + deadlineTimer := time.NewTimer(time.Until(nextDeadline)) + stopDeadlineTimer := func() { + if !deadlineTimer.Stop() { + select { + case <-deadlineTimer.C: + default: + } } - } - deadlineTimer := time.NewTimer(time.Until(nextDeadline)) + if !ok { + stopDeadlineTimer() + } // Watch for nodes to start or stop draining nodeWatcher := newNodeWatcher(n.logger, nodes, nodesIndex, state) @@ -197,33 +214,14 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) // update draining nodes n.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes)) - // update deadline timer - changed := false - for _, n := range nodes { - if nextDeadline.IsZero() { - nextDeadline = n.DrainStrategy.DeadlineTime() - changed = true - continue - } - - if deadline := n.DrainStrategy.DeadlineTime(); deadline.Before(nextDeadline) { - nextDeadline = deadline - changed = true - } - } - - // if changed reset the timer - if changed { + d, ok := getNextDeadline(nodes) + if ok && !nextDeadline.Equal(d) { + nextDeadline = d n.logger.Printf("[TRACE] nomad.drain: new node deadline: %s", nextDeadline) - if !deadlineTimer.Stop() { - // timer may have been recv'd in a - // previous loop, so don't block - select { - case <-deadlineTimer.C: - default: - } - } + stopDeadlineTimer() deadlineTimer.Reset(time.Until(nextDeadline)) + } else if !ok { + stopDeadlineTimer() } case jobs := <-jobWatcher.WaitCh(): @@ -275,7 +273,8 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) // track number of allocs left on this node to be drained allocsLeft := false - deadlineReached := node.DrainStrategy.DeadlineTime().Before(now) + inf, deadline := node.DrainStrategy.DeadlineTime() + deadlineReached := !inf && deadline.Before(now) for _, alloc := range allocs { jobkey := jobKey{alloc.Namespace, alloc.JobID} @@ -307,8 +306,13 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) // Don't bother collecting system/batch jobs for nodes that haven't hit their deadline if job.Type != structs.JobTypeService && !deadlineReached { - n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s", - job.Type, job.Name, node.DrainStrategy.DeadlineTime().Sub(now)) + if inf, d := node.DrainStrategy.DeadlineTime(); inf { + n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because node has an infinite deadline", + job.Type, job.Name) + } else { + n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s", + job.Type, job.Name, d.Sub(now)) + } skipJob[jobkey] = struct{}{} continue } @@ -370,7 +374,7 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) tgKey := makeTaskGroupKey(alloc) - if node.DrainStrategy.DeadlineTime().Before(now) { + if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) { n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, 
alloc.ID[:6], alloc.NodeID[:6]) // Alloc's Node has reached its deadline stoplist.add(drainingJob.job, alloc) @@ -494,7 +498,7 @@ func initDrainer(logger *log.Logger, state *state.StateStore) (map[string]*struc nodes[node.ID] = node // No point in tracking draining allocs as the deadline has been reached - if node.DrainStrategy.DeadlineTime().Before(now) { + if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) { continue } diff --git a/nomad/drainer/drain_test.go b/nomad/drainer/drain_test.go index 1f38a4c293fc..dd25becccf48 100644 --- a/nomad/drainer/drain_test.go +++ b/nomad/drainer/drain_test.go @@ -59,6 +59,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { serviceJob := mock.Job() serviceJob.Name = "service-job" serviceJob.Type = structs.JobTypeService + serviceJob.Constraints = nil serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{ MaxParallel: 1, HealthCheck: structs.MigrateStrategyHealthStates, @@ -76,6 +77,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { systemJob := mock.SystemJob() systemJob.Name = "system-job" systemJob.Type = structs.JobTypeSystem + systemJob.Constraints = nil //FIXME hack until system job reschedule policy validation is fixed systemJob.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: time.Minute} systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" @@ -90,6 +92,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { batchJob := mock.Job() batchJob.Name = "batch-job" batchJob.Type = structs.JobTypeBatch + batchJob.Constraints = nil batchJob.TaskGroups[0].Name = "batch-group" batchJob.TaskGroups[0].Migrate = nil batchJob.TaskGroups[0].Tasks[0].Name = "batch-task" @@ -159,6 +162,11 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) } } + if resp, err := rpc.EvalList(); err == nil { + for _, eval := range resp.Evaluations { + t.Logf("% #v\n", pretty.Formatter(eval)) + } + } t.Fatalf("failed waiting for all allocs to start: %v", err) }) diff --git a/nomad/drainer/nodewatcher.go b/nomad/drainer/nodewatcher.go index eb54e4995842..5f419ea2ca91 100644 --- a/nomad/drainer/nodewatcher.go +++ b/nomad/drainer/nodewatcher.go @@ -57,18 +57,18 @@ func (n *nodeWatcher) run(ctx context.Context) { for _, newNode := range newNodes { if existingNode, ok := n.nodes[newNode.ID]; ok { // Node was draining, see if it has changed - if !newNode.Drain { + if newNode.DrainStrategy == nil { // Node stopped draining delete(n.nodes, newNode.ID) changed = true - } else if !newNode.DrainStrategy.DeadlineTime().Equal(existingNode.DrainStrategy.DeadlineTime()) { + } else if !newNode.DrainStrategy.Equal(existingNode.DrainStrategy) { // Update deadline n.nodes[newNode.ID] = newNode changed = true } } else { // Node was not draining - if newNode.Drain { + if newNode.DrainStrategy != nil { // Node started draining n.nodes[newNode.ID] = newNode changed = true diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 5cf5aa587d7e..6cfe62ae7e5c 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -443,6 +443,11 @@ func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, } } + // Mark the deadline time + if args.DrainStrategy != nil && args.DrainStrategy.Deadline.Nanoseconds() > 0 { + args.DrainStrategy.ForceDeadline = time.Now().Add(args.DrainStrategy.Deadline) + } + // Commit this update via Raft _, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) if err != nil { diff --git 
a/nomad/structs/structs.go b/nomad/structs/structs.go index e1d9b077752d..018b96c422ec 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1214,9 +1214,9 @@ type DrainStrategy struct { // DrainSpec is the user declared drain specification DrainSpec - // StartTime as nanoseconds since Unix epoch indicating when a drain - // began for deadline calcuations. - StartTime int64 + // ForceDeadline is the deadline time for the drain after which drains will + // be forced + ForceDeadline time.Time } func (d *DrainStrategy) Copy() *DrainStrategy { @@ -1229,16 +1229,47 @@ func (d *DrainStrategy) Copy() *DrainStrategy { return nd } -// DeadlineTime returns the Time this drain's deadline will be reached or the -// zero value for Time if DrainStrategy is nil or Duration is <= 0. -func (d *DrainStrategy) DeadlineTime() time.Time { +// DeadlineTime returns a boolean whether the drain strategy allows an infinite +// duration or otherwise the deadline time. The force drain is captured by the +// deadline time being in the past. +func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) { + // Treat the nil case as a force drain so during an upgrade where a node may + // not have a drain strategy but has Drain set to true, it is treated as a + // force to mimick old behavior. if d == nil { - return time.Time{} + return false, time.Time{} } - if d.Deadline <= 0 { - return time.Time{} + + ns := d.Deadline.Nanoseconds() + switch { + case ns < 0: // Force + return false, time.Time{} + case ns == 0: // Infinite + return true, time.Time{} + default: + return false, d.ForceDeadline + } +} + +func (d *DrainStrategy) Equal(o *DrainStrategy) bool { + if d == nil && o == nil { + return true + } else if o != nil && d == nil { + return false + } else if d != nil && o == nil { + return false } - return time.Unix(0, d.StartTime).Add(d.Deadline) + + // Compare values + if d.ForceDeadline != o.ForceDeadline { + return false + } else if d.Deadline != o.Deadline { + return false + } else if d.IgnoreSystemJobs != o.IgnoreSystemJobs { + return false + } + + return true } // Node is a representation of a schedulable client node From 3ca9cdfadc5988d134fc16a52c933c6c7e3ef67a Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 27 Feb 2018 15:51:09 -0800 Subject: [PATCH 30/79] client: don't monitor health of non-service jobs Also fix system job draining; won't work without deadline fixes --- client/alloc_runner_health_watcher.go | 15 +- nomad/drainer/drain.go | 121 +++++++---- nomad/drainer/drain_test.go | 282 +++++++++++++++++++++++--- testutil/rpcapi/rcpapi.go | 18 ++ 4 files changed, 357 insertions(+), 79 deletions(-) diff --git a/client/alloc_runner_health_watcher.go b/client/alloc_runner_health_watcher.go index db9164740319..bdb7eaa82261 100644 --- a/client/alloc_runner_health_watcher.go +++ b/client/alloc_runner_health_watcher.go @@ -31,25 +31,24 @@ func (r *AllocRunner) watchHealth(ctx context.Context) { // See if we should watch the allocs health alloc := r.Alloc() - if alloc.Job.Type == structs.JobTypeSystem || alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { - // Neither deployments nor migrations apply to system jobs and - // we don't need to track allocations which already have a - // status + if alloc.Job.Type != structs.JobTypeService { + // No need to watch non-service jos return } - isDeploy := alloc.DeploymentID != "" - - if isDeploy && alloc.Job.Type != structs.JobTypeService { - // Deployments don't track non-Service jobs + if 
alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + // No need to watch health as it's already set return } + isDeploy := alloc.DeploymentID != "" + tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) if tg == nil { r.logger.Printf("[ERR] client.alloc_watcher: failed to lookup allocation's task group. Exiting watcher") return } + if isDeploy && (tg.Update == nil || tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Manual) { return } diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go index 450c4261f3f6..e52a735aaf13 100644 --- a/nomad/drainer/drain.go +++ b/nomad/drainer/drain.go @@ -18,13 +18,13 @@ type jobKey struct { jobid string } -// drainingJob contains the Job and allocations for that job meant to be used +// runningJob contains the Job and allocations for that job meant to be used // when collecting all allocations for a job with at least one allocation on a // draining node. // -// This allows the MaxParallel calculation to take the entire job's allocation -// state into account. FIXME is that even useful? -type drainingJob struct { +// In order to drain an allocation we must also emit an evaluation for its job, +// so this struct bundles allocations with their job. +type runningJob struct { job *structs.Job allocs []*structs.Allocation } @@ -247,17 +247,16 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) // job key -> {job, allocs} // Collect all allocs for all jobs with at least one - // alloc on a draining node. + // non-terminal alloc on a draining node. // Invariants: - // - No system jobs - // - No batch jobs unless their node's deadline is reached + // - Only service jobs // - No entries with 0 allocs //TODO could this be a helper method on prevAllocWatcher - drainable := map[jobKey]*drainingJob{} + drainableSvcs := map[jobKey]*runningJob{} - // track jobs we've looked up before and know we shouldn't - // consider for draining eg system jobs - skipJob := map[jobKey]struct{}{} + // drainNow are allocs for batch or system jobs that should be + // drained due to a node deadline being reached + drainNow := map[jobKey]*runningJob{} // track number of "up" allocs per task group (not terminal and // have a deployment status) @@ -271,22 +270,21 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) panic(err) } + // drainableSys are allocs for system jobs that should be + // drained if there are no other allocs left + drainableSys := map[jobKey]*runningJob{} + // track number of allocs left on this node to be drained allocsLeft := false inf, deadline := node.DrainStrategy.DeadlineTime() deadlineReached := !inf && deadline.Before(now) for _, alloc := range allocs { - jobkey := jobKey{alloc.Namespace, alloc.JobID} - - if _, ok := drainable[jobkey]; ok { - // already found + // Don't need to consider drained allocs + if alloc.TerminalStatus() { continue } - if _, ok := skipJob[jobkey]; ok { - // already looked up and skipped - continue - } + jobkey := jobKey{alloc.Namespace, alloc.JobID} // job does not found yet job, err := snapshot.JobByID(nil, alloc.Namespace, alloc.JobID) @@ -295,28 +293,49 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) panic(err) } - // If alloc isn't yet terminal this node has - // allocs left to be drained - if !alloc.TerminalStatus() { - if !allocsLeft { - n.logger.Printf("[TRACE] nomad.drain: node %s has allocs left to drain", nodeID[:6]) - allocsLeft = true + // IgnoreSystemJobs if specified in the node's DrainStrategy + 
if node.DrainStrategy.IgnoreSystemJobs && job.Type == structs.JobTypeSystem { + continue + } + + // When the node deadline is reached all batch + // and service jobs will be drained + if deadlineReached && job.Type != structs.JobTypeService { + n.logger.Printf("[TRACE] nomad.drain: draining alloc %s due to node %s reaching drain deadline", alloc.ID, node.ID) + if j, ok := drainNow[jobkey]; ok { + j.allocs = append(j.allocs, alloc) + } else { + // First alloc for this job, create entry + drainNow[jobkey] = &runningJob{ + job: job, + allocs: []*structs.Allocation{alloc}, + } } + continue } - // Don't bother collecting system/batch jobs for nodes that haven't hit their deadline - if job.Type != structs.JobTypeService && !deadlineReached { - if inf, d := node.DrainStrategy.DeadlineTime(); inf { - n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because node has an infinite deadline", - job.Type, job.Name) + // If deadline hasn't been reached, system jobs + // may still be drained if there are no other + // allocs left + if !deadlineReached && job.Type == structs.JobTypeSystem { + n.logger.Printf("[TRACE] nomad.drain: system alloc %s will be drained if no other allocs on node %s", alloc.ID, node.ID) + if j, ok := drainableSys[jobkey]; ok { + j.allocs = append(j.allocs, alloc) } else { - n.logger.Printf("[TRACE] nomad.drain: not draining %s job %s because deadline isn't for %s", - job.Type, job.Name, d.Sub(now)) + // First alloc for this job, create entry + drainableSys[jobkey] = &runningJob{ + job: job, + allocs: []*structs.Allocation{alloc}, + } } - skipJob[jobkey] = struct{}{} continue } + // This alloc is still running on a draining + // node, so treat the node as having allocs + // remaining + allocsLeft = true + jobAllocs, err := snapshot.AllocsByJob(nil, alloc.Namespace, alloc.JobID, true) if err != nil { //FIXME @@ -328,14 +347,15 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) num := 0 for _, a := range jobAllocs { if !a.TerminalStatus() && a.DeploymentStatus != nil { + // Not terminal and health updated, count it as up! upPerTG[makeTaskGroupKey(a)]++ num++ } } - n.logger.Printf("[TRACE] nomad.drain: job %s has %d task groups running", job.Name, num) + n.logger.Printf("[TRACE] nomad.drain: job %s has %d allocs running", job.Name, num) } - drainable[jobkey] = &drainingJob{ + drainableSvcs[jobkey] = &runningJob{ job: job, allocs: jobAllocs, } @@ -348,6 +368,17 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) n.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain or has reached deadline", nodeID) jobWatcher.nodeDone(nodeID) doneNodes[nodeID] = node + + // Add all system jobs on this node to the drainNow slice + for k, sysj := range drainableSys { + if j, ok := drainNow[k]; ok { + // Job already has at least one alloc draining, append this one + j.allocs = append(j.allocs, sysj.allocs...) 
+ } else { + // First draining alloc for this job, add the entry + drainNow[k] = sysj + } + } } } @@ -358,8 +389,15 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) jobBatch: make(map[jobKey]*structs.Job), } + // Immediately drain all allocs in drainNow + for _, drainingJob := range drainNow { + for _, a := range drainingJob.allocs { + stoplist.add(drainingJob.job, a) + } + } + // build drain list considering deadline & max_parallel - for _, drainingJob := range drainable { + for _, drainingJob := range drainableSvcs { for _, alloc := range drainingJob.allocs { // Already draining/dead allocs don't need to be drained if alloc.TerminalStatus() { @@ -383,13 +421,6 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) continue } - // Batch/System jobs are only stopped when the - // node deadline is reached which has already - // been done. - if drainingJob.job.Type != structs.JobTypeService { - continue - } - // Stop allocs with count=1, max_parallel==0, or draining 0 { n.logger.Printf("[DEBUG] nomad.drain: stopping %d alloc(s) for %d job(s)", len(stoplist.allocBatch), len(stoplist.jobBatch)) + for id, _ := range stoplist.allocBatch { + n.logger.Printf("[TRACE] nomad.drain: migrating alloc %s", id[:6]) + } + // Reevaluate affected jobs evals := make([]*structs.Evaluation, 0, len(stoplist.jobBatch)) for _, job := range stoplist.jobBatch { diff --git a/nomad/drainer/drain_test.go b/nomad/drainer/drain_test.go index dd25becccf48..8361a56593ca 100644 --- a/nomad/drainer/drain_test.go +++ b/nomad/drainer/drain_test.go @@ -9,7 +9,6 @@ import ( "testing" "time" - msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/client" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/helper/pool" @@ -40,6 +39,7 @@ func rpcClient(t *testing.T, conf *nomad.Config) rpc.ClientCodec { // TestNodeDrainer_SimpleDrain asserts that draining when there are two nodes // moves allocs from the draining node to the other node. 
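// (Outline of the scenario below: register service, system, and batch jobs so
// they land on a single client, start draining that node with a negative
// deadline, bring up a second client, then assert the allocs migrate, the
// drained allocs stop, and the node's drain flag is cleared when done.)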
func TestNodeDrainer_SimpleDrain(t *testing.T) { + assert := assert.New(t) require := require.New(t) // Capture test servers config @@ -78,8 +78,6 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { systemJob.Name = "system-job" systemJob.Type = structs.JobTypeSystem systemJob.Constraints = nil - //FIXME hack until system job reschedule policy validation is fixed - systemJob.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{Attempts: 1, Interval: time.Minute} systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" systemJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ "run_for": "10m", @@ -111,28 +109,20 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { conf.Servers = []string{serverConfig.RPCAddr.String()} }) defer c1.Shutdown() + node1ID := c1.NodeID() // Start jobs so they all get placed on node 1 codec := rpcClient(t, serverConfig) + rpc := rpcapi.NewRPC(codec) for _, job := range []*structs.Job{systemJob, serviceJob, batchJob} { - req := &structs.JobRegisterRequest{ - Job: job.Copy(), - WriteRequest: structs.WriteRequest{ - Region: "global", - Namespace: job.Namespace, - }, - } - - // Fetch the response - var resp structs.JobRegisterResponse - require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp)) + resp, err := rpc.JobRegister(job) + require.Nil(err) require.NotZero(resp.Index) } // Wait for jobs to start on c1 - rpc := rpcapi.NewRPC(codec) testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(c1.NodeID()) + resp, err := rpc.NodeGetAllocs(node1ID) if err != nil { return false, err } @@ -157,7 +147,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { } return true, nil }, func(err error) { - if resp, err := rpc.NodeGetAllocs(c1.NodeID()); err == nil { + if resp, err := rpc.NodeGetAllocs(node1ID); err == nil { for i, alloc := range resp.Allocs { t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) } @@ -170,27 +160,28 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Fatalf("failed waiting for all allocs to start: %v", err) }) - // Start draining node 1 - //FIXME update drain rpc to skip fsm manipulation and use api + // Start draining node 1 with no deadline strategy := &structs.DrainStrategy{ DrainSpec: structs.DrainSpec{ Deadline: -1 * time.Second, }, } - node, err := state.NodeByID(nil, c1.NodeID()) + node1Resp, err := rpc.NodeGet(node1ID) require.Nil(err) - require.Nil(state.UpdateNodeDrain(node.ModifyIndex+1, node.ID, strategy)) + node1 := node1Resp.Node + require.Nil(state.UpdateNodeDrain(node1.ModifyIndex+1, node1ID, strategy)) // Start node 2 c2 := client.TestClient(t, func(conf *config.Config) { - conf.NetworkSpeed = 10000 + conf.LogOutput = testlog.NewWriter(t) conf.Servers = []string{serverConfig.RPCAddr.String()} }) defer c2.Shutdown() + node2ID := c2.NodeID() // Wait for services to be migrated testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(c2.NodeID()) + resp, err := rpc.NodeGetAllocs(node2ID) if err != nil { return false, err } @@ -215,7 +206,7 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { } return true, nil }, func(err error) { - if resp, err := rpc.NodeGetAllocs(c2.NodeID()); err == nil { + if resp, err := rpc.NodeGetAllocs(node2ID); err == nil { for i, alloc := range resp.Allocs { t.Logf("%d alloc %s job %s status %s prev %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus, alloc.PreviousAllocation) } @@ -223,12 +214,247 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Errorf("failed waiting for all allocs to migrate: 
%v", err) }) - node1, err := rpc.NodeGet(c1.NodeID()) + // Wait for drained services to be dead + testutil.WaitForResult(func() (bool, error) { + resp, err := rpc.NodeGetAllocs(c1.NodeID()) + if err != nil { + return false, err + } + + running := make([]string, 0, len(resp.Allocs)) + for _, alloc := range resp.Allocs { + if alloc.ClientStatus == structs.AllocClientStatusRunning { + running = append(running, alloc.ID[:6]) + } + } + + if len(running) > 0 { + return false, fmt.Errorf("%d alloc(s) on draining node %s still running: %s", len(running), c1.NodeID()[:6], running) + } + return true, nil + }, func(err error) { + t.Errorf("failed waiting for all draining allocs to stop: %v", err) + }) + + node1Resp, err = rpc.NodeGet(node1ID) + require.Nil(err) + node1 = node1Resp.Node + assert.False(node1.Drain) + assert.Nil(node1.DrainStrategy) + assert.Equal(structs.NodeSchedulingIneligible, node1.SchedulingEligibility) + + jobs, err := rpc.JobList() + require.Nil(err) + t.Logf("--> %d jobs", len(jobs.Jobs)) + for _, job := range jobs.Jobs { + t.Logf("job: %s status: %s %s", job.Name, job.Status, job.StatusDescription) + } + + allocs, err := rpc.AllocAll() + require.Nil(err) + + sort.Slice(allocs, func(i, j int) bool { + r := strings.Compare(allocs[i].Job.Name, allocs[j].Job.Name) + switch { + case r < 0: + return true + case r == 0: + return allocs[i].ModifyIndex < allocs[j].ModifyIndex + case r > 0: + return false + } + panic("unreachable") + }) + + t.Logf("--> %d allocs", len(allocs)) + for _, alloc := range allocs { + t.Logf("job: %s node: %s alloc: %s desired_status: %s desired_transition: %s actual: %s replaces: %s", + alloc.Job.Name, alloc.NodeID[:6], alloc.ID[:6], alloc.DesiredStatus, pretty.Sprint(alloc.DesiredTransition.Migrate), alloc.ClientStatus, alloc.PreviousAllocation) + } +} + +// TestNodeDrainer_SystemDrain asserts system jobs are drained +func TestNodeDrainer_SystemDrain(t *testing.T) { assert := assert.New(t) + require := require.New(t) + + // Capture test servers config + var serverConfig *nomad.Config + server := nomad.TestServer(t, func(c *nomad.Config) { + serverConfig = c + }) + defer server.Shutdown() + + testutil.WaitForLeader(t, server.RPC) + + // Setup 2 Nodes: A & B; A has allocs and is draining + + // Create mock jobs + state := server.State() + + serviceJob := mock.Job() + serviceJob.Name = "service-job" + serviceJob.Type = structs.JobTypeService + serviceJob.TaskGroups[0].Count = 2 + serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{ + MaxParallel: 1, + HealthCheck: structs.MigrateStrategyHealthStates, + MinHealthyTime: time.Millisecond, + HealthyDeadline: 2 * time.Second, + } + serviceJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" + serviceJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() + serviceJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ + "run_for": "10m", + "kill_after": "1ms", + } + serviceJob.TaskGroups[0].Tasks[0].Services = nil + + systemJob := mock.SystemJob() + systemJob.Name = "system-job" + systemJob.Type = structs.JobTypeSystem + systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" + systemJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ + "run_for": "10m", + "kill_after": "1ms", + } + systemJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() + systemJob.TaskGroups[0].Tasks[0].Services = nil + + // Start node 1 + c1 := client.TestClient(t, func(conf *config.Config) { + conf.LogOutput = testlog.NewWriter(t) + conf.Servers = []string{serverConfig.RPCAddr.String()} + }) + defer 
c1.Shutdown() + node1ID := c1.NodeID() + + // Start jobs so they all get placed on node 1 + codec := rpcClient(t, serverConfig) + rpc := rpcapi.NewRPC(codec) + for _, job := range []*structs.Job{systemJob, serviceJob} { + resp, err := rpc.JobRegister(job) + require.Nil(err) + require.NotZero(resp.Index) + } + + // Wait for jobs to start on c1 + testutil.WaitForResult(func() (bool, error) { + resp, err := rpc.NodeGetAllocs(c1.NodeID()) + if err != nil { + return false, err + } + + system, service := 0, 0 + for _, alloc := range resp.Allocs { + if alloc.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) + } + switch alloc.JobID { + case serviceJob.ID: + service++ + case systemJob.ID: + system++ + default: + return false, fmt.Errorf("unknown job: %s", alloc.Job.Name) + } + } + // 1 system + 2 service = 3 + if system+service != 3 { + return false, fmt.Errorf("wrong number of allocs: system %d/1, service %d/2", system, service) + } + return true, nil + }, func(err error) { + if resp, err := rpc.NodeGetAllocs(c1.NodeID()); err == nil { + for i, alloc := range resp.Allocs { + t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) + } + } + t.Fatalf("failed waiting for all allocs to start: %v", err) + }) + + // Start draining node 1 + strategy := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 1 * time.Hour, + }, + } + node1Resp, err := rpc.NodeGet(node1ID) + require.Nil(err) + node1 := node1Resp.Node + require.Nil(state.UpdateNodeDrain(node1.ModifyIndex+1, node1ID, strategy)) + + // Start node 2 + c2 := client.TestClient(t, func(conf *config.Config) { + conf.LogOutput = testlog.NewWriter(t) + conf.Servers = []string{serverConfig.RPCAddr.String()} + }) + defer c2.Shutdown() + node2ID := c2.NodeID() + + // Wait for services to be migrated + testutil.WaitForResult(func() (bool, error) { + resp, err := rpc.NodeGetAllocs(node2ID) + if err != nil { + return false, err + } + + system, service := 0, 0 + for _, alloc := range resp.Allocs { + if alloc.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) + } + switch alloc.JobID { + case serviceJob.ID: + service++ + case systemJob.ID: + system++ + default: + return false, fmt.Errorf("unknown job: %s", alloc.Job.Name) + } + } + // 1 system + 2 service = 3 + if system+service != 3 { + return false, fmt.Errorf("wrong number of allocs: system %d/1, service %d/2", system, service) + } + return true, nil + }, func(err error) { + if resp, err := rpc.NodeGetAllocs(node2ID); err == nil { + for i, alloc := range resp.Allocs { + t.Logf("%d alloc %s job %s status %s prev %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus, alloc.PreviousAllocation) + } + } + t.Errorf("failed waiting for all allocs to migrate: %v", err) + }) + + // Wait for drained services to be dead + testutil.WaitForResult(func() (bool, error) { + resp, err := rpc.NodeGetAllocs(node1ID) + if err != nil { + return false, err + } + + running := make([]string, 0, len(resp.Allocs)) + for _, alloc := range resp.Allocs { + if alloc.ClientStatus == structs.AllocClientStatusRunning { + running = append(running, alloc.ID[:6]) + } + } + + if len(running) > 0 { + return false, fmt.Errorf("%d alloc(s) on draining node %s still running: %s", len(running), node1ID[:6], running) + } + return true, nil + }, func(err error) { + t.Errorf("failed 
waiting for all draining allocs to stop: %v", err) + }) + + node1Resp, err = rpc.NodeGet(node1ID) require.Nil(err) - assert.False(node1.Node.Drain) - assert.Nil(node1.Node.DrainStrategy) - assert.Equal(structs.NodeSchedulingIneligible, node1.Node.SchedulingEligibility) + node1 = node1Resp.Node + assert.False(node1.Drain) + assert.Nil(node1.DrainStrategy) + assert.Equal(structs.NodeSchedulingIneligible, node1.SchedulingEligibility) jobs, err := rpc.JobList() require.Nil(err) diff --git a/testutil/rpcapi/rcpapi.go b/testutil/rpcapi/rcpapi.go index 795123fdabcc..1eafabccbdb3 100644 --- a/testutil/rpcapi/rcpapi.go +++ b/testutil/rpcapi/rcpapi.go @@ -103,6 +103,24 @@ func (r *RPC) JobList() (*structs.JobListResponse, error) { return &resp, nil } +// Job.Register RPC +func (r *RPC) JobRegister(j *structs.Job) (*structs.JobRegisterResponse, error) { + req := &structs.JobRegisterRequest{ + Job: j.Copy(), + WriteRequest: structs.WriteRequest{ + Region: r.Region, + Namespace: j.Namespace, + }, + } + + // Fetch the response + var resp structs.JobRegisterResponse + if err := msgpackrpc.CallWithCodec(r.codec, "Job.Register", req, &resp); err != nil { + return nil, err + } + return &resp, nil +} + // Node.List RPC func (r *RPC) NodeList() (*structs.NodeListResponse, error) { get := &structs.NodeListRequest{ From 1f73cd5d4264e86427ff58bea5c7140419f0a633 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 28 Feb 2018 16:42:29 -0800 Subject: [PATCH 31/79] drainer: refactor newStopAllocs, applyMigrations --- nomad/drainer/drain.go | 93 +++++++++++++++++++++---------------- nomad/drainer/drain_test.go | 4 ++ 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go index e52a735aaf13..4f2b73556cff 100644 --- a/nomad/drainer/drain.go +++ b/nomad/drainer/drain.go @@ -51,7 +51,8 @@ func makeTaskGroupKey(a *structs.Allocation) string { return strings.Join([]string{a.Namespace, a.JobID, a.TaskGroup}, "-") } -// stopAllocs tracks allocs to drain by a unique TG key +// stopAllocs tracks allocs to drain by a unique TG key along with their jobs +// as we need to emit evaluations for each allocations job type stopAllocs struct { allocBatch map[string]*structs.DesiredTransition @@ -59,6 +60,25 @@ type stopAllocs struct { jobBatch map[jobKey]*structs.Job } +// newStopAllocs creates a list of allocs to migrate from an initial list of +// running jobs+allocs that need immediate draining. +func newStopAllocs(initial map[jobKey]*runningJob) *stopAllocs { + s := &stopAllocs{ + allocBatch: make(map[string]*structs.DesiredTransition), + jobBatch: make(map[jobKey]*structs.Job), + } + + // Add initial allocs + for _, drainingJob := range initial { + for _, a := range drainingJob.allocs { + s.add(drainingJob.job, a) + } + } + return s +} + +// add an allocation to be migrated. Its job must also be specified in order to +// emit an evaluation. func (s *stopAllocs) add(j *structs.Job, a *structs.Allocation) { // Add the desired migration transition to the batch s.allocBatch[a.ID] = &structs.DesiredTransition{ @@ -203,11 +223,6 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) go jobWatcher.run(ctx) for { - //TODO this method of async node updates means we could make - //migration decisions on out of date information. 
the worst - //possible outcome of this is that an allocation could be - //stopped on a node that recently had its drain cancelled which - //doesn't seem like that bad of a pathological case n.logger.Printf("[TRACE] nomad.drain: LOOP next deadline: %s (%s)", nextDeadline, time.Until(nextDeadline)) select { case nodes = <-nodeWatcher.nodesCh: @@ -383,18 +398,9 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) } // stoplist are the allocations to migrate and their jobs to emit - // evaluations for - stoplist := &stopAllocs{ - allocBatch: make(map[string]*structs.DesiredTransition), - jobBatch: make(map[jobKey]*structs.Job), - } - - // Immediately drain all allocs in drainNow - for _, drainingJob := range drainNow { - for _, a := range drainingJob.allocs { - stoplist.add(drainingJob.job, a) - } - } + // evaluations for. Initialized with allocations that should be + // immediately drained regardless of MaxParallel + stoplist := newStopAllocs(drainNow) // build drain list considering deadline & max_parallel for _, drainingJob := range drainableSvcs { @@ -456,29 +462,7 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) } if len(stoplist.allocBatch) > 0 { - n.logger.Printf("[DEBUG] nomad.drain: stopping %d alloc(s) for %d job(s)", len(stoplist.allocBatch), len(stoplist.jobBatch)) - - for id, _ := range stoplist.allocBatch { - n.logger.Printf("[TRACE] nomad.drain: migrating alloc %s", id[:6]) - } - - // Reevaluate affected jobs - evals := make([]*structs.Evaluation, 0, len(stoplist.jobBatch)) - for _, job := range stoplist.jobBatch { - evals = append(evals, &structs.Evaluation{ - ID: uuid.Generate(), - Namespace: job.Namespace, - Priority: job.Priority, - Type: job.Type, - TriggeredBy: structs.EvalTriggerNodeDrain, - JobID: job.ID, - JobModifyIndex: job.ModifyIndex, - Status: structs.EvalStatusPending, - }) - } - - // Commit this update via Raft - if err := n.raft.AllocUpdateDesiredTransition(stoplist.allocBatch, evals); err != nil { + if err := n.applyMigrations(stoplist); err != nil { //FIXME panic(err) } @@ -497,6 +481,33 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) } } +// applyMigrations applies the specified allocation migrations along with their +// evaluations to raft. +func (n *NodeDrainer) applyMigrations(stoplist *stopAllocs) error { + n.logger.Printf("[DEBUG] nomad.drain: stopping %d alloc(s) for %d job(s)", len(stoplist.allocBatch), len(stoplist.jobBatch)) + + for id, _ := range stoplist.allocBatch { + n.logger.Printf("[TRACE] nomad.drain: migrating alloc %s", id[:6]) + } + // Reevaluate affected jobs + evals := make([]*structs.Evaluation, 0, len(stoplist.jobBatch)) + for _, job := range stoplist.jobBatch { + evals = append(evals, &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: job.Namespace, + Priority: job.Priority, + Type: job.Type, + TriggeredBy: structs.EvalTriggerNodeDrain, + JobID: job.ID, + JobModifyIndex: job.ModifyIndex, + Status: structs.EvalStatusPending, + }) + } + + // Commit this update via Raft + return n.raft.AllocUpdateDesiredTransition(stoplist.allocBatch, evals) +} + // initDrainer initializes the node drainer state and returns a list of // draining nodes as well as allocs that are draining that should be watched // for a replacement. 
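A minimal sketch of how the newStopAllocs/add helpers above could be unit
tested, assuming the drainer package's existing mock fixtures; the test name
and assertions are illustrative, not part of this patch:

func TestNewStopAllocs_Sketch(t *testing.T) {
    job := mock.Job()
    alloc := mock.Alloc()
    alloc.Namespace = job.Namespace
    alloc.JobID = job.ID

    initial := map[jobKey]*runningJob{
        {job.Namespace, job.ID}: {job: job, allocs: []*structs.Allocation{alloc}},
    }

    stoplist := newStopAllocs(initial)

    // Every initial alloc should be marked with a migrate transition...
    transition, ok := stoplist.allocBatch[alloc.ID]
    if !ok || transition.Migrate == nil || !*transition.Migrate {
        t.Fatalf("expected alloc %s to be marked for migration", alloc.ID)
    }

    // ...and its job batched so an evaluation is emitted for it.
    if _, ok := stoplist.jobBatch[jobKey{job.Namespace, job.ID}]; !ok {
        t.Fatalf("expected job %s to be batched for evaluation", job.ID)
    }
}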
diff --git a/nomad/drainer/drain_test.go b/nomad/drainer/drain_test.go index 8361a56593ca..f92f2503e14f 100644 --- a/nomad/drainer/drain_test.go +++ b/nomad/drainer/drain_test.go @@ -271,6 +271,8 @@ func TestNodeDrainer_SimpleDrain(t *testing.T) { t.Logf("job: %s node: %s alloc: %s desired_status: %s desired_transition: %s actual: %s replaces: %s", alloc.Job.Name, alloc.NodeID[:6], alloc.ID[:6], alloc.DesiredStatus, pretty.Sprint(alloc.DesiredTransition.Migrate), alloc.ClientStatus, alloc.PreviousAllocation) } + + t.Logf("==> PASS") } // TestNodeDrainer_SystemDrain asserts system jobs are drained @@ -484,4 +486,6 @@ func TestNodeDrainer_SystemDrain(t *testing.T) { t.Logf("job: %s node: %s alloc: %s desired_status: %s desired_transition: %s actual: %s replaces: %s", alloc.Job.Name, alloc.NodeID[:6], alloc.ID[:6], alloc.DesiredStatus, pretty.Sprint(alloc.DesiredTransition.Migrate), alloc.ClientStatus, alloc.PreviousAllocation) } + + t.Logf("==> PASS") } From 478209807e10fcb2c6592e62d36bce2334aeb74b Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 28 Feb 2018 20:59:41 -0800 Subject: [PATCH 32/79] refactor main drainloop into 2 more methods --- nomad/drainer/drain.go | 405 +++++++++++++++++++++-------------------- 1 file changed, 212 insertions(+), 193 deletions(-) diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go index 4f2b73556cff..8db56ac7dacc 100644 --- a/nomad/drainer/drain.go +++ b/nomad/drainer/drain.go @@ -29,21 +29,28 @@ type runningJob struct { allocs []*structs.Allocation } -// drainingAlloc contains a conservative deadline an alloc has to be healthy by -// before it should stopped being watched and replaced. -type drainingAlloc struct { - // LastModified+MigrateStrategy.HealthyDeadline - deadline time.Time - - // Task Group key - tgKey string -} - -func newDrainingAlloc(a *structs.Allocation, deadline time.Time) drainingAlloc { - return drainingAlloc{ - deadline: deadline, - tgKey: makeTaskGroupKey(a), - } +// collectResult is the state collected by scanning for drain eligible allocs +type collectResult struct { + // drainableSvcs contains all service jobs and allocs that are + // potentially drainable meaning they have at least one allocation on a + // draining node. + drainableSvcs map[jobKey]*runningJob + + // drainNow contains all batch and system jobs that should be + // immediately drained due to a deadline or in the case of system jobs: + // all other allocs on the node have completed draining. + drainNow map[jobKey]*runningJob + + // upPerTG is a count of running allocs per task group for the + // migration mark phase to use when considering how many allocs can be + // migrated for a given group. + upPerTG map[string]int + + // doneNodes need no coordinating to finish their drain. Either all + // allocs have drained, the node is being force drained, or the drain + // deadline was hit. Any remaining allocs will be migrated via + // drainNow. + doneNodes map[string]*structs.Node } // makeTaskGroupKey returns a unique key for an allocation's task group @@ -107,10 +114,15 @@ type nodeDrainerState struct { // should be called when a server establishes leadership and SetEnabled(false) // called when leadership is lost. 
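// (Illustrative wiring, assumed rather than shown in this patch: on gaining
// leadership call s.nodeDrainer.SetEnabled(true, s.State()); on losing it,
// SetEnabled(false, nil). Run itself is started once in setupNodeDrainer and
// exits only when the server's shutdownCh closes.)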
type NodeDrainer struct { + // enabledCh is used by SetEnabled to signal Run when to start/stop the + // nodeDrainer goroutine enabledCh chan nodeDrainerState + // raft is a shim around the raft messages necessary for draining raft RaftApplier + // shutdownCh is closed when the Server is shutting down the + // NodeDrainer should permanently exit shutdownCh <-chan struct{} logger *log.Logger @@ -249,9 +261,6 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) return } - // Tracks nodes that are done draining - doneNodes := map[string]*structs.Node{} - // Capture state (statestore and time) to do consistent comparisons snapshot, err := state.Snapshot() if err != nil { @@ -260,223 +269,233 @@ func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) } now := time.Now() - // job key -> {job, allocs} - // Collect all allocs for all jobs with at least one - // non-terminal alloc on a draining node. - // Invariants: - // - Only service jobs - // - No entries with 0 allocs - //TODO could this be a helper method on prevAllocWatcher - drainableSvcs := map[jobKey]*runningJob{} + // Collect all drainable jobs + result, err := n.collectDrainable(nodes, snapshot, jobWatcher, now) + if err != nil { + //FIXME + panic(err) + } - // drainNow are allocs for batch or system jobs that should be - // drained due to a node deadline being reached - drainNow := map[jobKey]*runningJob{} + // stoplist are the allocations to migrate and their jobs to emit + // evaluations for. Initialized with allocations that should be + // immediately drained regardless of MaxParallel + stoplist := newStopAllocs(result.drainNow) - // track number of "up" allocs per task group (not terminal and - // have a deployment status) - upPerTG := map[string]int{} + // build drain list considering deadline & max_parallel + n.markMigrations(stoplist, result.upPerTG, result.drainableSvcs, nodes, now) - // Collect all drainable jobs - for nodeID, node := range nodes { - allocs, err := snapshot.AllocsByNode(nil, nodeID) - if err != nil { + if len(stoplist.allocBatch) > 0 { + if err := n.applyMigrations(stoplist); err != nil { //FIXME panic(err) } + } - // drainableSys are allocs for system jobs that should be - // drained if there are no other allocs left - drainableSys := map[jobKey]*runningJob{} - - // track number of allocs left on this node to be drained - allocsLeft := false - inf, deadline := node.DrainStrategy.DeadlineTime() - deadlineReached := !inf && deadline.Before(now) - for _, alloc := range allocs { - // Don't need to consider drained allocs - if alloc.TerminalStatus() { - continue - } + // Unset drain for nodes done draining + for nodeID, node := range result.doneNodes { + if err := n.raft.NodeDrainComplete(nodeID); err != nil { + n.logger.Printf("[ERR] nomad.drain: failed to unset drain for: %v", err) + //FIXME + panic(err) + } + n.logger.Printf("[INFO] nomad.drain: node %s (%s) completed draining", nodeID, node.Name) + delete(nodes, nodeID) + } + } +} - jobkey := jobKey{alloc.Namespace, alloc.JobID} +// collectDrainable scans all nodes and allocs on draining nodes and builds a +// structure of eligible allocs to drain. 
+func (n *NodeDrainer) collectDrainable(nodes map[string]*structs.Node, state *state.StateSnapshot, + jobWatcher *jobWatcher, now time.Time) (*collectResult, error) { - // job does not found yet - job, err := snapshot.JobByID(nil, alloc.Namespace, alloc.JobID) - if err != nil { - //FIXME - panic(err) - } + svcs := map[jobKey]*runningJob{} + drainNow := map[jobKey]*runningJob{} + upPerTG := map[string]int{} + doneNodes := map[string]*structs.Node{} - // IgnoreSystemJobs if specified in the node's DrainStrategy - if node.DrainStrategy.IgnoreSystemJobs && job.Type == structs.JobTypeSystem { - continue - } + for nodeID, node := range nodes { + allocs, err := state.AllocsByNode(nil, nodeID) + if err != nil { + return nil, err + } - // When the node deadline is reached all batch - // and service jobs will be drained - if deadlineReached && job.Type != structs.JobTypeService { - n.logger.Printf("[TRACE] nomad.drain: draining alloc %s due to node %s reaching drain deadline", alloc.ID, node.ID) - if j, ok := drainNow[jobkey]; ok { - j.allocs = append(j.allocs, alloc) - } else { - // First alloc for this job, create entry - drainNow[jobkey] = &runningJob{ - job: job, - allocs: []*structs.Allocation{alloc}, - } - } - continue - } + // drainableSys are allocs for system jobs that should be + // drained if there are no other allocs left + drainableSys := map[jobKey]*runningJob{} - // If deadline hasn't been reached, system jobs - // may still be drained if there are no other - // allocs left - if !deadlineReached && job.Type == structs.JobTypeSystem { - n.logger.Printf("[TRACE] nomad.drain: system alloc %s will be drained if no other allocs on node %s", alloc.ID, node.ID) - if j, ok := drainableSys[jobkey]; ok { - j.allocs = append(j.allocs, alloc) - } else { - // First alloc for this job, create entry - drainableSys[jobkey] = &runningJob{ - job: job, - allocs: []*structs.Allocation{alloc}, - } - } - continue - } + // track number of allocs left on this node to be drained + allocsLeft := false + inf, deadline := node.DrainStrategy.DeadlineTime() + deadlineReached := !inf && deadline.Before(now) + for _, alloc := range allocs { + // Don't need to consider drained allocs + if alloc.TerminalStatus() { + continue + } - // This alloc is still running on a draining - // node, so treat the node as having allocs - // remaining - allocsLeft = true + jobkey := jobKey{alloc.Namespace, alloc.JobID} - jobAllocs, err := snapshot.AllocsByJob(nil, alloc.Namespace, alloc.JobID, true) - if err != nil { - //FIXME - panic(err) - } + // job does not found yet + job, err := state.JobByID(nil, alloc.Namespace, alloc.JobID) + if err != nil { + return nil, err + } - // Count the number of down (terminal or nil deployment status) per task group - if job.Type == structs.JobTypeService { - num := 0 - for _, a := range jobAllocs { - if !a.TerminalStatus() && a.DeploymentStatus != nil { - // Not terminal and health updated, count it as up! 
- upPerTG[makeTaskGroupKey(a)]++ - num++ - } + // IgnoreSystemJobs if specified in the node's DrainStrategy + if node.DrainStrategy.IgnoreSystemJobs && job.Type == structs.JobTypeSystem { + continue + } + + // When the node deadline is reached all batch + // and service jobs will be drained + if deadlineReached && job.Type != structs.JobTypeService { + n.logger.Printf("[TRACE] nomad.drain: draining alloc %s due to node %s reaching drain deadline", alloc.ID, node.ID) + if j, ok := drainNow[jobkey]; ok { + j.allocs = append(j.allocs, alloc) + } else { + // First alloc for this job, create entry + drainNow[jobkey] = &runningJob{ + job: job, + allocs: []*structs.Allocation{alloc}, } - n.logger.Printf("[TRACE] nomad.drain: job %s has %d allocs running", job.Name, num) } + continue + } - drainableSvcs[jobkey] = &runningJob{ - job: job, - allocs: jobAllocs, + // If deadline hasn't been reached, system jobs + // may still be drained if there are no other + // allocs left + if !deadlineReached && job.Type == structs.JobTypeSystem { + n.logger.Printf("[TRACE] nomad.drain: system alloc %s will be drained if no other allocs on node %s", alloc.ID, node.ID) + if j, ok := drainableSys[jobkey]; ok { + j.allocs = append(j.allocs, alloc) + } else { + // First alloc for this job, create entry + drainableSys[jobkey] = &runningJob{ + job: job, + allocs: []*structs.Allocation{alloc}, + } } + continue + } + + // This alloc is still running on a draining + // node, so treat the node as having allocs + // remaining + allocsLeft = true - jobWatcher.watch(jobkey, nodeID) + jobAllocs, err := state.AllocsByJob(nil, alloc.Namespace, alloc.JobID, true) + if err != nil { + return nil, err } - // if node has no allocs or has hit its deadline, it's done draining! - if !allocsLeft || deadlineReached { - n.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain or has reached deadline", nodeID) - jobWatcher.nodeDone(nodeID) - doneNodes[nodeID] = node - - // Add all system jobs on this node to the drainNow slice - for k, sysj := range drainableSys { - if j, ok := drainNow[k]; ok { - // Job already has at least one alloc draining, append this one - j.allocs = append(j.allocs, sysj.allocs...) - } else { - // First draining alloc for this job, add the entry - drainNow[k] = sysj + // Count the number of down (terminal or nil deployment status) per task group + if job.Type == structs.JobTypeService { + num := 0 + for _, a := range jobAllocs { + if !a.TerminalStatus() && a.DeploymentStatus != nil { + // Not terminal and health updated, count it as up! + upPerTG[makeTaskGroupKey(a)]++ + num++ } } + n.logger.Printf("[TRACE] nomad.drain: job %s has %d allocs running", job.Name, num) } - } - // stoplist are the allocations to migrate and their jobs to emit - // evaluations for. Initialized with allocations that should be - // immediately drained regardless of MaxParallel - stoplist := newStopAllocs(drainNow) + svcs[jobkey] = &runningJob{ + job: job, + allocs: jobAllocs, + } - // build drain list considering deadline & max_parallel - for _, drainingJob := range drainableSvcs { - for _, alloc := range drainingJob.allocs { - // Already draining/dead allocs don't need to be drained - if alloc.TerminalStatus() { - continue - } + jobWatcher.watch(jobkey, nodeID) + } - node, ok := nodes[alloc.NodeID] - if !ok { - // Alloc's node is not draining so not elligible for draining! - continue + // if node has no allocs or has hit its deadline, it's done draining! 
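+		// Done nodes are recorded in doneNodes so the caller can unset their
+		// drain flag, and any pending system allocs are promoted to drainNow.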
+ if !allocsLeft || deadlineReached { + n.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain or has reached deadline", nodeID) + jobWatcher.nodeDone(nodeID) + doneNodes[nodeID] = node + + // Add all system jobs on this node to the drainNow slice + for k, sysj := range drainableSys { + if j, ok := drainNow[k]; ok { + // Job already has at least one alloc draining, append this one + j.allocs = append(j.allocs, sysj.allocs...) + } else { + // First draining alloc for this job, add the entry + drainNow[k] = sysj } + } + } + } - tgKey := makeTaskGroupKey(alloc) - - if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) { - n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) - // Alloc's Node has reached its deadline - stoplist.add(drainingJob.job, alloc) - upPerTG[tgKey]-- + result := &collectResult{ + drainableSvcs: svcs, + drainNow: drainNow, + upPerTG: upPerTG, + doneNodes: doneNodes, + } + return result, nil +} - continue - } +// markMigrations marks services to be drained for migration in the stoplist. +func (n *NodeDrainer) markMigrations(stoplist *stopAllocs, upPerTG map[string]int, drainable map[jobKey]*runningJob, nodes map[string]*structs.Node, now time.Time) { + for _, drainingJob := range drainable { + for _, alloc := range drainingJob.allocs { + // Already draining/dead allocs don't need to be drained + if alloc.TerminalStatus() { + continue + } - // Stop allocs with count=1, max_parallel==0, or draining 0 { - if err := n.applyMigrations(stoplist); err != nil { - //FIXME - panic(err) + // No migrate strategy or a max parallel of 0 mean force draining + if tg.Migrate == nil || tg.Migrate.MaxParallel == 0 { + n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to force drain", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + stoplist.add(drainingJob.job, alloc) + continue } - } - // Unset drain for nodes done draining - for nodeID, node := range doneNodes { - if err := n.raft.NodeDrainComplete(nodeID); err != nil { - n.logger.Printf("[ERR] nomad.drain: failed to unset drain for: %v", err) - //FIXME - panic(err) + n.logger.Printf("[TRACE] nomad.drain: considering job %s alloc %s count %d maxp %d up %d", + drainingJob.job.Name, alloc.ID[:6], tg.Count, tg.Migrate.MaxParallel, upPerTG[tgKey]) + + // Count - MaxParalell = minimum number of allocations that must be "up" + minUp := (tg.Count - tg.Migrate.MaxParallel) + + // If minimum is < the current number up it is safe to stop one. 
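+			// For example, count=3 with max_parallel=1 gives minUp=2: while three
+			// allocs are up, one may be drained, leaving two running.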
+ if minUp < upPerTG[tgKey] { + n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to max parallel", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) + // More migrations are allowed, add to stoplist + stoplist.add(drainingJob.job, alloc) + upPerTG[tgKey]-- } - n.logger.Printf("[INFO] nomad.drain: node %s (%s) completed draining", nodeID, node.Name) - delete(nodes, nodeID) } } } From 7f989499ffb4bfc5eca31531ade7c385cfc77a56 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 1 Mar 2018 11:21:32 -0800 Subject: [PATCH 33/79] Correct defaulting --- api/jobs.go | 1 + api/tasks.go | 44 +++++++- api/tasks_test.go | 152 ++++++++++++++++++++++++++ command/agent/job_endpoint.go | 9 ++ command/agent/job_endpoint_test.go | 12 ++ jobspec/parse.go | 19 +++- jobspec/parse_test.go | 38 +++++++ jobspec/test-fixtures/migrate-job.hcl | 28 +++++ 8 files changed, 297 insertions(+), 6 deletions(-) create mode 100644 jobspec/test-fixtures/migrate-job.hcl diff --git a/api/jobs.go b/api/jobs.go index 9e3227af49e8..5fcecf403871 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -559,6 +559,7 @@ type Job struct { ParameterizedJob *ParameterizedJobConfig Payload []byte Reschedule *ReschedulePolicy + Migrate *MigrateStrategy Meta map[string]string VaultToken *string `mapstructure:"vault_token"` Status *string diff --git a/api/tasks.go b/api/tasks.go index f7d3d9fb0737..47b502d57558 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -321,6 +321,30 @@ func (m *MigrateStrategy) Canonicalize() { } } +func (m *MigrateStrategy) Merge(o *MigrateStrategy) { + if o.MaxParallel != nil { + m.MaxParallel = o.MaxParallel + } + if o.HealthCheck != nil { + m.HealthCheck = o.HealthCheck + } + if o.MinHealthyTime != nil { + m.MinHealthyTime = o.MinHealthyTime + } + if o.HealthyDeadline != nil { + m.HealthyDeadline = o.HealthyDeadline + } +} + +func (m *MigrateStrategy) Copy() *MigrateStrategy { + if m == nil { + return nil + } + nm := new(MigrateStrategy) + *nm = *m + return nm +} + // TaskGroup is the unit of scheduling. 
type TaskGroup struct { Name *string @@ -415,7 +439,25 @@ func (g *TaskGroup) Canonicalize(job *Job) { } g.ReschedulePolicy = defaultReschedulePolicy - g.Migrate.Canonicalize() + // Merge the migrate strategy from the job + if jm, tm := job.Migrate != nil, g.Migrate != nil; jm && tm { + jobMigrate := job.Migrate.Copy() + jobMigrate.Merge(g.Migrate) + g.Migrate = jobMigrate + } else if jm { + jobMigrate := job.Migrate.Copy() + g.Migrate = jobMigrate + } + + // Merge with default reschedule policy + if *job.Type == "service" { + defaultMigrateStrategy := &MigrateStrategy{} + defaultMigrateStrategy.Canonicalize() + if g.Migrate != nil { + defaultMigrateStrategy.Merge(g.Migrate) + } + g.Migrate = defaultMigrateStrategy + } var defaultRestartPolicy *RestartPolicy switch *job.Type { diff --git a/api/tasks_test.go b/api/tasks_test.go index 3280507ad591..d72acc179bf6 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -430,6 +430,158 @@ func TestTaskGroup_Canonicalize_ReschedulePolicy(t *testing.T) { } } +// Verifies that migrate strategy is merged correctly +func TestTaskGroup_Canonicalize_MigrateStrategy(t *testing.T) { + type testCase struct { + desc string + jobType string + jobMigrate *MigrateStrategy + taskMigrate *MigrateStrategy + expected *MigrateStrategy + } + + testCases := []testCase{ + { + desc: "Default batch", + jobType: "batch", + jobMigrate: nil, + taskMigrate: nil, + expected: nil, + }, + { + desc: "Default service", + jobType: "service", + jobMigrate: nil, + taskMigrate: nil, + expected: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(1), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(10 * time.Second), + HealthyDeadline: helper.TimeToPtr(5 * time.Minute), + }, + }, + { + desc: "Empty job migrate strategy", + jobType: "service", + jobMigrate: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(0), + HealthCheck: helper.StringToPtr(""), + MinHealthyTime: helper.TimeToPtr(0), + HealthyDeadline: helper.TimeToPtr(0), + }, + taskMigrate: nil, + expected: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(0), + HealthCheck: helper.StringToPtr(""), + MinHealthyTime: helper.TimeToPtr(0), + HealthyDeadline: helper.TimeToPtr(0), + }, + }, + { + desc: "Inherit from job", + jobType: "service", + jobMigrate: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(3), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + taskMigrate: nil, + expected: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(3), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + }, + { + desc: "Set in task", + jobType: "service", + jobMigrate: nil, + taskMigrate: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(3), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + expected: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(3), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + }, + { + desc: "Merge from job", + jobType: "service", + jobMigrate: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(11), + }, + taskMigrate: &MigrateStrategy{ + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + expected: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(11), + HealthCheck: helper.StringToPtr("checks"), + 
MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + }, + { + desc: "Override from group", + jobType: "service", + jobMigrate: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(11), + }, + taskMigrate: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(5), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + expected: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(5), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(2), + HealthyDeadline: helper.TimeToPtr(2), + }, + }, + { + desc: "Parallel from job, defaulting", + jobType: "service", + jobMigrate: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(5), + }, + taskMigrate: nil, + expected: &MigrateStrategy{ + MaxParallel: helper.IntToPtr(5), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(10 * time.Second), + HealthyDeadline: helper.TimeToPtr(5 * time.Minute), + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + job := &Job{ + ID: helper.StringToPtr("test"), + Migrate: tc.jobMigrate, + Type: helper.StringToPtr(tc.jobType), + } + job.Canonicalize() + tg := &TaskGroup{ + Name: helper.StringToPtr("foo"), + Migrate: tc.taskMigrate, + } + tg.Canonicalize(job) + assert.Equal(t, tc.expected, tg.Migrate) + }) + } +} + // TestService_CheckRestart asserts Service.CheckRestart settings are properly // inherited by Checks. func TestService_CheckRestart(t *testing.T) { diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 840fb1feeda9..ce1605728740 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -649,6 +649,15 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) { } } + if taskGroup.Migrate != nil { + tg.Migrate = &structs.MigrateStrategy{ + MaxParallel: *taskGroup.Migrate.MaxParallel, + HealthCheck: *taskGroup.Migrate.HealthCheck, + MinHealthyTime: *taskGroup.Migrate.MinHealthyTime, + HealthyDeadline: *taskGroup.Migrate.HealthyDeadline, + } + } + tg.EphemeralDisk = &structs.EphemeralDisk{ Sticky: *taskGroup.EphemeralDisk.Sticky, SizeMB: *taskGroup.EphemeralDisk.SizeMB, diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index f59acaaf2eef..57b5d1869d24 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -1179,6 +1179,12 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Unlimited: helper.BoolToPtr(true), MaxDelay: helper.TimeToPtr(20 * time.Minute), }, + Migrate: &api.MigrateStrategy{ + MaxParallel: helper.IntToPtr(12), + HealthCheck: helper.StringToPtr("task_events"), + MinHealthyTime: helper.TimeToPtr(12 * time.Hour), + HealthyDeadline: helper.TimeToPtr(12 * time.Hour), + }, EphemeralDisk: &api.EphemeralDisk{ SizeMB: helper.IntToPtr(100), Sticky: helper.BoolToPtr(true), @@ -1395,6 +1401,12 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Unlimited: true, MaxDelay: 20 * time.Minute, }, + Migrate: &structs.MigrateStrategy{ + MaxParallel: 12, + HealthCheck: "task_events", + MinHealthyTime: 12 * time.Hour, + HealthyDeadline: 12 * time.Hour, + }, EphemeralDisk: &structs.EphemeralDisk{ SizeMB: 100, Sticky: true, diff --git a/jobspec/parse.go b/jobspec/parse.go index 4bfebc9099aa..e56161cd4c40 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -104,11 +104,12 @@ func parseJob(result *api.Job, list *ast.ObjectList) error { } delete(m, "constraint") delete(m, "meta") - delete(m, "update") - 
delete(m, "periodic") - delete(m, "vault") + delete(m, "migrate") delete(m, "parameterized") + delete(m, "periodic") delete(m, "reschedule") + delete(m, "update") + delete(m, "vault") // Set the ID and name to the object key result.ID = helper.StringToPtr(obj.Keys[0].Token.Value().(string)) @@ -132,19 +133,20 @@ func parseJob(result *api.Job, list *ast.ObjectList) error { "all_at_once", "constraint", "datacenters", - "parameterized", "group", "id", "meta", + "migrate", "name", "namespace", + "parameterized", "periodic", "priority", "region", + "reschedule", "task", "type", "update", - "reschedule", "vault", "vault_token", } @@ -187,6 +189,13 @@ func parseJob(result *api.Job, list *ast.ObjectList) error { } } + // If we have a migration strategy, then parse that + if o := listVal.Filter("migrate"); len(o.Items) > 0 { + if err := parseMigrate(&result.Migrate, o); err != nil { + return multierror.Prefix(err, "migrate ->") + } + } + // Parse out meta fields. These are in HCL as a list so we need // to iterate over them and merge them. if metaO := listVal.Filter("meta"); len(metaO.Items) > 0 { diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index c3989a68ca94..1275cd51c90f 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -741,6 +741,44 @@ func TestParse(t *testing.T) { }, false, }, + { + "migrate-job.hcl", + &api.Job{ + ID: helper.StringToPtr("foo"), + Name: helper.StringToPtr("foo"), + Type: helper.StringToPtr("batch"), + Datacenters: []string{"dc1"}, + Migrate: &api.MigrateStrategy{ + MaxParallel: helper.IntToPtr(2), + HealthCheck: helper.StringToPtr("task_states"), + MinHealthyTime: helper.TimeToPtr(11 * time.Second), + HealthyDeadline: helper.TimeToPtr(11 * time.Minute), + }, + TaskGroups: []*api.TaskGroup{ + { + Name: helper.StringToPtr("bar"), + Count: helper.IntToPtr(3), + Migrate: &api.MigrateStrategy{ + MaxParallel: helper.IntToPtr(3), + HealthCheck: helper.StringToPtr("checks"), + MinHealthyTime: helper.TimeToPtr(1 * time.Second), + HealthyDeadline: helper.TimeToPtr(1 * time.Minute), + }, + Tasks: []*api.Task{ + { + Name: "bar", + Driver: "raw_exec", + Config: map[string]interface{}{ + "command": "bash", + "args": []interface{}{"-c", "echo hi"}, + }, + }, + }, + }, + }, + }, + false, + }, } for _, tc := range cases { diff --git a/jobspec/test-fixtures/migrate-job.hcl b/jobspec/test-fixtures/migrate-job.hcl new file mode 100644 index 000000000000..5ec05e6b5141 --- /dev/null +++ b/jobspec/test-fixtures/migrate-job.hcl @@ -0,0 +1,28 @@ +job "foo" { + datacenters = ["dc1"] + type = "batch" + migrate { + max_parallel = 2 + health_check = "task_states" + min_healthy_time = "11s" + healthy_deadline = "11m" + } + + group "bar" { + count = 3 + task "bar" { + driver = "raw_exec" + config { + command = "bash" + args = ["-c", "echo hi"] + } + } + + migrate { + max_parallel = 3 + health_check = "checks" + min_healthy_time = "1s" + healthy_deadline = "1m" + } + } +} From a027016b87dab56fd004424f3a757674feb763ab Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 1 Mar 2018 11:27:36 -0800 Subject: [PATCH 34/79] Fix file names --- nomad/drainer/jobwatcher.go | 140 ----------------------------------- nomad/drainer/nodewatcher.go | 121 ------------------------------ 2 files changed, 261 deletions(-) delete mode 100644 nomad/drainer/jobwatcher.go delete mode 100644 nomad/drainer/nodewatcher.go diff --git a/nomad/drainer/jobwatcher.go b/nomad/drainer/jobwatcher.go deleted file mode 100644 index 95a1be5d157e..000000000000 --- a/nomad/drainer/jobwatcher.go +++ /dev/null @@ 
-1,140 +0,0 @@ -package drainer - -import ( - "context" - "log" - "sync" - - memdb "github.com/hashicorp/go-memdb" - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" -) - -// jobWatcher watches allocation changes for jobs with at least one allocation -// on a draining node. -type jobWatcher struct { - // allocsIndex to start watching from - allocsIndex uint64 - - // job -> node.ID - jobs map[jobKey]string - jobsMu sync.Mutex - - jobsCh chan map[jobKey]struct{} - - state *state.StateStore - - logger *log.Logger -} - -func newJobWatcher(logger *log.Logger, jobs map[jobKey]string, allocsIndex uint64, state *state.StateStore) *jobWatcher { - return &jobWatcher{ - allocsIndex: allocsIndex, - logger: logger, - jobs: jobs, - jobsCh: make(chan map[jobKey]struct{}), - state: state, - } -} - -func (j *jobWatcher) watch(k jobKey, nodeID string) { - j.logger.Printf("[TRACE] nomad.drain: watching job %s on draining node %s", k.jobid, nodeID[:6]) - j.jobsMu.Lock() - j.jobs[k] = nodeID - j.jobsMu.Unlock() -} - -func (j *jobWatcher) nodeDone(nodeID string) { - j.jobsMu.Lock() - defer j.jobsMu.Unlock() - for k, v := range j.jobs { - if v == nodeID { - j.logger.Printf("[TRACE] nomad.drain: UNwatching job %s on done draining node %s", k.jobid, nodeID[:6]) - delete(j.jobs, k) - } - } -} - -func (j *jobWatcher) WaitCh() <-chan map[jobKey]struct{} { - return j.jobsCh -} - -func (j *jobWatcher) run(ctx context.Context) { - var resp interface{} - var err error - - for { - //FIXME have watchAllocs create a closure and give it a copy of j.jobs to remove locking? - //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? - var newIndex uint64 - resp, newIndex, err = j.state.BlockingQuery(j.watchAllocs, j.allocsIndex, ctx) - if err != nil { - if err == context.Canceled { - j.logger.Printf("[TRACE] nomad.drain: job watcher shutting down") - return - } - j.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) - return - } - - j.logger.Printf("[TRACE] nomad.drain: job watcher old index: %d new index: %d", j.allocsIndex, newIndex) - j.allocsIndex = newIndex - - changedJobs := resp.(map[jobKey]struct{}) - if len(changedJobs) > 0 { - select { - case j.jobsCh <- changedJobs: - case <-ctx.Done(): - return - } - } - } -} - -func (j *jobWatcher) watchAllocs(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { - iter, err := state.Allocs(ws) - if err != nil { - return nil, 0, err - } - - index, err := state.Index("allocs") - if err != nil { - return nil, 0, err - } - - skipped := 0 - - // job ids - resp := map[jobKey]struct{}{} - - for { - raw := iter.Next() - if raw == nil { - break - } - - alloc := raw.(*structs.Allocation) - - j.jobsMu.Lock() - _, ok := j.jobs[jobKey{alloc.Namespace, alloc.JobID}] - j.jobsMu.Unlock() - - if !ok { - // alloc is not part of a draining job - skipped++ - continue - } - - // don't wake drain loop if alloc hasn't updated its health - if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { - j.logger.Printf("[TRACE] nomad.drain: job watcher found alloc %s - deployment status: %t", alloc.ID[:6], *alloc.DeploymentStatus.Healthy) - resp[jobKey{alloc.Namespace, alloc.JobID}] = struct{}{} - } else { - j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring alloc %s - no deployment status", alloc.ID[:6]) - } - } - - j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring %d allocs - not part of draining job at index %d", skipped, index) - - return resp, 
index, nil -} diff --git a/nomad/drainer/nodewatcher.go b/nomad/drainer/nodewatcher.go deleted file mode 100644 index 5f419ea2ca91..000000000000 --- a/nomad/drainer/nodewatcher.go +++ /dev/null @@ -1,121 +0,0 @@ -package drainer - -import ( - "context" - "log" - - memdb "github.com/hashicorp/go-memdb" - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" -) - -// nodeWatcher watches for nodes to start or stop draining -type nodeWatcher struct { - index uint64 - nodes map[string]*structs.Node - nodesCh chan map[string]*structs.Node - state *state.StateStore - logger *log.Logger -} - -func newNodeWatcher(logger *log.Logger, nodes map[string]*structs.Node, index uint64, state *state.StateStore) *nodeWatcher { - return &nodeWatcher{ - nodes: nodes, - nodesCh: make(chan map[string]*structs.Node), - index: index, - state: state, - logger: logger, - } -} - -func (n *nodeWatcher) run(ctx context.Context) { - // Trigger an initial drain pass if there are already nodes draining - //FIXME this is unneccessary if a node has reached a deadline - n.logger.Printf("[TRACE] nomad.drain: initial draining nodes: %d", len(n.nodes)) - if len(n.nodes) > 0 { - n.nodesCh <- n.nodes - } - - for { - //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? - resp, index, err := n.state.BlockingQuery(n.queryNodeDrain, n.index, ctx) - if err != nil { - if err == context.Canceled { - n.logger.Printf("[TRACE] nomad.drain: draining node watcher shutting down") - return - } - n.logger.Printf("[ERR] nomad.drain: error blocking on node updates at index %d: %v", n.index, err) - return - } - - // update index for next run - n.index = index - - changed := false - newNodes := resp.([]*structs.Node) - n.logger.Printf("[TRACE] nomad.drain: %d nodes to consider", len(newNodes)) //FIXME remove - for _, newNode := range newNodes { - if existingNode, ok := n.nodes[newNode.ID]; ok { - // Node was draining, see if it has changed - if newNode.DrainStrategy == nil { - // Node stopped draining - delete(n.nodes, newNode.ID) - changed = true - } else if !newNode.DrainStrategy.Equal(existingNode.DrainStrategy) { - // Update deadline - n.nodes[newNode.ID] = newNode - changed = true - } - } else { - // Node was not draining - if newNode.DrainStrategy != nil { - // Node started draining - n.nodes[newNode.ID] = newNode - changed = true - } - } - } - - // Send a copy of the draining nodes if there were changes - if !changed { - continue - } - - nodesCopy := make(map[string]*structs.Node, len(n.nodes)) - for k, v := range n.nodes { - nodesCopy[k] = v - } - - select { - case n.nodesCh <- nodesCopy: - case <-ctx.Done(): - return - } - } -} - -func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { - iter, err := state.Nodes(ws) - if err != nil { - return nil, 0, err - } - - index, err := state.Index("nodes") - if err != nil { - return nil, 0, err - } - - resp := make([]*structs.Node, 0, 8) - - for { - raw := iter.Next() - if raw == nil { - break - } - - node := raw.(*structs.Node) - resp = append(resp, node) - } - - return resp, index, nil -} From c00c02df6258296427e0ba78df06e1763b24fba2 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 1 Mar 2018 13:36:26 -0800 Subject: [PATCH 35/79] System test runs on mac --- nomad/drainer/drain_test.go | 4 +- nomad/drainer/job_watcher.go | 140 ++++++++++++++++++++++++++++++++++ nomad/drainer/node_watcher.go | 121 +++++++++++++++++++++++++++++ 3 files changed, 263 insertions(+), 2 
deletions(-) create mode 100644 nomad/drainer/job_watcher.go create mode 100644 nomad/drainer/node_watcher.go diff --git a/nomad/drainer/drain_test.go b/nomad/drainer/drain_test.go index f92f2503e14f..993a65fcd0ed 100644 --- a/nomad/drainer/drain_test.go +++ b/nomad/drainer/drain_test.go @@ -297,6 +297,7 @@ func TestNodeDrainer_SystemDrain(t *testing.T) { serviceJob := mock.Job() serviceJob.Name = "service-job" serviceJob.Type = structs.JobTypeService + serviceJob.Constraints = nil serviceJob.TaskGroups[0].Count = 2 serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{ MaxParallel: 1, @@ -315,6 +316,7 @@ func TestNodeDrainer_SystemDrain(t *testing.T) { systemJob := mock.SystemJob() systemJob.Name = "system-job" systemJob.Type = structs.JobTypeSystem + systemJob.Constraints = nil systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" systemJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ "run_for": "10m", @@ -486,6 +488,4 @@ func TestNodeDrainer_SystemDrain(t *testing.T) { t.Logf("job: %s node: %s alloc: %s desired_status: %s desired_transition: %s actual: %s replaces: %s", alloc.Job.Name, alloc.NodeID[:6], alloc.ID[:6], alloc.DesiredStatus, pretty.Sprint(alloc.DesiredTransition.Migrate), alloc.ClientStatus, alloc.PreviousAllocation) } - - t.Logf("==> PASS") } diff --git a/nomad/drainer/job_watcher.go b/nomad/drainer/job_watcher.go new file mode 100644 index 000000000000..95a1be5d157e --- /dev/null +++ b/nomad/drainer/job_watcher.go @@ -0,0 +1,140 @@ +package drainer + +import ( + "context" + "log" + "sync" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// jobWatcher watches allocation changes for jobs with at least one allocation +// on a draining node. +type jobWatcher struct { + // allocsIndex to start watching from + allocsIndex uint64 + + // job -> node.ID + jobs map[jobKey]string + jobsMu sync.Mutex + + jobsCh chan map[jobKey]struct{} + + state *state.StateStore + + logger *log.Logger +} + +func newJobWatcher(logger *log.Logger, jobs map[jobKey]string, allocsIndex uint64, state *state.StateStore) *jobWatcher { + return &jobWatcher{ + allocsIndex: allocsIndex, + logger: logger, + jobs: jobs, + jobsCh: make(chan map[jobKey]struct{}), + state: state, + } +} + +func (j *jobWatcher) watch(k jobKey, nodeID string) { + j.logger.Printf("[TRACE] nomad.drain: watching job %s on draining node %s", k.jobid, nodeID[:6]) + j.jobsMu.Lock() + j.jobs[k] = nodeID + j.jobsMu.Unlock() +} + +func (j *jobWatcher) nodeDone(nodeID string) { + j.jobsMu.Lock() + defer j.jobsMu.Unlock() + for k, v := range j.jobs { + if v == nodeID { + j.logger.Printf("[TRACE] nomad.drain: UNwatching job %s on done draining node %s", k.jobid, nodeID[:6]) + delete(j.jobs, k) + } + } +} + +func (j *jobWatcher) WaitCh() <-chan map[jobKey]struct{} { + return j.jobsCh +} + +func (j *jobWatcher) run(ctx context.Context) { + var resp interface{} + var err error + + for { + //FIXME have watchAllocs create a closure and give it a copy of j.jobs to remove locking? + //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? 
+ var newIndex uint64 + resp, newIndex, err = j.state.BlockingQuery(j.watchAllocs, j.allocsIndex, ctx) + if err != nil { + if err == context.Canceled { + j.logger.Printf("[TRACE] nomad.drain: job watcher shutting down") + return + } + j.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) + return + } + + j.logger.Printf("[TRACE] nomad.drain: job watcher old index: %d new index: %d", j.allocsIndex, newIndex) + j.allocsIndex = newIndex + + changedJobs := resp.(map[jobKey]struct{}) + if len(changedJobs) > 0 { + select { + case j.jobsCh <- changedJobs: + case <-ctx.Done(): + return + } + } + } +} + +func (j *jobWatcher) watchAllocs(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + iter, err := state.Allocs(ws) + if err != nil { + return nil, 0, err + } + + index, err := state.Index("allocs") + if err != nil { + return nil, 0, err + } + + skipped := 0 + + // job ids + resp := map[jobKey]struct{}{} + + for { + raw := iter.Next() + if raw == nil { + break + } + + alloc := raw.(*structs.Allocation) + + j.jobsMu.Lock() + _, ok := j.jobs[jobKey{alloc.Namespace, alloc.JobID}] + j.jobsMu.Unlock() + + if !ok { + // alloc is not part of a draining job + skipped++ + continue + } + + // don't wake drain loop if alloc hasn't updated its health + if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + j.logger.Printf("[TRACE] nomad.drain: job watcher found alloc %s - deployment status: %t", alloc.ID[:6], *alloc.DeploymentStatus.Healthy) + resp[jobKey{alloc.Namespace, alloc.JobID}] = struct{}{} + } else { + j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring alloc %s - no deployment status", alloc.ID[:6]) + } + } + + j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring %d allocs - not part of draining job at index %d", skipped, index) + + return resp, index, nil +} diff --git a/nomad/drainer/node_watcher.go b/nomad/drainer/node_watcher.go new file mode 100644 index 000000000000..5f419ea2ca91 --- /dev/null +++ b/nomad/drainer/node_watcher.go @@ -0,0 +1,121 @@ +package drainer + +import ( + "context" + "log" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// nodeWatcher watches for nodes to start or stop draining +type nodeWatcher struct { + index uint64 + nodes map[string]*structs.Node + nodesCh chan map[string]*structs.Node + state *state.StateStore + logger *log.Logger +} + +func newNodeWatcher(logger *log.Logger, nodes map[string]*structs.Node, index uint64, state *state.StateStore) *nodeWatcher { + return &nodeWatcher{ + nodes: nodes, + nodesCh: make(chan map[string]*structs.Node), + index: index, + state: state, + logger: logger, + } +} + +func (n *nodeWatcher) run(ctx context.Context) { + // Trigger an initial drain pass if there are already nodes draining + //FIXME this is unneccessary if a node has reached a deadline + n.logger.Printf("[TRACE] nomad.drain: initial draining nodes: %d", len(n.nodes)) + if len(n.nodes) > 0 { + n.nodesCh <- n.nodes + } + + for { + //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? 
+ resp, index, err := n.state.BlockingQuery(n.queryNodeDrain, n.index, ctx) + if err != nil { + if err == context.Canceled { + n.logger.Printf("[TRACE] nomad.drain: draining node watcher shutting down") + return + } + n.logger.Printf("[ERR] nomad.drain: error blocking on node updates at index %d: %v", n.index, err) + return + } + + // update index for next run + n.index = index + + changed := false + newNodes := resp.([]*structs.Node) + n.logger.Printf("[TRACE] nomad.drain: %d nodes to consider", len(newNodes)) //FIXME remove + for _, newNode := range newNodes { + if existingNode, ok := n.nodes[newNode.ID]; ok { + // Node was draining, see if it has changed + if newNode.DrainStrategy == nil { + // Node stopped draining + delete(n.nodes, newNode.ID) + changed = true + } else if !newNode.DrainStrategy.Equal(existingNode.DrainStrategy) { + // Update deadline + n.nodes[newNode.ID] = newNode + changed = true + } + } else { + // Node was not draining + if newNode.DrainStrategy != nil { + // Node started draining + n.nodes[newNode.ID] = newNode + changed = true + } + } + } + + // Send a copy of the draining nodes if there were changes + if !changed { + continue + } + + nodesCopy := make(map[string]*structs.Node, len(n.nodes)) + for k, v := range n.nodes { + nodesCopy[k] = v + } + + select { + case n.nodesCh <- nodesCopy: + case <-ctx.Done(): + return + } + } +} + +func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + iter, err := state.Nodes(ws) + if err != nil { + return nil, 0, err + } + + index, err := state.Index("nodes") + if err != nil { + return nil, 0, err + } + + resp := make([]*structs.Node, 0, 8) + + for { + raw := iter.Next() + if raw == nil { + break + } + + node := raw.(*structs.Node) + resp = append(resp, node) + } + + return resp, index, nil +} From 6026af2a8a3e04e1ddc4930f849109f58aaa73fd Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 1 Mar 2018 16:37:19 -0800 Subject: [PATCH 36/79] Initial design --- nomad/drainerv2/drain_heap.go | 20 ++++ nomad/drainerv2/drain_interfaces.go | 1 + nomad/drainerv2/drainer.go | 167 ++++++++++++++++++++++++++++ nomad/drainerv2/draining_node.go | 65 +++++++++++ nomad/drainerv2/watch_jobs.go | 8 ++ nomad/drainerv2/watch_nodes.go | 7 ++ 6 files changed, 268 insertions(+) create mode 100644 nomad/drainerv2/drain_heap.go create mode 100644 nomad/drainerv2/drain_interfaces.go create mode 100644 nomad/drainerv2/drainer.go create mode 100644 nomad/drainerv2/draining_node.go create mode 100644 nomad/drainerv2/watch_jobs.go create mode 100644 nomad/drainerv2/watch_nodes.go diff --git a/nomad/drainerv2/drain_heap.go b/nomad/drainerv2/drain_heap.go new file mode 100644 index 000000000000..899b8dd16b7f --- /dev/null +++ b/nomad/drainerv2/drain_heap.go @@ -0,0 +1,20 @@ +package drainerv2 + +import ( + "time" + + "github.com/hashicorp/nomad/nomad/structs" +) + +type DrainDeadlineNotifier interface { + NextBatch() <-chan []*structs.Node + Remove(nodeID string) + Watch(nodeID string, deadline time.Time) +} + +type deadlineHeap struct { +} + +func (d *deadlineHeap) NextBatch() <-chan []structs.Node { return nil } +func (d *deadlineHeap) Remove(nodeID string) {} +func (d *deadlineHeap) Watch(nodeID string, deadline time.Time) {} diff --git a/nomad/drainerv2/drain_interfaces.go b/nomad/drainerv2/drain_interfaces.go new file mode 100644 index 000000000000..008537619830 --- /dev/null +++ b/nomad/drainerv2/drain_interfaces.go @@ -0,0 +1 @@ +package drainerv2 diff --git a/nomad/drainerv2/drainer.go 
b/nomad/drainerv2/drainer.go new file mode 100644 index 000000000000..a7156dc91d9c --- /dev/null +++ b/nomad/drainerv2/drainer.go @@ -0,0 +1,167 @@ +package drainerv2 + +import ( + "context" + "log" + "sync" + + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "golang.org/x/time/rate" +) + +const ( + // LimitStateQueriesPerSecond is the number of state queries allowed per + // second + LimitStateQueriesPerSecond = 100.0 +) + +// RaftApplier contains methods for applying the raft requests required by the +// NodeDrainer. +type RaftApplier interface { + AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error + NodeDrainComplete(nodeID string) error +} + +type AllocDrainer interface { + drain(allocs []*structs.Allocation) +} + +type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, AllocDrainer) DrainingJobWatcher +type DrainingNodeWatcherFactory func(context.Context, *rate.Limiter, AllocDrainer) DrainingNodeWatcher +type DrainDeadlineNotifierFactory func(context.Context) DrainDeadlineNotifier + +type NodeDrainerConfig struct { + Logger *log.Logger + Raft RaftApplier + JobFactory DrainingJobWatcherFactory + NodeFactory DrainingNodeWatcherFactory + DrainDeadlineFactory DrainDeadlineNotifierFactory + StateQueriesPerSecond float64 +} + +type NodeDrainer struct { + enabled bool + logger *log.Logger + + // nodes is the set of draining nodes + nodes map[string]*drainingNode + + // doneNodeCh is used to signal that a node is done draining + doneNodeCh chan string + + nodeWatcher DrainingNodeWatcher + nodeFactory DrainingNodeWatcherFactory + + jobWatcher DrainingJobWatcher + jobFactory DrainingJobWatcherFactory + + deadlineNotifier DrainDeadlineNotifier + deadlineNotifierFactory DrainDeadlineNotifierFactory + + // state is the state that is watched for state changes. + state *state.StateStore + + // queryLimiter is used to limit the rate of blocking queries + queryLimiter *rate.Limiter + + // raft is a shim around the raft messages necessary for draining + raft RaftApplier + + // ctx and exitFn are used to cancel the watcher + ctx context.Context + exitFn context.CancelFunc + + l sync.RWMutex +} + +func NewNodeDrainer(c *NodeDrainerConfig) *NodeDrainer { + return &NodeDrainer{ + raft: c.Raft, + logger: c.Logger, + jobFactory: c.JobFactory, + nodeFactory: c.NodeFactory, + deadlineNotifierFactory: c.DrainDeadlineFactory, + queryLimiter: rate.NewLimiter(rate.Limit(c.StateQueriesPerSecond), 100), + } +} + +// SetEnabled will start or stop the node draining goroutine depending on the +// enabled boolean. 
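+// It is expected to be called with true when a server establishes leadership
+// and with false when leadership is lost.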
+func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) { + n.l.Lock() + defer n.l.Unlock() + + wasEnabled := n.enabled + n.enabled = enabled + + if state != nil { + n.state = state + } + + // Flush the state to create the necessary objects + n.flush() + + // If we are starting now, launch the watch daemon + if enabled && !wasEnabled { + n.run(n.ctx) + } +} + +// flush is used to clear the state of the watcher +func (n *NodeDrainer) flush() { + // Kill everything associated with the watcher + if n.exitFn != nil { + n.exitFn() + } + + n.ctx, n.exitFn = context.WithCancel(context.Background()) + n.jobWatcher = n.jobFactory(n.ctx, n.queryLimiter, n) + n.nodeWatcher = n.nodeFactory(n.ctx, n.queryLimiter, n) + n.deadlineNotifier = n.deadlineNotifierFactory(n.ctx) + n.nodes = make(map[string]*drainingNode, 32) + n.doneNodeCh = make(chan string, 4) +} + +func (n *NodeDrainer) run(ctx context.Context) { + for { + select { + case <-n.ctx.Done(): + return + case nodes := <-n.deadlineNotifier.NextBatch(): + n.handleDeadlinedNodes(nodes) + case nodes := <-n.nodeWatcher.Transistioning(): + n.handleNodeDrainTransistion(nodes) + case allocs := <-n.jobWatcher.Drain(): + n.handleJobAllocDrain(allocs) + case node := <-n.doneNodeCh: + n.handleDoneNode(node) + } + } +} + +func (n *NodeDrainer) handleDeadlinedNodes(nodes []*structs.Node) { + // TODO +} + +func (n *NodeDrainer) handleNodeDrainTransistion(nodes []*structs.Node) { + // TODO +} + +func (n *NodeDrainer) handleJobAllocDrain(allocs []*structs.Allocation) { + // TODO + + // TODO Call check on the appropriate nodes when the final allocs + // transistion to stop so we have a place to determine with the node + // is done and the final drain of system allocs + // TODO This probably requires changing the interface such that it + // returns replaced allocs as well. 
+} + +func (n *NodeDrainer) handleDoneNode(nodeID string) { + // TODO +} + +func (n *NodeDrainer) drain(allocs []*structs.Allocation) { + // TODO +} diff --git a/nomad/drainerv2/draining_node.go b/nomad/drainerv2/draining_node.go new file mode 100644 index 000000000000..3150be1fd52d --- /dev/null +++ b/nomad/drainerv2/draining_node.go @@ -0,0 +1,65 @@ +package drainerv2 + +import ( + "sync" + "time" + + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// TODO make this an interface and then I can optimize the infinite case by +// using a singleton object + +type drainCoordinator interface { + done(nodeID string) +} + +func (n *NodeDrainer) nodeDone(nodeID string) { + select { + case <-n.ctx.Done(): + case n.doneNodeCh <- nodeID: + } +} + +type drainingNode struct { + coordinator drainCoordinator + state *state.StateStore + node *structs.Node + l sync.RWMutex +} + +func NewDrainingNode(node *structs.Node, state *state.StateStore, coordinator drainCoordinator) *drainingNode { + return &drainingNode{ + coordinator: coordinator, + state: state, + node: node, + } +} + +func (n *drainingNode) Update(node *structs.Node) { + n.l.Lock() + defer n.l.Unlock() + n.node = node +} + +// DeadlineTime returns if the node has a deadline and if so what it is +func (n *drainingNode) DeadlineTime() (bool, time.Time) { + n.l.RLock() + defer n.l.RUnlock() + + // Should never happen + if n.node == nil || n.node.DrainStrategy == nil { + return false, time.Time{} + } + + return n.node.DrainStrategy.DeadlineTime() +} + +// DeadlineAllocs returns the set of allocations that should be drained given a +// node is at its deadline +func (n *drainingNode) DeadlineAllocs() ([]*structs.Allocation, error) { + n.l.RLock() + defer n.l.RUnlock() + return nil, nil +} diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainerv2/watch_jobs.go new file mode 100644 index 000000000000..836cea6856e6 --- /dev/null +++ b/nomad/drainerv2/watch_jobs.go @@ -0,0 +1,8 @@ +package drainerv2 + +import "github.com/hashicorp/nomad/nomad/structs" + +type DrainingJobWatcher interface { + RegisterJob(jobID, namespace string) + Drain() <-chan []*structs.Allocation +} diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go new file mode 100644 index 000000000000..623c2edb234f --- /dev/null +++ b/nomad/drainerv2/watch_nodes.go @@ -0,0 +1,7 @@ +package drainerv2 + +import "github.com/hashicorp/nomad/nomad/structs" + +type DrainingNodeWatcher interface { + Transistioning() <-chan []*structs.Node +} From e566fcdf5f7443b5931db7ea4f0109ce8f974360 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 2 Mar 2018 15:19:55 -0800 Subject: [PATCH 37/79] drain heap --- nomad/drainerv2/drain_heap.go | 165 +++++++++++++++++++++++++++-- nomad/drainerv2/drain_heap_test.go | 149 ++++++++++++++++++++++++++ nomad/drainerv2/drainer.go | 2 +- 3 files changed, 309 insertions(+), 7 deletions(-) create mode 100644 nomad/drainerv2/drain_heap_test.go diff --git a/nomad/drainerv2/drain_heap.go b/nomad/drainerv2/drain_heap.go index 899b8dd16b7f..b661447e2b12 100644 --- a/nomad/drainerv2/drain_heap.go +++ b/nomad/drainerv2/drain_heap.go @@ -1,20 +1,173 @@ package drainerv2 import ( + "context" + "sync" "time" - - "github.com/hashicorp/nomad/nomad/structs" ) +// DrainDeadlineNotifier allows batch notification of nodes that have reached +// their drain deadline. 
type DrainDeadlineNotifier interface { - NextBatch() <-chan []*structs.Node + // NextBatch returns the next batch of nodes that have reached their + // deadline. + NextBatch() <-chan []string + + // Remove removes the given node from being tracked for a deadline. Remove(nodeID string) + + // Watch marks the given node for being watched for its deadline. Watch(nodeID string, deadline time.Time) } +// TODO Make any of what I just wrote true :) Initially it is just a simple +// implementation. + +// deadlineHeap implements the DrainDeadlineNotifier and is backed by a min-heap +// to efficiently determine the next deadlining node. It also supports +// coalescing several deadlines into a single emission. type deadlineHeap struct { + ctx context.Context + coalesceWindow time.Duration + batch chan []string + nodes map[string]time.Time + trigger chan string + l sync.RWMutex +} + +// NewDeadlineHeap returns a new deadline heap that coalesces for the given +// duration and will stop watching when the passed context is cancelled. +func NewDeadlineHeap(ctx context.Context, coalesceWindow time.Duration) *deadlineHeap { + d := &deadlineHeap{ + ctx: ctx, + coalesceWindow: coalesceWindow, + batch: make(chan []string, 4), + nodes: make(map[string]time.Time, 64), + trigger: make(chan string, 4), + } + + go d.watch() + return d +} + +func (d *deadlineHeap) watch() { + timer := time.NewTimer(0 * time.Millisecond) + if !timer.Stop() { + select { + case <-timer.C: + default: + } + } + + var nextDeadline time.Time + defer timer.Stop() + + for { + select { + case <-d.ctx.Done(): + return + case <-timer.C: + if nextDeadline.IsZero() { + continue + } + + d.l.Lock() + var batch []string + for nodeID, nodeDeadline := range d.nodes { + if !nodeDeadline.After(nextDeadline) { + batch = append(batch, nodeID) + } + } + + // If there is nothing exit early + if len(batch) == 0 { + d.l.Unlock() + goto CALC + } + + // Send the batch + select { + case d.batch <- batch: + case <-d.ctx.Done(): + d.l.Unlock() + return + } + + // Clean up the nodes + for _, nodeID := range batch { + delete(d.nodes, nodeID) + } + d.l.Unlock() + case <-d.trigger: + } + + CALC: + deadline, ok := d.calculateNextDeadline() + if !ok { + continue + } + + if !deadline.Equal(nextDeadline) { + timer.Reset(deadline.Sub(time.Now())) + nextDeadline = deadline + } + } +} + +// calculateNextDeadline returns the next deadline in which to scan for +// deadlined nodes. It applies the coalesce window. +func (d *deadlineHeap) calculateNextDeadline() (time.Time, bool) { + d.l.Lock() + defer d.l.Unlock() + + if len(d.nodes) == 0 { + return time.Time{}, false + } + + // Calculate the new timer value + var deadline time.Time + for _, v := range d.nodes { + if deadline.IsZero() || v.Before(deadline) { + deadline = v + } + } + + var maxWithinWindow time.Time + coalescedDeadline := deadline.Add(d.coalesceWindow) + for _, nodeDeadline := range d.nodes { + if nodeDeadline.Before(coalescedDeadline) { + if maxWithinWindow.IsZero() || nodeDeadline.After(maxWithinWindow) { + maxWithinWindow = nodeDeadline + } + } + } + + return maxWithinWindow, true } -func (d *deadlineHeap) NextBatch() <-chan []structs.Node { return nil } -func (d *deadlineHeap) Remove(nodeID string) {} -func (d *deadlineHeap) Watch(nodeID string, deadline time.Time) {} +// NextBatch returns the next batch of nodes to be drained. 
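+// Nodes whose deadlines fall within the coalesce window are emitted together
+// as a single batch.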
+func (d *deadlineHeap) NextBatch() <-chan []string { + return d.batch +} + +func (d *deadlineHeap) Remove(nodeID string) { + d.l.Lock() + defer d.l.Unlock() + delete(d.nodes, nodeID) + + select { + case d.trigger <- nodeID: + default: + } +} + +func (d *deadlineHeap) Watch(nodeID string, deadline time.Time) { + d.l.Lock() + defer d.l.Unlock() + d.nodes[nodeID] = deadline + + select { + case d.trigger <- nodeID: + default: + } +} diff --git a/nomad/drainerv2/drain_heap_test.go b/nomad/drainerv2/drain_heap_test.go new file mode 100644 index 000000000000..a47a98ff7473 --- /dev/null +++ b/nomad/drainerv2/drain_heap_test.go @@ -0,0 +1,149 @@ +package drainerv2 + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestDeadlineHeap_Interface(t *testing.T) { + t.Parallel() + require := require.New(t) + h := NewDeadlineHeap(context.Background(), 1*time.Second) + require.Implements((*DrainDeadlineNotifier)(nil), h) +} + +func TestDeadlineHeap_WatchAndGet(t *testing.T) { + t.Parallel() + require := require.New(t) + h := NewDeadlineHeap(context.Background(), 1*time.Second) + + now := time.Now() + nodeID := "1" + wait := 10 * time.Millisecond + deadline := now.Add(wait) + h.Watch(nodeID, deadline) + + var batch []string + select { + case batch = <-h.NextBatch(): + case <-time.After(2 * wait): + t.Fatal("timeout") + } + + require.Len(batch, 1) + require.Equal(nodeID, batch[0]) +} + +func TestDeadlineHeap_WatchThenUpdateAndGet(t *testing.T) { + t.Parallel() + require := require.New(t) + h := NewDeadlineHeap(context.Background(), 1*time.Second) + + now := time.Now() + nodeID := "1" + wait := 10 * time.Millisecond + deadline := now.Add(wait) + + // Initially watch way in the future + h.Watch(nodeID, now.Add(24*time.Hour)) + + // Rewatch + h.Watch(nodeID, deadline) + + var batch []string + select { + case batch = <-h.NextBatch(): + case <-time.After(2 * wait): + t.Fatal("timeout") + } + + require.Len(batch, 1) + require.Equal(nodeID, batch[0]) +} + +func TestDeadlineHeap_MultiwatchAndDelete(t *testing.T) { + t.Parallel() + require := require.New(t) + h := NewDeadlineHeap(context.Background(), 1*time.Second) + + now := time.Now() + wait := 50 * time.Millisecond + deadline := now.Add(wait) + + nodeID1 := "1" + nodeID2 := "2" + h.Watch(nodeID1, deadline) + h.Watch(nodeID2, deadline) + + time.Sleep(1 * time.Millisecond) + h.Remove(nodeID2) + + var batch []string + select { + case batch = <-h.NextBatch(): + case <-time.After(2 * wait): + t.Fatal("timeout") + } + + require.Len(batch, 1) + require.Equal(nodeID1, batch[0]) +} + +func TestDeadlineHeap_WatchCoalesce(t *testing.T) { + t.Parallel() + require := require.New(t) + h := NewDeadlineHeap(context.Background(), 250*time.Millisecond) + + now := time.Now() + + group1 := map[string]time.Time{ + "1": now.Add(5 * time.Millisecond), + "2": now.Add(10 * time.Millisecond), + "3": now.Add(20 * time.Millisecond), + "4": now.Add(100 * time.Millisecond), + } + + group2 := map[string]time.Time{ + "10": now.Add(355 * time.Millisecond), + "11": now.Add(360 * time.Millisecond), + } + + for _, g := range []map[string]time.Time{group1, group2} { + for n, d := range g { + h.Watch(n, d) + } + } + + var batch []string + select { + case batch = <-h.NextBatch(): + case <-time.After(1 * time.Second): + t.Fatal("timeout") + } + + require.Len(batch, len(group1)) + for nodeID := range group1 { + require.Contains(batch, nodeID) + } + batch = nil + + select { + case batch = <-h.NextBatch(): + case <-time.After(2 * time.Second): + 
t.Fatal("timeout") + } + + require.Len(batch, len(group2)) + for nodeID := range group2 { + require.Contains(batch, nodeID) + } + + select { + case <-h.NextBatch(): + t.Fatal("unexpected batch") + case <-time.After(100 * time.Millisecond): + } +} diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index a7156dc91d9c..6e9b4b73b570 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -140,7 +140,7 @@ func (n *NodeDrainer) run(ctx context.Context) { } } -func (n *NodeDrainer) handleDeadlinedNodes(nodes []*structs.Node) { +func (n *NodeDrainer) handleDeadlinedNodes(nodes []string) { // TODO } From da368105e6317564ad5d251780ff26c57130f656 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 2 Mar 2018 17:15:38 -0800 Subject: [PATCH 38/79] node watcher --- nomad/drainerv2/drain_interfaces.go | 1 - nomad/drainerv2/drain_testing.go | 46 ++++++++ nomad/drainerv2/drainer.go | 27 +++-- nomad/drainerv2/draining_node.go | 8 +- nomad/drainerv2/watch_nodes.go | 177 +++++++++++++++++++++++++++- nomad/drainerv2/watch_nodes_test.go | 153 ++++++++++++++++++++++++ 6 files changed, 397 insertions(+), 15 deletions(-) delete mode 100644 nomad/drainerv2/drain_interfaces.go create mode 100644 nomad/drainerv2/drain_testing.go create mode 100644 nomad/drainerv2/watch_nodes_test.go diff --git a/nomad/drainerv2/drain_interfaces.go b/nomad/drainerv2/drain_interfaces.go deleted file mode 100644 index 008537619830..000000000000 --- a/nomad/drainerv2/drain_interfaces.go +++ /dev/null @@ -1 +0,0 @@ -package drainerv2 diff --git a/nomad/drainerv2/drain_testing.go b/nomad/drainerv2/drain_testing.go new file mode 100644 index 000000000000..af143894bd93 --- /dev/null +++ b/nomad/drainerv2/drain_testing.go @@ -0,0 +1,46 @@ +package drainerv2 + +import ( + "sync" + + "github.com/hashicorp/nomad/nomad/structs" +) + +type MockNodeTrackerEvent struct { + NodeUpdate *structs.Node + NodeRemove string +} + +type MockNodeTracker struct { + Nodes map[string]*structs.Node + Events []*MockNodeTrackerEvent + sync.Mutex +} + +func NewMockNodeTracker() *MockNodeTracker { + return &MockNodeTracker{ + Nodes: make(map[string]*structs.Node), + Events: make([]*MockNodeTrackerEvent, 0, 16), + } +} + +func (m *MockNodeTracker) Tracking(nodeID string) (*structs.Node, bool) { + m.Lock() + defer m.Unlock() + n, ok := m.Nodes[nodeID] + return n, ok +} + +func (m *MockNodeTracker) Remove(nodeID string) { + m.Lock() + defer m.Unlock() + delete(m.Nodes, nodeID) + m.Events = append(m.Events, &MockNodeTrackerEvent{NodeRemove: nodeID}) +} + +func (m *MockNodeTracker) Update(node *structs.Node) { + m.Lock() + defer m.Unlock() + m.Nodes[node.ID] = node + m.Events = append(m.Events, &MockNodeTrackerEvent{NodeUpdate: node}) +} diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index 6e9b4b73b570..18b07eff5606 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -4,12 +4,19 @@ import ( "context" "log" "sync" + "time" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "golang.org/x/time/rate" ) +var ( + // stateReadErrorDelay is the delay to apply before retrying reading state + // when there is an error + stateReadErrorDelay = 1 * time.Second +) + const ( // LimitStateQueriesPerSecond is the number of state queries allowed per // second @@ -27,8 +34,14 @@ type AllocDrainer interface { drain(allocs []*structs.Allocation) } -type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, AllocDrainer) DrainingJobWatcher -type 
DrainingNodeWatcherFactory func(context.Context, *rate.Limiter, AllocDrainer) DrainingNodeWatcher +type NodeTracker interface { + Tracking(nodeID string) (*structs.Node, bool) + Remove(nodeID string) + Update(node *structs.Node) +} + +type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger, AllocDrainer) DrainingJobWatcher +type DrainingNodeWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger, NodeTracker) DrainingNodeWatcher type DrainDeadlineNotifierFactory func(context.Context) DrainDeadlineNotifier type NodeDrainerConfig struct { @@ -116,8 +129,8 @@ func (n *NodeDrainer) flush() { } n.ctx, n.exitFn = context.WithCancel(context.Background()) - n.jobWatcher = n.jobFactory(n.ctx, n.queryLimiter, n) - n.nodeWatcher = n.nodeFactory(n.ctx, n.queryLimiter, n) + n.jobWatcher = n.jobFactory(n.ctx, n.queryLimiter, n.state, n.logger, n) + n.nodeWatcher = n.nodeFactory(n.ctx, n.queryLimiter, n.state, n.logger, n) n.deadlineNotifier = n.deadlineNotifierFactory(n.ctx) n.nodes = make(map[string]*drainingNode, 32) n.doneNodeCh = make(chan string, 4) @@ -130,8 +143,6 @@ func (n *NodeDrainer) run(ctx context.Context) { return case nodes := <-n.deadlineNotifier.NextBatch(): n.handleDeadlinedNodes(nodes) - case nodes := <-n.nodeWatcher.Transistioning(): - n.handleNodeDrainTransistion(nodes) case allocs := <-n.jobWatcher.Drain(): n.handleJobAllocDrain(allocs) case node := <-n.doneNodeCh: @@ -144,10 +155,6 @@ func (n *NodeDrainer) handleDeadlinedNodes(nodes []string) { // TODO } -func (n *NodeDrainer) handleNodeDrainTransistion(nodes []*structs.Node) { - // TODO -} - func (n *NodeDrainer) handleJobAllocDrain(allocs []*structs.Allocation) { // TODO diff --git a/nomad/drainerv2/draining_node.go b/nomad/drainerv2/draining_node.go index 3150be1fd52d..32233573b3e3 100644 --- a/nomad/drainerv2/draining_node.go +++ b/nomad/drainerv2/draining_node.go @@ -12,7 +12,7 @@ import ( // using a singleton object type drainCoordinator interface { - done(nodeID string) + nodeDone(nodeID string) } func (n *NodeDrainer) nodeDone(nodeID string) { @@ -37,6 +37,12 @@ func NewDrainingNode(node *structs.Node, state *state.StateStore, coordinator dr } } +func (n *drainingNode) GetNode() *structs.Node { + n.l.Lock() + defer n.l.Unlock() + return n.node +} + func (n *drainingNode) Update(node *structs.Node) { n.l.Lock() defer n.l.Unlock() diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go index 623c2edb234f..ddf5f2b9a8f9 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainerv2/watch_nodes.go @@ -1,7 +1,178 @@ package drainerv2 -import "github.com/hashicorp/nomad/nomad/structs" +import ( + "context" + "log" + "time" -type DrainingNodeWatcher interface { - Transistioning() <-chan []*structs.Node + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "golang.org/x/time/rate" +) + +// DrainingNodeWatcher is the interface for watching for draining nodes. +type DrainingNodeWatcher interface{} + +// Tracking returns the whether the node is being tracked and if so the copy of +// the node object that is tracked. 
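+// Tracking is part of the NodeTracker interface and is consumed by the node
+// drain watcher to decide whether a node should be added, updated or removed.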
+func (n *NodeDrainer) Tracking(nodeID string) (*structs.Node, bool) { + n.l.RLock() + defer n.l.RUnlock() + + draining, ok := n.nodes[nodeID] + if !ok { + return nil, false + } + + return draining.GetNode(), true +} + +// Remove removes the given node from being tracked +func (n *NodeDrainer) Remove(nodeID string) { + n.l.Lock() + defer n.l.Unlock() + delete(n.nodes, nodeID) +} + +// Update updates the node, either updating the tracked version or starting to +// track the node. +func (n *NodeDrainer) Update(node *structs.Node) { + n.l.Lock() + defer n.l.Unlock() + + if node == nil { + return + } + + draining, ok := n.nodes[node.ID] + if !ok { + n.nodes[node.ID] = NewDrainingNode(node, n.state, n) + return + } + + draining.Update(node) +} + +// nodeDrainWatcher is used to watch nodes that are entering, leaving or +// changing their drain strategy. +type nodeDrainWatcher struct { + ctx context.Context + logger *log.Logger + + // state is the state that is watched for state changes. + state *state.StateStore + + // limiter is used to limit the rate of blocking queries + limiter *rate.Limiter + + // tracker is the object that is tracking the nodes and provides us with the + // needed callbacks + tracker NodeTracker +} + +// NewNodeDrainWatcher returns a new node drain watcher. +func NewNodeDrainWatcher(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger, tracker NodeTracker) *nodeDrainWatcher { + w := &nodeDrainWatcher{ + ctx: ctx, + limiter: limiter, + logger: logger, + tracker: tracker, + state: state, + } + + go w.watch() + return w +} + +// watch is the long lived watching routine that detects node changes. +func (w *nodeDrainWatcher) watch() { + nindex := uint64(1) + for { + w.logger.Printf("[TRACE] nomad.drain.node_watcher: getting nodes at index %d", nindex) + nodes, index, err := w.getNodes(nindex) + if err != nil { + if err == context.Canceled { + w.logger.Printf("[TRACE] nomad.drain.node_watcher: shutting down") + return + } + + w.logger.Printf("[ERR] nomad.drain.node_watcher: error watching node updates at index %d: %v", nindex, err) + select { + case <-w.ctx.Done(): + w.logger.Printf("[TRACE] nomad.drain.node_watcher: shutting down") + return + case <-time.After(stateReadErrorDelay): + continue + } + } + + // update index for next run + nindex = index + + for _, node := range nodes { + newDraining := node.DrainStrategy != nil + currentNode, tracked := w.tracker.Tracking(node.ID) + + switch { + // If the node is tracked but not draining, untrack + case tracked && !newDraining: + w.logger.Printf("[TRACE] nomad.drain.node_watcher: tracked node %q is no longer draining", node.ID) + w.tracker.Remove(node.ID) + + // If the node is not being tracked but is draining, track + case !tracked && newDraining: + w.logger.Printf("[TRACE] nomad.drain.node_watcher: untracked node %q is draining", node.ID) + w.tracker.Update(node) + + // If the node is being tracked but has changed, update: + case tracked && newDraining && !currentNode.DrainStrategy.Equal(node.DrainStrategy): + w.logger.Printf("[TRACE] nomad.drain.node_watcher: tracked node %q has updated drain", node.ID) + w.tracker.Update(node) + default: + w.logger.Printf("[TRACE] nomad.drain.node_watcher: node %q at index %v: tracked %v, draining %v", node.ID, node.ModifyIndex, tracked, newDraining) + } + } + } +} + +// getNodes returns all nodes blocking until the nodes are after the given index. 
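+// It waits on the shared rate limiter before issuing the blocking query and
+// returns the index at which the result was read so the caller can use it as
+// the minimum index for the next query.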
+func (w *nodeDrainWatcher) getNodes(minIndex uint64) ([]*structs.Node, uint64, error) { + if err := w.limiter.Wait(w.ctx); err != nil { + return nil, 0, err + } + + resp, index, err := w.state.BlockingQuery(w.getNodesImpl, minIndex, w.ctx) + if err != nil { + return nil, 0, err + } + + return resp.([]*structs.Node), index, nil +} + +// getNodesImpl is used to get nodes from the state store, returning the set of +// nodes and the given index. +func (w *nodeDrainWatcher) getNodesImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + iter, err := state.Nodes(ws) + if err != nil { + return nil, 0, err + } + + index, err := state.Index("nodes") + if err != nil { + return nil, 0, err + } + + resp := make([]*structs.Node, 0, 64) + for { + raw := iter.Next() + if raw == nil { + break + } + + node := raw.(*structs.Node) + resp = append(resp, node) + } + + return resp, index, nil } diff --git a/nomad/drainerv2/watch_nodes_test.go b/nomad/drainerv2/watch_nodes_test.go new file mode 100644 index 000000000000..8b3a63e1c250 --- /dev/null +++ b/nomad/drainerv2/watch_nodes_test.go @@ -0,0 +1,153 @@ +package drainerv2 + +import ( + "context" + "testing" + "time" + + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/stretchr/testify/require" + "golang.org/x/time/rate" +) + +func testNodeDrainWatcher(t *testing.T) (*nodeDrainWatcher, *state.StateStore, *MockNodeTracker) { + t.Helper() + + sconfig := &state.StateStoreConfig{ + LogOutput: testlog.NewWriter(t), + Region: "global", + } + state, err := state.NewStateStore(sconfig) + if err != nil { + t.Fatalf("failed to create state store: %v", err) + } + + limiter := rate.NewLimiter(100.0, 100) + logger := testlog.Logger(t) + m := NewMockNodeTracker() + w := NewNodeDrainWatcher(context.Background(), limiter, state, logger, m) + return w, state, m +} + +func TestNodeDrainWatcher_Interface(t *testing.T) { + t.Parallel() + require := require.New(t) + w, _, _ := testNodeDrainWatcher(t) + require.Implements((*DrainingNodeWatcher)(nil), w) +} + +func TestNodeDrainWatcher_AddDraining(t *testing.T) { + t.Parallel() + require := require.New(t) + _, state, m := testNodeDrainWatcher(t) + + // Create two nodes, one draining and one not draining + n1, n2 := mock.Node(), mock.Node() + n2.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: time.Hour, + }, + ForceDeadline: time.Now().Add(time.Hour), + } + + require.Nil(state.UpsertNode(100, n1)) + require.Nil(state.UpsertNode(101, n2)) + + testutil.WaitForResult(func() (bool, error) { + return len(m.Events) == 1, nil + }, func(err error) { + t.Fatal("No node drain events") + }) + + _, ok1 := m.Tracking(n1.ID) + out2, ok2 := m.Tracking(n2.ID) + require.False(ok1) + require.True(ok2) + require.Equal(n2, out2) + +} + +func TestNodeDrainWatcher_Remove(t *testing.T) { + t.Parallel() + require := require.New(t) + _, state, m := testNodeDrainWatcher(t) + + // Create a draining node + n := mock.Node() + n.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: time.Hour, + }, + ForceDeadline: time.Now().Add(time.Hour), + } + + // Wait for it to be tracked + require.Nil(state.UpsertNode(100, n)) + testutil.WaitForResult(func() (bool, error) { + return len(m.Events) == 1, nil + }, func(err error) { + t.Fatal("No node drain events") + }) + + out, ok := m.Tracking(n.ID) + 
require.True(ok) + require.Equal(n, out) + + // Change the node to be not draining and wait for it to be untracked + require.Nil(state.UpdateNodeDrain(101, n.ID, nil)) + testutil.WaitForResult(func() (bool, error) { + return len(m.Events) == 2, nil + }, func(err error) { + t.Fatal("No new node drain events") + }) + + _, ok = m.Tracking(n.ID) + require.False(ok) +} + +func TestNodeDrainWatcher_Update(t *testing.T) { + t.Parallel() + require := require.New(t) + _, state, m := testNodeDrainWatcher(t) + + // Create a draining node + n := mock.Node() + n.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: time.Hour, + }, + ForceDeadline: time.Now().Add(time.Hour), + } + + // Wait for it to be tracked + require.Nil(state.UpsertNode(100, n)) + testutil.WaitForResult(func() (bool, error) { + return len(m.Events) == 1, nil + }, func(err error) { + t.Fatal("No node drain events") + }) + + out, ok := m.Tracking(n.ID) + require.True(ok) + require.Equal(n, out) + + // Change the node to have a new spec + s2 := n.DrainStrategy.Copy() + s2.Deadline += time.Hour + require.Nil(state.UpdateNodeDrain(101, n.ID, s2)) + + // Wait for it to be updated + testutil.WaitForResult(func() (bool, error) { + return len(m.Events) == 2, nil + }, func(err error) { + t.Fatal("No new node drain events") + }) + + out, ok = m.Tracking(n.ID) + require.True(ok) + require.Equal(out.DrainStrategy, s2) +} From d45532d038a4f887be2ee870ca3b55cc5f8bf9a2 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 2 Mar 2018 17:24:48 -0800 Subject: [PATCH 39/79] Node's being untracked or having updated deadlines, updates the deadliner --- nomad/drainerv2/watch_nodes.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go index ddf5f2b9a8f9..7b0bd8573389 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainerv2/watch_nodes.go @@ -32,7 +32,11 @@ func (n *NodeDrainer) Tracking(nodeID string) (*structs.Node, bool) { func (n *NodeDrainer) Remove(nodeID string) { n.l.Lock() defer n.l.Unlock() + + // TODO test the notifier is updated + // Remove it from being tracked and remove it from the dealiner delete(n.nodes, nodeID) + n.deadlineNotifier.Remove(nodeID) } // Update updates the node, either updating the tracked version or starting to @@ -51,7 +55,21 @@ func (n *NodeDrainer) Update(node *structs.Node) { return } + // Update it and update the dealiner draining.Update(node) + + // TODO test the notifier is updated + if inf, deadline := node.DrainStrategy.DeadlineTime(); !inf { + n.deadlineNotifier.Watch(node.ID, deadline) + } else { + // TODO think about handling any race that may occur. I believe it is + // totally fine as long as the handlers are locked. 
+ + // There is an infinite deadline so it shouldn't be tracked for + // deadlining + n.deadlineNotifier.Remove(node.ID) + } + } // nodeDrainWatcher is used to watch nodes that are entering, leaving or From 0e51b2065745c94651c74231229bd2bcf376ec58 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 6 Mar 2018 10:12:17 -0800 Subject: [PATCH 40/79] job watcher --- nomad/drainerv2/drain_testing.go | 5 +- nomad/drainerv2/drainer.go | 7 +- nomad/drainerv2/watch_jobs.go | 411 +++++++++++++++++++++++++++- nomad/drainerv2/watch_jobs_test.go | 372 +++++++++++++++++++++++++ nomad/drainerv2/watch_nodes.go | 43 +-- nomad/drainerv2/watch_nodes_test.go | 69 +++-- nomad/state/testing.go | 5 +- nomad/structs/structs.go | 20 ++ 8 files changed, 888 insertions(+), 44 deletions(-) create mode 100644 nomad/drainerv2/watch_jobs_test.go diff --git a/nomad/drainerv2/drain_testing.go b/nomad/drainerv2/drain_testing.go index af143894bd93..60d710e4a593 100644 --- a/nomad/drainerv2/drain_testing.go +++ b/nomad/drainerv2/drain_testing.go @@ -24,11 +24,10 @@ func NewMockNodeTracker() *MockNodeTracker { } } -func (m *MockNodeTracker) Tracking(nodeID string) (*structs.Node, bool) { +func (m *MockNodeTracker) TrackedNodes() map[string]*structs.Node { m.Lock() defer m.Unlock() - n, ok := m.Nodes[nodeID] - return n, ok + return m.Nodes } func (m *MockNodeTracker) Remove(nodeID string) { diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index 18b07eff5606..d78019b8499e 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -35,12 +35,12 @@ type AllocDrainer interface { } type NodeTracker interface { - Tracking(nodeID string) (*structs.Node, bool) + TrackedNodes() map[string]*structs.Node Remove(nodeID string) Update(node *structs.Node) } -type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger, AllocDrainer) DrainingJobWatcher +type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger) DrainingJobWatcher type DrainingNodeWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger, NodeTracker) DrainingNodeWatcher type DrainDeadlineNotifierFactory func(context.Context) DrainDeadlineNotifier @@ -129,7 +129,7 @@ func (n *NodeDrainer) flush() { } n.ctx, n.exitFn = context.WithCancel(context.Background()) - n.jobWatcher = n.jobFactory(n.ctx, n.queryLimiter, n.state, n.logger, n) + n.jobWatcher = n.jobFactory(n.ctx, n.queryLimiter, n.state, n.logger) n.nodeWatcher = n.nodeFactory(n.ctx, n.queryLimiter, n.state, n.logger, n) n.deadlineNotifier = n.deadlineNotifierFactory(n.ctx) n.nodes = make(map[string]*drainingNode, 32) @@ -146,6 +146,7 @@ func (n *NodeDrainer) run(ctx context.Context) { case allocs := <-n.jobWatcher.Drain(): n.handleJobAllocDrain(allocs) case node := <-n.doneNodeCh: + // TODO probably remove this as a channel n.handleDoneNode(node) } } diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainerv2/watch_jobs.go index 836cea6856e6..a2e6ef45ef50 100644 --- a/nomad/drainerv2/watch_jobs.go +++ b/nomad/drainerv2/watch_jobs.go @@ -1,8 +1,417 @@ package drainerv2 -import "github.com/hashicorp/nomad/nomad/structs" +import ( + "context" + "fmt" + "log" + "sync" + "time" + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "golang.org/x/time/rate" +) + +// DrainingJobWatcher is the interface for watching a job drain type DrainingJobWatcher interface { + // 
RegisterJob is used to start watching a draining job RegisterJob(jobID, namespace string) + + // TODO This should probably be a drain future such that we can block the + // next loop till the raft apply happens such that we don't emit the same + // drain many times. We would get the applied index back and block till + // then. + // Drain is used to emit allocations that should be drained. Drain() <-chan []*structs.Allocation + + // Migrated is allocations for draining jobs that have transistioned to + // stop. There is no guarantee that duplicates won't be published. + Migrated() <-chan []*structs.Allocation +} + +// drainingJobWatcher is used to watch draining jobs and emit events when +// draining allocations have replacements +type drainingJobWatcher struct { + ctx context.Context + logger *log.Logger + + // state is the state that is watched for state changes. + state *state.StateStore + + // limiter is used to limit the rate of blocking queries + limiter *rate.Limiter + + // jobs is the set of tracked jobs. + jobs map[structs.JobNs]struct{} + + // queryCtx is used to cancel a blocking query. + queryCtx context.Context + queryCancel context.CancelFunc + + // drainCh and migratedCh are used to emit allocations + drainCh chan []*structs.Allocation + migratedCh chan []*structs.Allocation + + l sync.RWMutex +} + +// NewDrainingJobWatcher returns a new job watcher. The caller is expected to +// cancel the context to clean up the drainer. +func NewDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger) *drainingJobWatcher { + + // Create a context that can cancel the blocking query so that when a new + // job gets registered it is handled. + queryCtx, queryCancel := context.WithCancel(ctx) + + w := &drainingJobWatcher{ + ctx: ctx, + queryCtx: queryCtx, + queryCancel: queryCancel, + limiter: limiter, + logger: logger, + state: state, + jobs: make(map[structs.JobNs]struct{}, 64), + drainCh: make(chan []*structs.Allocation, 8), + migratedCh: make(chan []*structs.Allocation, 8), + } + + go w.watch() + return w +} + +// RegisterJob marks the given job as draining and adds it to being watched. +func (w *drainingJobWatcher) RegisterJob(jobID, namespace string) { + w.l.Lock() + defer w.l.Unlock() + + jns := structs.JobNs{ + ID: jobID, + Namespace: namespace, + } + if _, ok := w.jobs[jns]; ok { + return + } + + // Add the job and cancel the context + w.jobs[jns] = struct{}{} + w.queryCancel() + + // Create a new query context + w.queryCtx, w.queryCancel = context.WithCancel(w.ctx) +} + +// Drain returns the channel that emits allocations to drain. +func (w *drainingJobWatcher) Drain() <-chan []*structs.Allocation { + return w.drainCh +} + +// Migrated returns the channel that emits allocations for draining jobs that +// have been migrated. +func (w *drainingJobWatcher) Migrated() <-chan []*structs.Allocation { + return w.migratedCh +} + +// deregisterJob removes the job from being watched. +func (w *drainingJobWatcher) deregisterJob(jobID, namespace string) { + w.l.Lock() + defer w.l.Unlock() + jns := structs.JobNs{ + ID: jobID, + Namespace: namespace, + } + delete(w.jobs, jns) + w.logger.Printf("[TRACE] nomad.drain.job_watcher: deregistering job %v", jns) +} + +// watch is the long lived watching routine that detects job drain changes. 
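+// Each pass blocks until the allocations of a registered job change, snapshots
+// the state store, computes the drain and migrated sets for every
+// still-registered service job via handleJob, and emits the results on the
+// drain and migrated channels.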
+func (w *drainingJobWatcher) watch() { + jindex := uint64(1) + for { + w.logger.Printf("[TRACE] nomad.drain.job_watcher: getting job allocs at index %d", jindex) + jobAllocs, index, err := w.getJobAllocs(w.getQueryCtx(), jindex) + if err != nil { + if err == context.Canceled { + // Determine if it is a cancel or a shutdown + select { + case <-w.ctx.Done(): + w.logger.Printf("[TRACE] nomad.drain.job_watcher: shutting down") + return + default: + // The query context was cancelled + continue + } + } + + w.logger.Printf("[ERR] nomad.drain.job_watcher: error watching job allocs updates at index %d: %v", jindex, err) + select { + case <-w.ctx.Done(): + w.logger.Printf("[TRACE] nomad.drain.job_watcher: shutting down") + return + case <-time.After(stateReadErrorDelay): + continue + } + } + + // update index for next run + lastHandled := jindex + jindex = index + + // Snapshot the state store + snap, err := w.state.Snapshot() + if err != nil { + w.logger.Printf("[WARN] nomad.drain.job_watcher: failed to snapshot statestore: %v", err) + continue + } + + currentJobs := w.drainingJobs() + var allDrain, allMigrated []*structs.Allocation + for job, allocs := range jobAllocs { + // Check if the job is still registered + if _, ok := currentJobs[job]; !ok { + continue + } + + w.logger.Printf("[TRACE] nomad.drain.job_watcher: handling job %v", job) + + // Lookup the job + job, err := w.state.JobByID(nil, job.Namespace, job.ID) + if err != nil { + w.logger.Printf("[WARN] nomad.drain.job_watcher: failed to lookup job %v: %v", job, err) + continue + } + + // Ignore all non-service jobs + if job.Type != structs.JobTypeService { + w.deregisterJob(job.ID, job.Namespace) + continue + } + + result, err := handleJob(snap, job, allocs, lastHandled) + if err != nil { + w.logger.Printf("[ERR] nomad.drain.job_watcher: handling drain for job %v failed: %v", job, err) + continue + } + + allDrain = append(allDrain, result.drain...) + allMigrated = append(allMigrated, result.migrated...) + + // Stop tracking this job + if result.done { + w.deregisterJob(job.ID, job.Namespace) + } + } + + if allDrain != nil { + select { + case w.drainCh <- allDrain: + case <-w.ctx.Done(): + w.logger.Printf("[TRACE] nomad.drain.job_watcher: shutting down") + return + } + } + + if allMigrated != nil { + select { + case w.migratedCh <- allMigrated: + case <-w.ctx.Done(): + w.logger.Printf("[TRACE] nomad.drain.job_watcher: shutting down") + return + } + } + } +} + +// jobResult is the set of actions to take for a draining job given its current +// state. +type jobResult struct { + // drain is the set of allocations to emit for draining. + drain []*structs.Allocation + + // migrated is the set of allocations to emit as migrated + migrated []*structs.Allocation + + // done marks whether the job has been fully drained. + done bool +} + +// newJobResult returns an initialized jobResult +func newJobResult() *jobResult { + return &jobResult{ + done: true, + } +} + +// handleJob takes the state of a draining job and returns the desired actions. 
+func handleJob(snap *state.StateSnapshot, job *structs.Job, allocs []*structs.Allocation, lastHandledIndex uint64) (*jobResult, error) { + r := newJobResult() + taskGroups := make(map[string]*structs.TaskGroup, len(job.TaskGroups)) + for _, tg := range job.TaskGroups { + if tg.Migrate != nil { + // TODO handle the upgrade path + // Only capture the groups that have a migrate strategy + taskGroups[tg.Name] = tg + } + } + + // Sort the allocations by TG + tgAllocs := make(map[string][]*structs.Allocation, len(taskGroups)) + for _, alloc := range allocs { + if _, ok := taskGroups[alloc.TaskGroup]; !ok { + continue + } + + tgAllocs[alloc.TaskGroup] = append(tgAllocs[alloc.TaskGroup], alloc) + } + + for name, tg := range taskGroups { + allocs := tgAllocs[name] + if err := handleTaskGroup(snap, tg, allocs, lastHandledIndex, r); err != nil { + return nil, fmt.Errorf("drain for task group %q failed: %v", name, err) + } + } + + return r, nil +} + +// handleTaskGroup takes the state of a draining task group and computes the desired actions. +func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, + allocs []*structs.Allocation, lastHandledIndex uint64, result *jobResult) error { + + // Determine how many allocations can be drained + drainingNodes := make(map[string]bool, 4) + healthy := 0 + remainingDrainingAlloc := false + var drainable []*structs.Allocation + + for _, alloc := range allocs { + // Check if the alloc is on a draining node. + onDrainingNode, ok := drainingNodes[alloc.NodeID] + if !ok { + // Look up the node + node, err := snap.NodeByID(nil, alloc.NodeID) + if err != nil { + return err + } + + onDrainingNode = node.DrainStrategy != nil + drainingNodes[node.ID] = onDrainingNode + } + + // Check if the alloc should be considered migrated. A migrated + // allocation is one that is terminal, is on a draining + // allocation, and has only happened since our last handled index to + // avoid emitting many duplicate migrate events. + if alloc.TerminalStatus() && + onDrainingNode && + alloc.ModifyIndex > lastHandledIndex { + result.migrated = append(result.migrated, alloc) + continue + } + + // If the alloc is running and has its deployment status set, it is + // considered healthy from a migration standpoint. + if !alloc.TerminalStatus() && + alloc.DeploymentStatus != nil && + alloc.DeploymentStatus.Healthy != nil { + healthy++ + } + + // An alloc can't be considered for migration if: + // - It isn't on a draining node + // - It is already terminal + // - It has already been marked for draining + if !onDrainingNode || alloc.TerminalStatus() || alloc.DesiredTransition.ShouldMigrate() { + continue + } + + // This alloc is drainable, so capture it and the fact that the job + // isn't done draining yet. + remainingDrainingAlloc = true + drainable = append(drainable, alloc) + } + + // Update the done status + if remainingDrainingAlloc { + result.done = false + } + + // Determine how many we can drain + thresholdCount := tg.Count - tg.Migrate.MaxParallel + numToDrain := healthy - thresholdCount + numToDrain = helper.IntMin(len(drainable), numToDrain) + if numToDrain <= 0 { + return nil + } + + result.drain = append(result.drain, drainable[0:numToDrain]...) 
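+	// As a worked example of the arithmetic above (mirroring the
+	// maxParallel=2, drainingNodeAllocs=9 case in the tests): with a group
+	// count of 10, max_parallel of 2 and 9 healthy allocations, thresholdCount
+	// is 8 and numToDrain is min(len(drainable), 9-8) = 1, so at most one
+	// additional allocation is marked for draining on this pass.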
+ return nil +} + +// getJobAllocs returns all allocations for draining jobs +func (w *drainingJobWatcher) getJobAllocs(ctx context.Context, minIndex uint64) (map[structs.JobNs][]*structs.Allocation, uint64, error) { + if err := w.limiter.Wait(ctx); err != nil { + return nil, 0, err + } + + resp, index, err := w.state.BlockingQuery(w.getJobAllocsImpl, minIndex, ctx) + if err != nil { + return nil, 0, err + } + + return resp.(map[structs.JobNs][]*structs.Allocation), index, nil +} + +// getJobAllocsImpl returns a map of draining jobs to their allocations. +func (w *drainingJobWatcher) getJobAllocsImpl(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + index, err := state.Index("allocs") + if err != nil { + return nil, 0, err + } + + // Capture the draining jobs. + draining := w.drainingJobs() + l := len(draining) + if l == 0 { + return nil, index, nil + } + + // Capture the allocs for each draining job. + resp := make(map[structs.JobNs][]*structs.Allocation, l) + for jns := range draining { + allocs, err := state.AllocsByJob(ws, jns.Namespace, jns.ID, false) + if err != nil { + return nil, index, err + } + + resp[jns] = allocs + } + + return resp, index, nil +} + +// drainingJobs captures the set of draining jobs. +func (w *drainingJobWatcher) drainingJobs() map[structs.JobNs]struct{} { + w.l.RLock() + defer w.l.RUnlock() + + l := len(w.jobs) + if l == 0 { + return nil + } + + draining := make(map[structs.JobNs]struct{}, l) + for k := range w.jobs { + draining[k] = struct{}{} + } + + return draining +} + +// getQueryCtx is a helper for getting the query context. +func (w *drainingJobWatcher) getQueryCtx() context.Context { + w.l.RLock() + defer w.l.RUnlock() + return w.queryCtx } diff --git a/nomad/drainerv2/watch_jobs_test.go b/nomad/drainerv2/watch_jobs_test.go new file mode 100644 index 000000000000..6d9b1846ec5c --- /dev/null +++ b/nomad/drainerv2/watch_jobs_test.go @@ -0,0 +1,372 @@ +package drainerv2 + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/stretchr/testify/require" + "golang.org/x/time/rate" +) + +func testDrainingJobWatcher(t *testing.T) (*drainingJobWatcher, *state.StateStore) { + t.Helper() + + state := state.TestStateStore(t) + limiter := rate.NewLimiter(100.0, 100) + logger := testlog.Logger(t) + w := NewDrainingJobWatcher(context.Background(), limiter, state, logger) + return w, state +} + +func TestDrainingJobWatcher_Interface(t *testing.T) { + t.Parallel() + require := require.New(t) + w, _ := testDrainingJobWatcher(t) + require.Implements((*DrainingJobWatcher)(nil), w) +} + +// DrainingJobWatcher tests: +// TODO Test that several jobs allocation changes get batched +// TODO Test that jobs are deregistered when they have no more to migrate +// TODO Test that the watcher gets triggered on alloc changes +// TODO Test that the watcher cancels its query when a new job is registered + +func TestHandleTaskGroup_AllDone(t *testing.T) { + t.Parallel() + require := require.New(t) + + // Create a non-draining node + state := state.TestStateStore(t) + n := mock.Node() + require.Nil(state.UpsertNode(100, n)) + + job := mock.Job() + require.Nil(state.UpsertJob(101, job)) + + // Create 10 running allocs on the healthy node + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + a := mock.Alloc() + a.Job = job + 
a.TaskGroup = job.TaskGroups[0].Name + a.NodeID = n.ID + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + } + allocs = append(allocs, a) + } + require.Nil(state.UpsertAllocs(102, allocs)) + + snap, err := state.Snapshot() + require.Nil(err) + + res := &jobResult{} + require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res)) + require.Empty(res.drain) + require.Empty(res.migrated) + require.True(res.done) +} + +func TestHandleTaskGroup_AllOnDrainingNodes(t *testing.T) { + t.Parallel() + require := require.New(t) + + // The loop value sets the max parallel for the drain strategy + for i := 1; i < 8; i++ { + // Create a draining node + state := state.TestStateStore(t) + n := mock.Node() + n.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 5 * time.Minute, + }, + ForceDeadline: time.Now().Add(1 * time.Minute), + } + require.Nil(state.UpsertNode(100, n)) + + job := mock.Job() + job.TaskGroups[0].Migrate.MaxParallel = i + require.Nil(state.UpsertJob(101, job)) + + // Create 10 running allocs on the draining node + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + a := mock.Alloc() + a.Job = job + a.TaskGroup = job.TaskGroups[0].Name + a.NodeID = n.ID + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + } + allocs = append(allocs, a) + } + require.Nil(state.UpsertAllocs(102, allocs)) + + snap, err := state.Snapshot() + require.Nil(err) + + res := &jobResult{} + require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res)) + require.Len(res.drain, i) + require.Empty(res.migrated) + require.False(res.done) + } +} + +func TestHandleTaskGroup_MixedHealth(t *testing.T) { + cases := []struct { + maxParallel int + drainingNodeAllocs int + healthSet int + healthUnset int + expectedDrain int + expectedMigrated int + expectedDone bool + }{ + { + maxParallel: 2, + drainingNodeAllocs: 10, + healthSet: 0, + healthUnset: 0, + expectedDrain: 2, + expectedMigrated: 0, + expectedDone: false, + }, + { + maxParallel: 2, + drainingNodeAllocs: 9, + healthSet: 0, + healthUnset: 0, + expectedDrain: 1, + expectedMigrated: 1, + expectedDone: false, + }, + { + maxParallel: 5, + drainingNodeAllocs: 9, + healthSet: 0, + healthUnset: 0, + expectedDrain: 4, + expectedMigrated: 1, + expectedDone: false, + }, + { + maxParallel: 2, + drainingNodeAllocs: 5, + healthSet: 2, + healthUnset: 0, + expectedDrain: 0, + expectedMigrated: 5, + expectedDone: false, + }, + { + maxParallel: 2, + drainingNodeAllocs: 5, + healthSet: 3, + healthUnset: 0, + expectedDrain: 0, + expectedMigrated: 5, + expectedDone: false, + }, + { + maxParallel: 2, + drainingNodeAllocs: 5, + healthSet: 4, + healthUnset: 0, + expectedDrain: 1, + expectedMigrated: 5, + expectedDone: false, + }, + { + maxParallel: 2, + drainingNodeAllocs: 5, + healthSet: 4, + healthUnset: 1, + expectedDrain: 1, + expectedMigrated: 5, + expectedDone: false, + }, + { + maxParallel: 1, + drainingNodeAllocs: 5, + healthSet: 4, + healthUnset: 1, + expectedDrain: 0, + expectedMigrated: 5, + expectedDone: false, + }, + { + maxParallel: 3, + drainingNodeAllocs: 5, + healthSet: 3, + healthUnset: 0, + expectedDrain: 1, + expectedMigrated: 5, + expectedDone: false, + }, + { + maxParallel: 3, + drainingNodeAllocs: 0, + healthSet: 10, + healthUnset: 0, + expectedDrain: 0, + expectedMigrated: 10, + expectedDone: true, + }, + { + // Is the case where deadline is hit and all 10 are just marked + // stopped. We should detect the job as done. 
+ maxParallel: 3, + drainingNodeAllocs: 0, + healthSet: 0, + healthUnset: 0, + expectedDrain: 0, + expectedMigrated: 10, + expectedDone: true, + }, + } + + for cnum, c := range cases { + t.Run(fmt.Sprintf("%d", cnum), func(t *testing.T) { + require := require.New(t) + + // Create a draining node + state := state.TestStateStore(t) + + drainingNode := mock.Node() + drainingNode.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 5 * time.Minute, + }, + ForceDeadline: time.Now().Add(1 * time.Minute), + } + require.Nil(state.UpsertNode(100, drainingNode)) + + healthyNode := mock.Node() + require.Nil(state.UpsertNode(101, healthyNode)) + + job := mock.Job() + job.TaskGroups[0].Migrate.MaxParallel = c.maxParallel + require.Nil(state.UpsertJob(101, job)) + + // Create running allocs on the draining node with health set + var allocs []*structs.Allocation + for i := 0; i < c.drainingNodeAllocs; i++ { + a := mock.Alloc() + a.Job = job + a.TaskGroup = job.TaskGroups[0].Name + a.NodeID = drainingNode.ID + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + } + allocs = append(allocs, a) + } + + // Create stopped allocs on the draining node + for i := 10 - c.drainingNodeAllocs; i > 0; i-- { + a := mock.Alloc() + a.Job = job + a.TaskGroup = job.TaskGroups[0].Name + a.NodeID = drainingNode.ID + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + } + a.DesiredStatus = structs.AllocDesiredStatusStop + allocs = append(allocs, a) + } + + // Create allocs on the healthy node with health set + for i := 0; i < c.healthSet; i++ { + a := mock.Alloc() + a.Job = job + a.TaskGroup = job.TaskGroups[0].Name + a.NodeID = healthyNode.ID + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + } + allocs = append(allocs, a) + } + + // Create allocs on the healthy node with health not set + for i := 0; i < c.healthUnset; i++ { + a := mock.Alloc() + a.Job = job + a.TaskGroup = job.TaskGroups[0].Name + a.NodeID = healthyNode.ID + allocs = append(allocs, a) + } + require.Nil(state.UpsertAllocs(103, allocs)) + + snap, err := state.Snapshot() + require.Nil(err) + + res := &jobResult{} + require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res)) + require.Len(res.drain, c.expectedDrain) + require.Len(res.migrated, c.expectedMigrated) + require.Equal(c.expectedDone, res.done) + }) + } +} + +func TestHandleTaskGroup_Migrations(t *testing.T) { + t.Parallel() + require := require.New(t) + + // Create a draining node + state := state.TestStateStore(t) + n := mock.Node() + n.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 5 * time.Minute, + }, + ForceDeadline: time.Now().Add(1 * time.Minute), + } + require.Nil(state.UpsertNode(100, n)) + + job := mock.Job() + require.Nil(state.UpsertJob(101, job)) + + // Create 10 done allocs + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + a := mock.Alloc() + a.Job = job + a.TaskGroup = job.TaskGroups[0].Name + a.NodeID = n.ID + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(false), + } + + if i%2 == 0 { + a.DesiredStatus = structs.AllocDesiredStatusStop + } else { + a.ClientStatus = structs.AllocClientStatusFailed + } + allocs = append(allocs, a) + } + require.Nil(state.UpsertAllocs(102, allocs)) + + snap, err := state.Snapshot() + require.Nil(err) + + // Handle before and after indexes + res := &jobResult{} + require.Nil(handleTaskGroup(snap, 
job.TaskGroups[0], allocs, 101, res)) + require.Empty(res.drain) + require.Len(res.migrated, 10) + require.True(res.done) + + res = &jobResult{} + require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 103, res)) + require.Empty(res.drain) + require.Empty(res.migrated) + require.True(res.done) +} diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go index 7b0bd8573389..568678f747d6 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainerv2/watch_nodes.go @@ -14,18 +14,17 @@ import ( // DrainingNodeWatcher is the interface for watching for draining nodes. type DrainingNodeWatcher interface{} -// Tracking returns the whether the node is being tracked and if so the copy of -// the node object that is tracked. -func (n *NodeDrainer) Tracking(nodeID string) (*structs.Node, bool) { +// TrackedNodes returns the set of tracked nodes +func (n *NodeDrainer) TrackedNodes() map[string]*structs.Node { n.l.RLock() defer n.l.RUnlock() - draining, ok := n.nodes[nodeID] - if !ok { - return nil, false + t := make(map[string]*structs.Node, len(n.nodes)) + for n, d := range n.nodes { + t[n] = d.GetNode() } - return draining.GetNode(), true + return t } // Remove removes the given node from being tracked @@ -128,34 +127,42 @@ func (w *nodeDrainWatcher) watch() { // update index for next run nindex = index - for _, node := range nodes { + tracked := w.tracker.TrackedNodes() + for nodeID, node := range nodes { newDraining := node.DrainStrategy != nil - currentNode, tracked := w.tracker.Tracking(node.ID) + currentNode, tracked := tracked[nodeID] switch { // If the node is tracked but not draining, untrack case tracked && !newDraining: - w.logger.Printf("[TRACE] nomad.drain.node_watcher: tracked node %q is no longer draining", node.ID) - w.tracker.Remove(node.ID) + w.logger.Printf("[TRACE] nomad.drain.node_watcher: tracked node %q is no longer draining", nodeID) + w.tracker.Remove(nodeID) // If the node is not being tracked but is draining, track case !tracked && newDraining: - w.logger.Printf("[TRACE] nomad.drain.node_watcher: untracked node %q is draining", node.ID) + w.logger.Printf("[TRACE] nomad.drain.node_watcher: untracked node %q is draining", nodeID) w.tracker.Update(node) // If the node is being tracked but has changed, update: case tracked && newDraining && !currentNode.DrainStrategy.Equal(node.DrainStrategy): - w.logger.Printf("[TRACE] nomad.drain.node_watcher: tracked node %q has updated drain", node.ID) + w.logger.Printf("[TRACE] nomad.drain.node_watcher: tracked node %q has updated drain", nodeID) w.tracker.Update(node) default: - w.logger.Printf("[TRACE] nomad.drain.node_watcher: node %q at index %v: tracked %v, draining %v", node.ID, node.ModifyIndex, tracked, newDraining) + w.logger.Printf("[TRACE] nomad.drain.node_watcher: node %q at index %v: tracked %v, draining %v", nodeID, node.ModifyIndex, tracked, newDraining) + } + } + + for nodeID := range tracked { + if _, ok := nodes[nodeID]; !ok { + w.logger.Printf("[TRACE] nomad.drain.node_watcher: tracked node %q is no longer exists", nodeID) + w.tracker.Remove(nodeID) } } } } // getNodes returns all nodes blocking until the nodes are after the given index. 
-func (w *nodeDrainWatcher) getNodes(minIndex uint64) ([]*structs.Node, uint64, error) { +func (w *nodeDrainWatcher) getNodes(minIndex uint64) (map[string]*structs.Node, uint64, error) { if err := w.limiter.Wait(w.ctx); err != nil { return nil, 0, err } @@ -165,7 +172,7 @@ func (w *nodeDrainWatcher) getNodes(minIndex uint64) ([]*structs.Node, uint64, e return nil, 0, err } - return resp.([]*structs.Node), index, nil + return resp.(map[string]*structs.Node), index, nil } // getNodesImpl is used to get nodes from the state store, returning the set of @@ -181,7 +188,7 @@ func (w *nodeDrainWatcher) getNodesImpl(ws memdb.WatchSet, state *state.StateSto return nil, 0, err } - resp := make([]*structs.Node, 0, 64) + resp := make(map[string]*structs.Node, 64) for { raw := iter.Next() if raw == nil { @@ -189,7 +196,7 @@ func (w *nodeDrainWatcher) getNodesImpl(ws memdb.WatchSet, state *state.StateSto } node := raw.(*structs.Node) - resp = append(resp, node) + resp[node.ID] = node } return resp, index, nil diff --git a/nomad/drainerv2/watch_nodes_test.go b/nomad/drainerv2/watch_nodes_test.go index 8b3a63e1c250..dab304c32c9c 100644 --- a/nomad/drainerv2/watch_nodes_test.go +++ b/nomad/drainerv2/watch_nodes_test.go @@ -63,11 +63,10 @@ func TestNodeDrainWatcher_AddDraining(t *testing.T) { t.Fatal("No node drain events") }) - _, ok1 := m.Tracking(n1.ID) - out2, ok2 := m.Tracking(n2.ID) - require.False(ok1) - require.True(ok2) - require.Equal(n2, out2) + tracked := m.TrackedNodes() + require.NotContains(tracked, n1.ID) + require.Contains(tracked, n2.ID) + require.Equal(n2, tracked[n2.ID]) } @@ -93,9 +92,9 @@ func TestNodeDrainWatcher_Remove(t *testing.T) { t.Fatal("No node drain events") }) - out, ok := m.Tracking(n.ID) - require.True(ok) - require.Equal(n, out) + tracked := m.TrackedNodes() + require.Contains(tracked, n.ID) + require.Equal(n, tracked[n.ID]) // Change the node to be not draining and wait for it to be untracked require.Nil(state.UpdateNodeDrain(101, n.ID, nil)) @@ -105,8 +104,46 @@ func TestNodeDrainWatcher_Remove(t *testing.T) { t.Fatal("No new node drain events") }) - _, ok = m.Tracking(n.ID) - require.False(ok) + tracked = m.TrackedNodes() + require.NotContains(tracked, n.ID) +} + +func TestNodeDrainWatcher_Remove_Nonexistent(t *testing.T) { + t.Parallel() + require := require.New(t) + _, state, m := testNodeDrainWatcher(t) + + // Create a draining node + n := mock.Node() + n.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: time.Hour, + }, + ForceDeadline: time.Now().Add(time.Hour), + } + + // Wait for it to be tracked + require.Nil(state.UpsertNode(100, n)) + testutil.WaitForResult(func() (bool, error) { + return len(m.Events) == 1, nil + }, func(err error) { + t.Fatal("No node drain events") + }) + + tracked := m.TrackedNodes() + require.Contains(tracked, n.ID) + require.Equal(n, tracked[n.ID]) + + // Delete the node + require.Nil(state.DeleteNode(101, n.ID)) + testutil.WaitForResult(func() (bool, error) { + return len(m.Events) == 2, nil + }, func(err error) { + t.Fatal("No new node drain events") + }) + + tracked = m.TrackedNodes() + require.NotContains(tracked, n.ID) } func TestNodeDrainWatcher_Update(t *testing.T) { @@ -131,9 +168,9 @@ func TestNodeDrainWatcher_Update(t *testing.T) { t.Fatal("No node drain events") }) - out, ok := m.Tracking(n.ID) - require.True(ok) - require.Equal(n, out) + tracked := m.TrackedNodes() + require.Contains(tracked, n.ID) + require.Equal(n, tracked[n.ID]) // Change the node to have a new spec s2 := 
n.DrainStrategy.Copy() @@ -147,7 +184,7 @@ func TestNodeDrainWatcher_Update(t *testing.T) { t.Fatal("No new node drain events") }) - out, ok = m.Tracking(n.ID) - require.True(ok) - require.Equal(out.DrainStrategy, s2) + tracked = m.TrackedNodes() + require.Contains(tracked, n.ID) + require.Equal(s2, tracked[n.ID].DrainStrategy) } diff --git a/nomad/state/testing.go b/nomad/state/testing.go index 69509714d179..ee7dce1d6c7f 100644 --- a/nomad/state/testing.go +++ b/nomad/state/testing.go @@ -1,14 +1,13 @@ package state import ( - "os" - + "github.com/hashicorp/nomad/helper/testlog" "github.com/mitchellh/go-testing-interface" ) func TestStateStore(t testing.T) *StateStore { config := &StateStoreConfig{ - LogOutput: os.Stderr, + LogOutput: testlog.NewWriter(t), Region: "global", } state, err := NewStateStore(config) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 018b96c422ec..29e794cbf2ba 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1771,6 +1771,26 @@ func (n *NetworkResource) PortLabels() map[string]int { return labelValues } +// JobNs is a Job.ID and Namespace tuple +type JobNs struct { + ID, Namespace string +} + +func NewJobNs(namespace, id string) *JobNs { + return &JobNs{ + ID: id, + Namespace: namespace, + } +} + +func (j *JobNs) String() string { + if j == nil { + return "" + } + + return fmt.Sprintf("", j.Namespace, j.ID) +} + const ( // JobTypeNomad is reserved for internal system tasks and is // always handled by the CoreScheduler. From cec2c5a72652581cd726a1791c6ace1ef238a4be Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Tue, 6 Mar 2018 14:37:37 -0800 Subject: [PATCH 41/79] Drainer --- nomad/drainer_shims.go | 20 ++- nomad/drainerv2/drainer.go | 207 +++++++++++++++++++++++++++---- nomad/drainerv2/draining_node.go | 99 +++++++++++---- nomad/drainerv2/watch_jobs.go | 48 +++++-- nomad/drainerv2/watch_nodes.go | 5 +- nomad/node_endpoint.go | 38 +----- nomad/node_endpoint_test.go | 30 +---- nomad/server.go | 18 ++- nomad/structs/structs.go | 42 +++++++ nomad/structs/structs_test.go | 28 +++++ 10 files changed, 390 insertions(+), 145 deletions(-) diff --git a/nomad/drainer_shims.go b/nomad/drainer_shims.go index 09a1a8f6635c..1c7ffb1a9b76 100644 --- a/nomad/drainer_shims.go +++ b/nomad/drainer_shims.go @@ -8,38 +8,36 @@ type drainerShim struct { s *Server } -func (d drainerShim) NodeDrainComplete(nodeID string) error { +func (d drainerShim) NodeDrainComplete(nodeID string) (uint64, error) { args := &structs.NodeUpdateDrainRequest{ NodeID: nodeID, Drain: false, WriteRequest: structs.WriteRequest{Region: d.s.config.Region}, } - resp, _, err := d.s.raftApply(structs.NodeUpdateDrainRequestType, args) - return d.convertApplyErrors(resp, err) + resp, index, err := d.s.raftApply(structs.NodeUpdateDrainRequestType, args) + return d.convertApplyErrors(resp, index, err) } -func (d drainerShim) AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error { +func (d drainerShim) AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) (uint64, error) { args := &structs.AllocUpdateDesiredTransitionRequest{ Allocs: allocs, Evals: evals, WriteRequest: structs.WriteRequest{Region: d.s.config.Region}, } - resp, _, err := d.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, args) - return d.convertApplyErrors(resp, err) + resp, index, err := d.s.raftApply(structs.AllocUpdateDesiredTransitionRequestType, args) + return d.convertApplyErrors(resp, 
index, err) } // convertApplyErrors parses the results of a raftApply and returns the index at // which it was applied and any error that occurred. Raft Apply returns two // separate errors, Raft library errors and user returned errors from the FSM. // This helper, joins the errors by inspecting the applyResponse for an error. -// -// Similar to deployment watcher's convertApplyErrors -func (d drainerShim) convertApplyErrors(applyResp interface{}, err error) error { +func (d drainerShim) convertApplyErrors(applyResp interface{}, index uint64, err error) (uint64, error) { if applyResp != nil { if fsmErr, ok := applyResp.(error); ok && fsmErr != nil { - return fsmErr + return index, fsmErr } } - return err + return index, err } diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index d78019b8499e..f3553da1fcc3 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -6,6 +6,8 @@ import ( "sync" "time" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "golang.org/x/time/rate" @@ -21,17 +23,20 @@ const ( // LimitStateQueriesPerSecond is the number of state queries allowed per // second LimitStateQueriesPerSecond = 100.0 + + // BatchUpdateInterval is how long we wait to batch updates + BatchUpdateInterval = 1 * time.Second + + // NodeDeadlineCoalesceWindow is the duration in which deadlining nodes will + // be coalesced together + NodeDeadlineCoalesceWindow = 5 * time.Second ) // RaftApplier contains methods for applying the raft requests required by the // NodeDrainer. type RaftApplier interface { - AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error - NodeDrainComplete(nodeID string) error -} - -type AllocDrainer interface { - drain(allocs []*structs.Allocation) + AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) (uint64, error) + NodeDrainComplete(nodeID string) (uint64, error) } type NodeTracker interface { @@ -44,6 +49,38 @@ type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, *state.State type DrainingNodeWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger, NodeTracker) DrainingNodeWatcher type DrainDeadlineNotifierFactory func(context.Context) DrainDeadlineNotifier +func GetDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger) DrainingJobWatcher { + return NewDrainingJobWatcher(ctx, limiter, state, logger) +} + +func GetDeadlineNotifier(ctx context.Context) DrainDeadlineNotifier { + return NewDeadlineHeap(ctx, NodeDeadlineCoalesceWindow) +} + +func GetNodeWatcherFactory() DrainingNodeWatcherFactory { + return func(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger, tracker NodeTracker) DrainingNodeWatcher { + return NewNodeDrainWatcher(ctx, limiter, state, logger, tracker) + } +} + +type allocMigrateBatcher struct { + // updates holds pending client status updates for allocations + updates []*structs.Allocation + + // updateFuture is used to wait for the pending batch update + // to complete. This may be nil if no batch is pending. + updateFuture *structs.BatchFuture + + // updateTimer is the timer that will trigger the next batch + // update, and may be nil if there is no batch pending. 
+ updateTimer *time.Timer + + batchWindow time.Duration + + // synchronizes access to the updates list, the future and the timer. + sync.Mutex +} + type NodeDrainerConfig struct { Logger *log.Logger Raft RaftApplier @@ -51,8 +88,10 @@ type NodeDrainerConfig struct { NodeFactory DrainingNodeWatcherFactory DrainDeadlineFactory DrainDeadlineNotifierFactory StateQueriesPerSecond float64 + BatchUpdateInterval time.Duration } +// TODO Add stats type NodeDrainer struct { enabled bool logger *log.Logger @@ -60,9 +99,6 @@ type NodeDrainer struct { // nodes is the set of draining nodes nodes map[string]*drainingNode - // doneNodeCh is used to signal that a node is done draining - doneNodeCh chan string - nodeWatcher DrainingNodeWatcher nodeFactory DrainingNodeWatcherFactory @@ -81,6 +117,9 @@ type NodeDrainer struct { // raft is a shim around the raft messages necessary for draining raft RaftApplier + // batcher is used to batch alloc migrations. + batcher allocMigrateBatcher + // ctx and exitFn are used to cancel the watcher ctx context.Context exitFn context.CancelFunc @@ -96,6 +135,9 @@ func NewNodeDrainer(c *NodeDrainerConfig) *NodeDrainer { nodeFactory: c.NodeFactory, deadlineNotifierFactory: c.DrainDeadlineFactory, queryLimiter: rate.NewLimiter(rate.Limit(c.StateQueriesPerSecond), 100), + batcher: allocMigrateBatcher{ + batchWindow: c.BatchUpdateInterval, + }, } } @@ -133,7 +175,6 @@ func (n *NodeDrainer) flush() { n.nodeWatcher = n.nodeFactory(n.ctx, n.queryLimiter, n.state, n.logger, n) n.deadlineNotifier = n.deadlineNotifierFactory(n.ctx) n.nodes = make(map[string]*drainingNode, 32) - n.doneNodeCh = make(chan string, 4) } func (n *NodeDrainer) run(ctx context.Context) { @@ -143,33 +184,145 @@ func (n *NodeDrainer) run(ctx context.Context) { return case nodes := <-n.deadlineNotifier.NextBatch(): n.handleDeadlinedNodes(nodes) - case allocs := <-n.jobWatcher.Drain(): - n.handleJobAllocDrain(allocs) - case node := <-n.doneNodeCh: - // TODO probably remove this as a channel - n.handleDoneNode(node) + case req := <-n.jobWatcher.Drain(): + n.handleJobAllocDrain(req) + case allocs := <-n.jobWatcher.Migrated(): + n.handleMigratedAllocs(allocs) } } } func (n *NodeDrainer) handleDeadlinedNodes(nodes []string) { - // TODO + // Retrieve the set of allocations that will be force stopped. + n.l.RLock() + var forceStop []*structs.Allocation + for _, node := range nodes { + draining, ok := n.nodes[node] + if !ok { + n.logger.Printf("[DEBUG] nomad.node_drainer: skipping untracked deadlined node %q", node) + continue + } + + allocs, err := draining.DeadlineAllocs() + if err != nil { + n.logger.Printf("[ERR] nomad.node_drainer: failed to retrive allocs on deadlined node %q: %v", node, err) + continue + } + + forceStop = append(forceStop, allocs...) 
+ } + n.l.RUnlock() + n.batchDrainAllocs(forceStop) +} + +func (n *NodeDrainer) handleJobAllocDrain(req *DrainRequest) { + // This should be syncronous + index, err := n.batchDrainAllocs(req.Allocs) + req.Resp.Respond(index, err) } -func (n *NodeDrainer) handleJobAllocDrain(allocs []*structs.Allocation) { - // TODO +func (n *NodeDrainer) handleMigratedAllocs(allocs []*structs.Allocation) { + // Determine the set of nodes that were effected + nodes := make(map[string]struct{}) + for _, alloc := range allocs { + nodes[alloc.NodeID] = struct{}{} + } + + // For each node, check if it is now done + n.l.RLock() + var done []string + for node := range nodes { + draining, ok := n.nodes[node] + if !ok { + continue + } + + isDone, err := draining.IsDone() + if err != nil { + n.logger.Printf("[ERR] nomad.drain: checking if node %q is done draining: %v", node, err) + continue + } + + if !isDone { + continue + } - // TODO Call check on the appropriate nodes when the final allocs - // transistion to stop so we have a place to determine with the node - // is done and the final drain of system allocs - // TODO This probably requires changing the interface such that it - // returns replaced allocs as well. + done = append(done, node) + } + n.l.RUnlock() + + // TODO This should probably be a single Raft transaction + for _, doneNode := range done { + index, err := n.raft.NodeDrainComplete(doneNode) + if err != nil { + n.logger.Printf("[ERR] nomad.drain: failed to unset drain for node %q: %v", doneNode, err) + } else { + n.logger.Printf("[INFO] nomad.drain: node %q completed draining at index %d", doneNode, index) + } + } } -func (n *NodeDrainer) handleDoneNode(nodeID string) { - // TODO +func (n *NodeDrainer) batchDrainAllocs(allocs []*structs.Allocation) (uint64, error) { + // Add this to the batch + n.batcher.Lock() + n.batcher.updates = append(n.batcher.updates, allocs...) + + // Start a new batch if none + future := n.batcher.updateFuture + if future == nil { + future = structs.NewBatchFuture() + n.batcher.updateFuture = future + n.batcher.updateTimer = time.AfterFunc(n.batcher.batchWindow, func() { + // Get the pending updates + n.batcher.Lock() + updates := n.batcher.updates + future := n.batcher.updateFuture + n.batcher.updates = nil + n.batcher.updateFuture = nil + n.batcher.updateTimer = nil + n.batcher.Unlock() + + // Perform the batch update + n.drainAllocs(future, updates) + }) + } + n.batcher.Unlock() + + // Wait for the future + if err := future.Wait(); err != nil { + return 0, err + } + + return future.Index(), nil } -func (n *NodeDrainer) drain(allocs []*structs.Allocation) { - // TODO +func (n *NodeDrainer) drainAllocs(future *structs.BatchFuture, allocs []*structs.Allocation) { + // TODO This should shard to limit the size of the transaction. 
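+	//
+	// The update below marks each allocation with DesiredTransition.Migrate
+	// set to true and creates one node-drain evaluation per affected job,
+	// committing both in a single Raft apply and responding to the batch
+	// future with the resulting index.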
+ + // Compute the effected jobs and make the transistion map + jobs := make(map[string]*structs.Allocation, 4) + transistions := make(map[string]*structs.DesiredTransition, len(allocs)) + for _, alloc := range allocs { + transistions[alloc.ID] = &structs.DesiredTransition{ + Migrate: helper.BoolToPtr(true), + } + jobs[alloc.JobID] = alloc + } + + evals := make([]*structs.Evaluation, 0, len(jobs)) + for job, alloc := range jobs { + evals = append(evals, &structs.Evaluation{ + ID: uuid.Generate(), + Namespace: alloc.Namespace, + Priority: alloc.Job.Priority, + Type: alloc.Job.Type, + TriggeredBy: structs.EvalTriggerNodeDrain, + JobID: job, + Status: structs.EvalStatusPending, + }) + } + + // Commit this update via Raft + index, err := n.raft.AllocUpdateDesiredTransition(transistions, evals) + future.Respond(index, err) } diff --git a/nomad/drainerv2/draining_node.go b/nomad/drainerv2/draining_node.go index 32233573b3e3..01ce49123d65 100644 --- a/nomad/drainerv2/draining_node.go +++ b/nomad/drainerv2/draining_node.go @@ -1,6 +1,7 @@ package drainerv2 import ( + "fmt" "sync" "time" @@ -8,32 +9,16 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -// TODO make this an interface and then I can optimize the infinite case by -// using a singleton object - -type drainCoordinator interface { - nodeDone(nodeID string) -} - -func (n *NodeDrainer) nodeDone(nodeID string) { - select { - case <-n.ctx.Done(): - case n.doneNodeCh <- nodeID: - } -} - type drainingNode struct { - coordinator drainCoordinator - state *state.StateStore - node *structs.Node - l sync.RWMutex + state *state.StateStore + node *structs.Node + l sync.RWMutex } -func NewDrainingNode(node *structs.Node, state *state.StateStore, coordinator drainCoordinator) *drainingNode { +func NewDrainingNode(node *structs.Node, state *state.StateStore) *drainingNode { return &drainingNode{ - coordinator: coordinator, - state: state, - node: node, + state: state, + node: node, } } @@ -62,10 +47,78 @@ func (n *drainingNode) DeadlineTime() (bool, time.Time) { return n.node.DrainStrategy.DeadlineTime() } +// IsDone returns if the node is done draining +func (n *drainingNode) IsDone() (bool, error) { + n.l.RLock() + defer n.l.RUnlock() + + // Should never happen + if n.node == nil || n.node.DrainStrategy == nil { + return false, fmt.Errorf("node doesn't have a drain strategy set") + } + + // Grab the relevant drain info + ignoreSystem := n.node.DrainStrategy.IgnoreSystemJobs + + // Retrieve the allocs on the node + allocs, err := n.state.AllocsByNode(nil, n.node.ID) + if err != nil { + return false, err + } + + for _, alloc := range allocs { + // Skip system if configured to + if alloc.Job.Type == structs.JobTypeSystem && ignoreSystem { + continue + } + + // If there is a non-terminal we aren't done + if !alloc.TerminalStatus() { + return false, nil + } + } + + return true, nil +} + // DeadlineAllocs returns the set of allocations that should be drained given a // node is at its deadline func (n *drainingNode) DeadlineAllocs() ([]*structs.Allocation, error) { n.l.RLock() defer n.l.RUnlock() - return nil, nil + + // Should never happen + if n.node == nil || n.node.DrainStrategy == nil { + return nil, fmt.Errorf("node doesn't have a drain strategy set") + } + + // Grab the relevant drain info + inf, _ := n.node.DrainStrategy.DeadlineTime() + if inf { + return nil, nil + } + ignoreSystem := n.node.DrainStrategy.IgnoreSystemJobs + + // Retrieve the allocs on the node + allocs, err := n.state.AllocsByNode(nil, n.node.ID) + if err != nil { + return 
nil, err + } + + var drain []*structs.Allocation + for _, alloc := range allocs { + // Nothing to do on a terminal allocation + if alloc.TerminalStatus() { + continue + } + + // Skip system if configured to + if alloc.Job.Type == structs.JobTypeSystem && ignoreSystem { + continue + } + + drain = append(drain, alloc) + } + + return drain, nil } diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainerv2/watch_jobs.go index a2e6ef45ef50..b4442cd02469 100644 --- a/nomad/drainerv2/watch_jobs.go +++ b/nomad/drainerv2/watch_jobs.go @@ -14,17 +14,25 @@ import ( "golang.org/x/time/rate" ) +type DrainRequest struct { + Allocs []*structs.Allocation + Resp *structs.BatchFuture +} + +func NewDrainRequest(allocs []*structs.Allocation) *DrainRequest { + return &DrainRequest{ + Allocs: allocs, + Resp: structs.NewBatchFuture(), + } +} + // DrainingJobWatcher is the interface for watching a job drain type DrainingJobWatcher interface { // RegisterJob is used to start watching a draining job RegisterJob(jobID, namespace string) - // TODO This should probably be a drain future such that we can block the - // next loop till the raft apply happens such that we don't emit the same - // drain many times. We would get the applied index back and block till - // then. // Drain is used to emit allocations that should be drained. - Drain() <-chan []*structs.Allocation + Drain() <-chan *DrainRequest // Migrated is allocations for draining jobs that have transistioned to // stop. There is no guarantee that duplicates won't be published. @@ -51,7 +59,7 @@ type drainingJobWatcher struct { queryCancel context.CancelFunc // drainCh and migratedCh are used to emit allocations - drainCh chan []*structs.Allocation + drainCh chan *DrainRequest migratedCh chan []*structs.Allocation l sync.RWMutex @@ -73,7 +81,7 @@ func NewDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *st logger: logger, state: state, jobs: make(map[structs.JobNs]struct{}, 64), - drainCh: make(chan []*structs.Allocation, 8), + drainCh: make(chan *DrainRequest, 8), migratedCh: make(chan []*structs.Allocation, 8), } @@ -103,7 +111,7 @@ func (w *drainingJobWatcher) RegisterJob(jobID, namespace string) { } // Drain returns the channel that emits allocations to drain. 
-func (w *drainingJobWatcher) Drain() <-chan []*structs.Allocation { +func (w *drainingJobWatcher) Drain() <-chan *DrainRequest { return w.drainCh } @@ -203,16 +211,34 @@ func (w *drainingJobWatcher) watch() { } } - if allDrain != nil { + if len(allDrain) != 0 { + // Create the request + req := NewDrainRequest(allDrain) + select { - case w.drainCh <- allDrain: + case w.drainCh <- req: case <-w.ctx.Done(): w.logger.Printf("[TRACE] nomad.drain.job_watcher: shutting down") return } + + // Wait for the request to be commited + select { + case <-req.Resp.WaitCh(): + case <-w.ctx.Done(): + w.logger.Printf("[TRACE] nomad.drain.job_watcher: shutting down") + return + } + + // See if it successfully committed + if err := req.Resp.Error(); err != nil { + w.logger.Printf("[ERR] nomad.drain.job_watcher: failed to transistion allocations: %v", err) + } + + // TODO Probably want to wait till the new index } - if allMigrated != nil { + if len(allMigrated) != 0 { select { case w.migratedCh <- allMigrated: case <-w.ctx.Done(): diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go index 568678f747d6..dd1686e566b3 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainerv2/watch_nodes.go @@ -50,7 +50,7 @@ func (n *NodeDrainer) Update(node *structs.Node) { draining, ok := n.nodes[node.ID] if !ok { - n.nodes[node.ID] = NewDrainingNode(node, n.state, n) + n.nodes[node.ID] = NewDrainingNode(node, n.state) return } @@ -61,9 +61,6 @@ func (n *NodeDrainer) Update(node *structs.Node) { if inf, deadline := node.DrainStrategy.DeadlineTime(); !inf { n.deadlineNotifier.Watch(node.ID, deadline) } else { - // TODO think about handling any race that may occur. I believe it is - // totally fine as long as the handlers are locked. - // There is an infinite deadline so it shouldn't be tracked for // deadlining n.deadlineNotifier.Remove(node.ID) diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index 6cfe62ae7e5c..e8726a2f4125 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -41,7 +41,7 @@ type Node struct { // updateFuture is used to wait for the pending batch update // to complete. This may be nil if no batch is pending. - updateFuture *batchFuture + updateFuture *structs.BatchFuture // updateTimer is the timer that will trigger the next batch // update, and may be nil if there is no batch pending. 
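A minimal sketch of the consumer side of this DrainRequest/BatchFuture handshake, mirroring what handleJobAllocDrain does in the drainer; the applyDrain helper here is hypothetical and stands in for the batched Raft update:

func consumeDrainRequests(ctx context.Context, watcher DrainingJobWatcher,
	applyDrain func([]*structs.Allocation) (uint64, error)) {
	for {
		select {
		case req := <-watcher.Drain():
			// Apply the desired transitions, then unblock the job watcher,
			// which is parked on req.Resp.WaitCh() until Respond is called.
			index, err := applyDrain(req.Allocs)
			req.Resp.Respond(index, err)
		case <-ctx.Done():
			return
		}
	}
}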
@@ -933,7 +933,7 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene // Start a new batch if none future := n.updateFuture if future == nil { - future = NewBatchFuture() + future = structs.NewBatchFuture() n.updateFuture = future n.updateTimer = time.AfterFunc(batchUpdateInterval, func() { // Get the pending updates @@ -962,7 +962,7 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene } // batchUpdate is used to update all the allocations -func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) { +func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) { // Prepare the batch update batch := &structs.AllocUpdateRequest{ Alloc: updates, @@ -1166,38 +1166,6 @@ func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint6 return evalIDs, evalIndex, nil } -// batchFuture is used to wait on a batch update to complete -type batchFuture struct { - doneCh chan struct{} - err error - index uint64 -} - -// NewBatchFuture creates a new batch future -func NewBatchFuture() *batchFuture { - return &batchFuture{ - doneCh: make(chan struct{}), - } -} - -// Wait is used to block for the future to complete and returns the error -func (b *batchFuture) Wait() error { - <-b.doneCh - return b.err -} - -// Index is used to return the index of the batch, only after Wait() -func (b *batchFuture) Index() uint64 { - return b.index -} - -// Respond is used to unblock the future -func (b *batchFuture) Respond(index uint64, err error) { - b.index = index - b.err = err - close(b.doneCh) -} - // DeriveVaultToken is used by the clients to request wrapped Vault tokens for // tasks func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest, diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 87c418d0d8a9..3d98a942f52b 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -1975,7 +1975,7 @@ func TestClientEndpoint_BatchUpdate(t *testing.T) { clientAlloc.ClientStatus = structs.AllocClientStatusFailed // Call to do the batch update - bf := NewBatchFuture() + bf := structs.NewBatchFuture() endpoint := s1.staticEndpoints.Node endpoint.batchUpdate(bf, []*structs.Allocation{clientAlloc}, nil) if err := bf.Wait(); err != nil { @@ -2541,34 +2541,6 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { } } -func TestBatchFuture(t *testing.T) { - t.Parallel() - bf := NewBatchFuture() - - // Async respond to the future - expect := fmt.Errorf("testing") - go func() { - time.Sleep(10 * time.Millisecond) - bf.Respond(1000, expect) - }() - - // Block for the result - start := time.Now() - err := bf.Wait() - diff := time.Since(start) - if diff < 5*time.Millisecond { - t.Fatalf("too fast") - } - - // Check the results - if err != expect { - t.Fatalf("bad: %s", err) - } - if bf.Index() != 1000 { - t.Fatalf("bad: %d", bf.Index()) - } -} - func TestClientEndpoint_DeriveVaultToken_Bad(t *testing.T) { t.Parallel() s1 := TestServer(t, nil) diff --git a/nomad/server.go b/nomad/server.go index a9984ac34afb..afe7ee9871ca 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -27,7 +27,7 @@ import ( "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/nomad/deploymentwatcher" - "github.com/hashicorp/nomad/nomad/drainer" + "github.com/hashicorp/nomad/nomad/drainerv2" "github.com/hashicorp/nomad/nomad/state" 
"github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" @@ -174,7 +174,7 @@ type Server struct { deploymentWatcher *deploymentwatcher.Watcher // nodeDrainer is used to drain allocations from nodes. - nodeDrainer *drainer.NodeDrainer + nodeDrainer *drainerv2.NodeDrainer // evalBroker is used to manage the in-progress evaluations // that are waiting to be brokered to a sub-scheduler @@ -890,10 +890,18 @@ func (s *Server) setupDeploymentWatcher() error { // setupNodeDrainer creates a node drainer which will be enabled when a server // becomes a leader. func (s *Server) setupNodeDrainer() { - // create a shim around raft requests + // Create a shim around Raft requests shim := drainerShim{s} - s.nodeDrainer = drainer.NewNodeDrainer(s.logger, s.shutdownCh, shim) - go s.nodeDrainer.Run() + c := &drainerv2.NodeDrainerConfig{ + Logger: s.logger, + Raft: shim, + JobFactory: drainerv2.GetDrainingJobWatcher, + NodeFactory: drainerv2.GetNodeWatcherFactory(), + DrainDeadlineFactory: drainerv2.GetDeadlineNotifier, + StateQueriesPerSecond: drainerv2.LimitStateQueriesPerSecond, + BatchUpdateInterval: drainerv2.BatchUpdateInterval, + } + s.nodeDrainer = drainerv2.NewNodeDrainer(c) } // setupVaultClient is used to set up the Vault API client. diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 29e794cbf2ba..8750e7c0829c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -7015,3 +7015,45 @@ type ACLTokenUpsertResponse struct { Tokens []*ACLToken WriteMeta } + +// BatchFuture is used to wait on a batch update to complete +type BatchFuture struct { + doneCh chan struct{} + err error + index uint64 +} + +// NewBatchFuture creates a new batch future +func NewBatchFuture() *BatchFuture { + return &BatchFuture{ + doneCh: make(chan struct{}), + } +} + +// Wait is used to block for the future to complete and returns the error +func (b *BatchFuture) Wait() error { + <-b.doneCh + return b.err +} + +// WaitCh is used to block for the future to complete +func (b *BatchFuture) WaitCh() <-chan struct{} { + return b.doneCh +} + +// Error is used to return the error of the batch, only after Wait() +func (b *BatchFuture) Error() error { + return b.err +} + +// Index is used to return the index of the batch, only after Wait() +func (b *BatchFuture) Index() uint64 { + return b.index +} + +// Respond is used to unblock the future +func (b *BatchFuture) Respond(index uint64, err error) { + b.index = index + b.err = err + close(b.doneCh) +} diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index f3cbe3d055e0..9df3d9e4f706 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -3597,3 +3597,31 @@ func TestNetworkResourcesEquals(t *testing.T) { require.Equal(testCase.expected, first.Equals(second), testCase.errorMsg) } } + +func TestBatchFuture(t *testing.T) { + t.Parallel() + bf := NewBatchFuture() + + // Async respond to the future + expect := fmt.Errorf("testing") + go func() { + time.Sleep(10 * time.Millisecond) + bf.Respond(1000, expect) + }() + + // Block for the result + start := time.Now() + err := bf.Wait() + diff := time.Since(start) + if diff < 5*time.Millisecond { + t.Fatalf("too fast") + } + + // Check the results + if err != expect { + t.Fatalf("bad: %s", err) + } + if bf.Index() != 1000 { + t.Fatalf("bad: %d", bf.Index()) + } +} From c0354223c233e1150eb323c28e560bcc0d6cd3aa Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 7 Mar 2018 14:57:35 -0800 Subject: [PATCH 42/79] integration 
test and basic fixes --- nomad/drainer_int_test.go | 188 +++++++++++++++++++++++++++++++ nomad/drainerv2/drainer.go | 2 +- nomad/drainerv2/draining_node.go | 29 +++++ nomad/drainerv2/watch_jobs.go | 77 ++++++++----- nomad/drainerv2/watch_nodes.go | 26 ++++- nomad/structs/structs.go | 10 +- nomad/worker.go | 2 +- 7 files changed, 294 insertions(+), 40 deletions(-) create mode 100644 nomad/drainer_int_test.go diff --git a/nomad/drainer_int_test.go b/nomad/drainer_int_test.go new file mode 100644 index 000000000000..0c0372d16925 --- /dev/null +++ b/nomad/drainer_int_test.go @@ -0,0 +1,188 @@ +package nomad + +import ( + "context" + "fmt" + "log" + "net/rpc" + "testing" + "time" + + memdb "github.com/hashicorp/go-memdb" + msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/stretchr/testify/require" +) + +func allocPromoter(t *testing.T, ctx context.Context, + state *state.StateStore, codec rpc.ClientCodec, nodeID string, + logger *log.Logger) { + t.Helper() + + nindex := uint64(1) + for { + allocs, index, err := getNodeAllocs(ctx, state, nodeID, nindex) + if err != nil { + if err == context.Canceled { + return + } + + t.Fatalf("failed to get node allocs: %v", err) + } + nindex = index + + // For each alloc that doesn't have its deployment status set, set it + var updates []*structs.Allocation + for _, alloc := range allocs { + if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil { + continue + } + + newAlloc := alloc.Copy() + newAlloc.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(true), + } + updates = append(updates, newAlloc) + logger.Printf("Marked deployment health for alloc %q", alloc.ID) + } + + if len(updates) == 0 { + continue + } + + // Send the update + req := &structs.AllocUpdateRequest{ + Alloc: updates, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var resp structs.NodeAllocsResponse + require.Nil(t, msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", req, &resp)) + } +} + +func getNodeAllocs(ctx context.Context, state *state.StateStore, nodeID string, index uint64) ([]*structs.Allocation, uint64, error) { + resp, index, err := state.BlockingQuery(getNodeAllocsImpl(nodeID), index, ctx) + if err != nil { + return nil, 0, err + } + if err := ctx.Err(); err != nil { + return nil, 0, err + } + + return resp.([]*structs.Allocation), index, nil +} + +func getNodeAllocsImpl(nodeID string) func(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + return func(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { + // Capture all the allocations + allocs, err := state.AllocsByNode(ws, nodeID) + if err != nil { + return nil, 0, err + } + + // Use the last index that affected the jobs table + index, err := state.Index("allocs") + if err != nil { + return nil, index, err + } + + return allocs, index, nil + } +} + +func TestDrainer_Simple_ServiceOnly(t *testing.T) { + t.Parallel() + require := require.New(t) + s1 := TestServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create two nodes + n1, n2 := mock.Node(), mock.Node() + nodeReg := &structs.NodeRegisterRequest{ + Node: n1, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var nodeResp structs.NodeUpdateResponse + 
require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", nodeReg, &nodeResp)) + + // Create a job that runs on just one + job := mock.Job() + job.TaskGroups[0].Count = 2 + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{ + Region: "global", + Namespace: job.Namespace, + }, + } + + // Fetch the response + var resp structs.JobRegisterResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp)) + require.NotZero(resp.Index) + + // Wait for the two allocations to be placed + state := s1.State() + testutil.WaitForResult(func() (bool, error) { + allocs, err := state.AllocsByJob(nil, job.Namespace, job.ID, false) + if err != nil { + return false, err + } + return len(allocs) == 2, fmt.Errorf("got %d allocs", len(allocs)) + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // Create the second node + nodeReg = &structs.NodeRegisterRequest{ + Node: n2, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", nodeReg, &nodeResp)) + + // Drain the first node + drainReq := &structs.NodeUpdateDrainRequest{ + NodeID: n1.ID, + DrainStrategy: &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Minute, + }, + }, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var drainResp structs.NodeDrainUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", drainReq, &drainResp)) + + // Wait for the allocs to be replaced + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go allocPromoter(t, ctx, state, codec, n1.ID, s1.logger) + go allocPromoter(t, ctx, state, codec, n2.ID, s1.logger) + + testutil.WaitForResult(func() (bool, error) { + allocs, err := state.AllocsByNode(nil, n2.ID) + if err != nil { + return false, err + } + return len(allocs) == 2, fmt.Errorf("got %d allocs", len(allocs)) + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // Check that the node drain is removed + testutil.WaitForResult(func() (bool, error) { + node, err := state.NodeByID(nil, n1.ID) + if err != nil { + return false, err + } + return node.DrainStrategy == nil, fmt.Errorf("has drain strategy still set") + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index f3553da1fcc3..a44ea1e8f876 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -159,7 +159,7 @@ func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) { // If we are starting now, launch the watch daemon if enabled && !wasEnabled { - n.run(n.ctx) + go n.run(n.ctx) } } diff --git a/nomad/drainerv2/draining_node.go b/nomad/drainerv2/draining_node.go index 01ce49123d65..93b3e5fb31ca 100644 --- a/nomad/drainerv2/draining_node.go +++ b/nomad/drainerv2/draining_node.go @@ -122,3 +122,32 @@ func (n *drainingNode) DeadlineAllocs() ([]*structs.Allocation, error) { return drain, nil } + +// RunningServices returns the set of jobs on the node +func (n *drainingNode) RunningServices() ([]structs.JobNs, error) { + n.l.RLock() + defer n.l.RUnlock() + + // Retrieve the allocs on the node + allocs, err := n.state.AllocsByNode(nil, n.node.ID) + if err != nil { + return nil, err + } + + jobIDs := make(map[structs.JobNs]struct{}) + var jobs []structs.JobNs + for _, alloc := range allocs { + if alloc.TerminalStatus() || alloc.Job.Type != structs.JobTypeService { + continue + } + + jns := structs.NewJobNs(alloc.Namespace, alloc.JobID) + if _, ok := 
jobIDs[jns]; ok { + continue + } + jobIDs[jns] = struct{}{} + jobs = append(jobs, jns) + } + + return jobs, nil +} diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainerv2/watch_jobs.go index b4442cd02469..b0235e6ccda2 100644 --- a/nomad/drainerv2/watch_jobs.go +++ b/nomad/drainerv2/watch_jobs.go @@ -29,7 +29,7 @@ func NewDrainRequest(allocs []*structs.Allocation) *DrainRequest { // DrainingJobWatcher is the interface for watching a job drain type DrainingJobWatcher interface { // RegisterJob is used to start watching a draining job - RegisterJob(jobID, namespace string) + RegisterJob(job structs.JobNs) // Drain is used to emit allocations that should be drained. Drain() <-chan *DrainRequest @@ -90,20 +90,17 @@ func NewDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *st } // RegisterJob marks the given job as draining and adds it to being watched. -func (w *drainingJobWatcher) RegisterJob(jobID, namespace string) { +func (w *drainingJobWatcher) RegisterJob(job structs.JobNs) { w.l.Lock() defer w.l.Unlock() - jns := structs.JobNs{ - ID: jobID, - Namespace: namespace, - } - if _, ok := w.jobs[jns]; ok { + if _, ok := w.jobs[job]; ok { return } // Add the job and cancel the context - w.jobs[jns] = struct{}{} + w.logger.Printf("[TRACE] nomad.drain.job_watcher: registering job %v", job) + w.jobs[job] = struct{}{} w.queryCancel() // Create a new query context @@ -135,10 +132,11 @@ func (w *drainingJobWatcher) deregisterJob(jobID, namespace string) { // watch is the long lived watching routine that detects job drain changes. func (w *drainingJobWatcher) watch() { - jindex := uint64(1) + waitIndex := uint64(1) for { - w.logger.Printf("[TRACE] nomad.drain.job_watcher: getting job allocs at index %d", jindex) - jobAllocs, index, err := w.getJobAllocs(w.getQueryCtx(), jindex) + w.logger.Printf("[TRACE] nomad.drain.job_watcher: getting job allocs at index %d", waitIndex) + jobAllocs, index, err := w.getJobAllocs(w.getQueryCtx(), waitIndex) + w.logger.Printf("[TRACE] nomad.drain.job_watcher: got job allocs %d at index %d: %v", len(jobAllocs), waitIndex, err) if err != nil { if err == context.Canceled { // Determine if it is a cancel or a shutdown @@ -152,7 +150,7 @@ func (w *drainingJobWatcher) watch() { } } - w.logger.Printf("[ERR] nomad.drain.job_watcher: error watching job allocs updates at index %d: %v", jindex, err) + w.logger.Printf("[ERR] nomad.drain.job_watcher: error watching job allocs updates at index %d: %v", waitIndex, err) select { case <-w.ctx.Done(): w.logger.Printf("[TRACE] nomad.drain.job_watcher: shutting down") @@ -163,8 +161,8 @@ func (w *drainingJobWatcher) watch() { } // update index for next run - lastHandled := jindex - jindex = index + lastHandled := waitIndex + waitIndex = index // Snapshot the state store snap, err := w.state.Snapshot() @@ -175,18 +173,19 @@ func (w *drainingJobWatcher) watch() { currentJobs := w.drainingJobs() var allDrain, allMigrated []*structs.Allocation - for job, allocs := range jobAllocs { + for jns, allocs := range jobAllocs { // Check if the job is still registered - if _, ok := currentJobs[job]; !ok { + if _, ok := currentJobs[jns]; !ok { + w.logger.Printf("[TRACE] nomad.drain.job_watcher: skipping job %v as it is no longer registered for draining", jns) continue } - w.logger.Printf("[TRACE] nomad.drain.job_watcher: handling job %v", job) + w.logger.Printf("[TRACE] nomad.drain.job_watcher: handling job %v", jns) // Lookup the job - job, err := w.state.JobByID(nil, job.Namespace, job.ID) + job, err := w.state.JobByID(nil, 
jns.Namespace, jns.ID) if err != nil { - w.logger.Printf("[WARN] nomad.drain.job_watcher: failed to lookup job %v: %v", job, err) + w.logger.Printf("[WARN] nomad.drain.job_watcher: failed to lookup job %v: %v", jns, err) continue } @@ -198,10 +197,12 @@ func (w *drainingJobWatcher) watch() { result, err := handleJob(snap, job, allocs, lastHandled) if err != nil { - w.logger.Printf("[ERR] nomad.drain.job_watcher: handling drain for job %v failed: %v", job, err) + w.logger.Printf("[ERR] nomad.drain.job_watcher: handling drain for job %v failed: %v", jns, err) continue } + w.logger.Printf("[TRACE] nomad.drain.job_watcher: result for job %v: %v", jns, result) + allDrain = append(allDrain, result.drain...) allMigrated = append(allMigrated, result.migrated...) @@ -214,6 +215,7 @@ func (w *drainingJobWatcher) watch() { if len(allDrain) != 0 { // Create the request req := NewDrainRequest(allDrain) + w.logger.Printf("[TRACE] nomad.drain.job_watcher: sending drain request for %d allocs", len(allDrain)) select { case w.drainCh <- req: @@ -235,10 +237,14 @@ func (w *drainingJobWatcher) watch() { w.logger.Printf("[ERR] nomad.drain.job_watcher: failed to transistion allocations: %v", err) } - // TODO Probably want to wait till the new index + // Wait until the new index + if index := req.Resp.Index(); index > waitIndex { + waitIndex = index + } } if len(allMigrated) != 0 { + w.logger.Printf("[TRACE] nomad.drain.job_watcher: sending migrated for %d allocs", len(allMigrated)) select { case w.migratedCh <- allMigrated: case <-w.ctx.Done(): @@ -269,6 +275,10 @@ func newJobResult() *jobResult { } } +func (r *jobResult) String() string { + return fmt.Sprintf("Drain %d ; Migrate %d ; Done %v", len(r.drain), len(r.migrated), r.done) +} + // handleJob takes the state of a draining job and returns the desired actions. func handleJob(snap *state.StateSnapshot, job *structs.Job, allocs []*structs.Allocation, lastHandledIndex uint64) (*jobResult, error) { r := newJobResult() @@ -312,6 +322,8 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, var drainable []*structs.Allocation for _, alloc := range allocs { + fmt.Printf("--- Looking at alloc %q\n", alloc.ID) + // Check if the alloc is on a draining node. onDrainingNode, ok := drainingNodes[alloc.NodeID] if !ok { @@ -333,6 +345,7 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, onDrainingNode && alloc.ModifyIndex > lastHandledIndex { result.migrated = append(result.migrated, alloc) + fmt.Printf("------- Alloc %q marked as migrated\n", alloc.ID) continue } @@ -341,25 +354,33 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, if !alloc.TerminalStatus() && alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil { + fmt.Printf("------- Alloc %q considered as healthy\n", alloc.ID) healthy++ } // An alloc can't be considered for migration if: // - It isn't on a draining node // - It is already terminal - // - It has already been marked for draining - if !onDrainingNode || alloc.TerminalStatus() || alloc.DesiredTransition.ShouldMigrate() { + if !onDrainingNode || alloc.TerminalStatus() { + fmt.Printf("------- Alloc %q not drainable\n", alloc.ID) continue } - // This alloc is drainable, so capture it and the fact that the job - // isn't done draining yet. + // Capture the fact that there is an allocation that is still draining + // for this job. 
remainingDrainingAlloc = true - drainable = append(drainable, alloc) + + // If we haven't marked this allocation for migration already, capture + // it as eligible for draining. + if !alloc.DesiredTransition.ShouldMigrate() { + drainable = append(drainable, alloc) + fmt.Printf("------- Alloc %q drainable\n", alloc.ID) + } } // Update the done status if remainingDrainingAlloc { + fmt.Printf("------- Job has remaining allocs to drain\n") result.done = false } @@ -368,6 +389,7 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, numToDrain := healthy - thresholdCount numToDrain = helper.IntMin(len(drainable), numToDrain) if numToDrain <= 0 { + fmt.Printf("------- Not draining any allocs\n") return nil } @@ -385,6 +407,9 @@ func (w *drainingJobWatcher) getJobAllocs(ctx context.Context, minIndex uint64) if err != nil { return nil, 0, err } + if resp == nil { + return nil, index, nil + } return resp.(map[structs.JobNs][]*structs.Allocation), index, nil } diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go index dd1686e566b3..9b6b32b3a0d4 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainerv2/watch_nodes.go @@ -50,13 +50,13 @@ func (n *NodeDrainer) Update(node *structs.Node) { draining, ok := n.nodes[node.ID] if !ok { - n.nodes[node.ID] = NewDrainingNode(node, n.state) - return + draining = NewDrainingNode(node, n.state) + n.nodes[node.ID] = draining + } else { + // Update it + draining.Update(node) } - // Update it and update the dealiner - draining.Update(node) - // TODO test the notifier is updated if inf, deadline := node.DrainStrategy.DeadlineTime(); !inf { n.deadlineNotifier.Watch(node.ID, deadline) @@ -66,6 +66,21 @@ func (n *NodeDrainer) Update(node *structs.Node) { n.deadlineNotifier.Remove(node.ID) } + // TODO Test this + // Register interest in the draining jobs. 
+ jobs, err := draining.RunningServices() + if err != nil { + n.logger.Printf("[ERR] nomad.drain: error retrieving services on node %q: %v", node.ID, err) + return + } + n.logger.Printf("[TRACE] nomad.drain: node %q has %d services on it", node.ID, len(jobs)) + for _, job := range jobs { + n.jobWatcher.RegisterJob(job) + } + + // TODO we need to check if the node is done such that if an operator drains + // a node with nothing on it we unset drain + } // nodeDrainWatcher is used to watch nodes that are entering, leaving or @@ -105,6 +120,7 @@ func (w *nodeDrainWatcher) watch() { for { w.logger.Printf("[TRACE] nomad.drain.node_watcher: getting nodes at index %d", nindex) nodes, index, err := w.getNodes(nindex) + w.logger.Printf("[TRACE] nomad.drain.node_watcher: got nodes %d at index %d: %v", len(nodes), nindex, err) if err != nil { if err == context.Canceled { w.logger.Printf("[TRACE] nomad.drain.node_watcher: shutting down") diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 8750e7c0829c..f85a4bf48c3b 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1776,18 +1776,14 @@ type JobNs struct { ID, Namespace string } -func NewJobNs(namespace, id string) *JobNs { - return &JobNs{ +func NewJobNs(namespace, id string) JobNs { + return JobNs{ ID: id, Namespace: namespace, } } -func (j *JobNs) String() string { - if j == nil { - return "" - } - +func (j JobNs) String() string { return fmt.Sprintf("", j.Namespace, j.ID) } diff --git a/nomad/worker.go b/nomad/worker.go index 209d0b2938f8..6908188fbaf2 100644 --- a/nomad/worker.go +++ b/nomad/worker.go @@ -327,7 +327,7 @@ SUBMIT: } return nil, nil, err } else { - w.logger.Printf("[DEBUG] worker: submitted plan for evaluation %s", plan.EvalID) + w.logger.Printf("[DEBUG] worker: submitted plan at index %d for evaluation %s", resp.Index, plan.EvalID) w.backoffReset() } From fb40e8babe259632a3dbb0956266141d8b9ec8d4 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 7 Mar 2018 15:16:45 -0800 Subject: [PATCH 43/79] handle empty node case --- nomad/drainer_int_test.go | 43 ++++++++++++++++++++++++++++++++++ nomad/drainerv2/watch_nodes.go | 17 ++++++++++++-- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/nomad/drainer_int_test.go b/nomad/drainer_int_test.go index 0c0372d16925..39422a5a0ddb 100644 --- a/nomad/drainer_int_test.go +++ b/nomad/drainer_int_test.go @@ -186,3 +186,46 @@ func TestDrainer_Simple_ServiceOnly(t *testing.T) { t.Fatalf("err: %v", err) }) } + +func TestDrainer_DrainEmptyNode(t *testing.T) { + t.Parallel() + require := require.New(t) + s1 := TestServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create a node + n1 := mock.Node() + nodeReg := &structs.NodeRegisterRequest{ + Node: n1, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var nodeResp structs.NodeUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", nodeReg, &nodeResp)) + + // Drain the node + drainReq := &structs.NodeUpdateDrainRequest{ + NodeID: n1.ID, + DrainStrategy: &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Minute, + }, + }, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var drainResp structs.NodeDrainUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", drainReq, &drainResp)) + + // Check that the node drain is removed + state := s1.State() + testutil.WaitForResult(func() (bool, error) { + node, err := state.NodeByID(nil, n1.ID) + if err != nil { 
+ return false, err + } + return node.DrainStrategy == nil, fmt.Errorf("has drain strategy still set") + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go index 9b6b32b3a0d4..289767d344e7 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainerv2/watch_nodes.go @@ -78,9 +78,22 @@ func (n *NodeDrainer) Update(node *structs.Node) { n.jobWatcher.RegisterJob(job) } - // TODO we need to check if the node is done such that if an operator drains - // a node with nothing on it we unset drain + // Check if the node is done such that if an operator drains a node with + // nothing on it we unset drain + done, err := draining.IsDone() + if err != nil { + n.logger.Printf("[ERR] nomad.drain: failed to check if node %q is done draining: %v", node.ID, err) + return + } + if done { + index, err := n.raft.NodeDrainComplete(node.ID) + if err != nil { + n.logger.Printf("[ERR] nomad.drain: failed to unset drain for node %q: %v", node.ID, err) + } else { + n.logger.Printf("[INFO] nomad.drain: node %q completed draining at index %d", node.ID, index) + } + } } // nodeDrainWatcher is used to watch nodes that are entering, leaving or From 4b4e234516aef11a7bda444e3b001fe285224ea8 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 7 Mar 2018 15:42:17 -0800 Subject: [PATCH 44/79] Comments --- nomad/drainerv2/drainer.go | 67 +++++++++++++++++++++++++++----- nomad/drainerv2/draining_node.go | 1 + nomad/drainerv2/watch_jobs.go | 1 + nomad/drainerv2/watch_nodes.go | 4 ++ 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index a44ea1e8f876..787f65bfd6aa 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -39,30 +39,48 @@ type RaftApplier interface { NodeDrainComplete(nodeID string) (uint64, error) } +// NodeTracker is the interface to notify an object that is tracking draining +// nodes of changes type NodeTracker interface { + // TrackedNodes returns all the nodes that are currently tracked as + // draining. TrackedNodes() map[string]*structs.Node + + // Remove removes a node from the draining set. Remove(nodeID string) + + // Update either updates the specification of a draining node or tracks the + // node as draining. Update(node *structs.Node) } +// DrainingJobWatcherFactory returns a new DrainingJobWatcher type DrainingJobWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger) DrainingJobWatcher + +// DrainingNodeWatcherFactory returns a new DrainingNodeWatcher type DrainingNodeWatcherFactory func(context.Context, *rate.Limiter, *state.StateStore, *log.Logger, NodeTracker) DrainingNodeWatcher + +// DrainDeadlineNotifierFactory returns a new DrainDeadlineNotifier type DrainDeadlineNotifierFactory func(context.Context) DrainDeadlineNotifier +// GetDrainingJobWatcher returns a draining job watcher func GetDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger) DrainingJobWatcher { return NewDrainingJobWatcher(ctx, limiter, state, logger) } +// GetDeadlineNotifier returns a node deadline notifier with default coalescing. 
func GetDeadlineNotifier(ctx context.Context) DrainDeadlineNotifier { return NewDeadlineHeap(ctx, NodeDeadlineCoalesceWindow) } +// GetNodeWatcherFactory returns a DrainingNodeWatcherFactory func GetNodeWatcherFactory() DrainingNodeWatcherFactory { return func(ctx context.Context, limiter *rate.Limiter, state *state.StateStore, logger *log.Logger, tracker NodeTracker) DrainingNodeWatcher { return NewNodeDrainWatcher(ctx, limiter, state, logger, tracker) } } +// allocMigrateBatcher is used to batch allocation updates. type allocMigrateBatcher struct { // updates holds pending client status updates for allocations updates []*structs.Allocation @@ -81,17 +99,24 @@ type allocMigrateBatcher struct { sync.Mutex } +// NodeDrainerConfig is used to configure a new node drainer. type NodeDrainerConfig struct { - Logger *log.Logger - Raft RaftApplier - JobFactory DrainingJobWatcherFactory - NodeFactory DrainingNodeWatcherFactory - DrainDeadlineFactory DrainDeadlineNotifierFactory + Logger *log.Logger + Raft RaftApplier + JobFactory DrainingJobWatcherFactory + NodeFactory DrainingNodeWatcherFactory + DrainDeadlineFactory DrainDeadlineNotifierFactory + + // StateQueriesPerSecond configures the query limit against the state store + // that is allowed by the node drainer. StateQueriesPerSecond float64 - BatchUpdateInterval time.Duration + + // BatchUpdateInterval is the interval in which allocation updates are + // batched. + BatchUpdateInterval time.Duration } -// TODO Add stats +// TODO(alex) Add stats type NodeDrainer struct { enabled bool logger *log.Logger @@ -99,12 +124,16 @@ type NodeDrainer struct { // nodes is the set of draining nodes nodes map[string]*drainingNode + // nodeWatcher watches for nodes to transistion in and out of drain state. nodeWatcher DrainingNodeWatcher nodeFactory DrainingNodeWatcherFactory + // jobWatcher watches draining jobs and emits desired drains and notifies + // when migrations take place. jobWatcher DrainingJobWatcher jobFactory DrainingJobWatcherFactory + // deadlineNotifier notifies when nodes reach their drain deadline. deadlineNotifier DrainDeadlineNotifier deadlineNotifierFactory DrainDeadlineNotifierFactory @@ -127,6 +156,10 @@ type NodeDrainer struct { l sync.RWMutex } +// NewNodeDrainer returns a new new node drainer. The node drainer is +// responsible for marking allocations on draining nodes with a desired +// migration transistion, updating the drain strategy on nodes when they are +// complete and creating evaluations for the system to react to these changes. func NewNodeDrainer(c *NodeDrainerConfig) *NodeDrainer { return &NodeDrainer{ raft: c.Raft, @@ -177,6 +210,8 @@ func (n *NodeDrainer) flush() { n.nodes = make(map[string]*drainingNode, 32) } +// run is a long lived event handler that receives changes from the relevant +// watchers and takes action based on them. func (n *NodeDrainer) run(ctx context.Context) { for { select { @@ -192,6 +227,9 @@ func (n *NodeDrainer) run(ctx context.Context) { } } +// handleDeadlinedNodes handles a set of nodes reaching their drain deadline. +// The handler detects the remaining allocations on the nodes and immediately +// marks them for migration. func (n *NodeDrainer) handleDeadlinedNodes(nodes []string) { // Retrieve the set of allocations that will be force stopped. n.l.RLock() @@ -215,12 +253,18 @@ func (n *NodeDrainer) handleDeadlinedNodes(nodes []string) { n.batchDrainAllocs(forceStop) } +// handleJobAllocDrain handles marking a set of allocations as having a desired +// transistion to drain. 
The handler blocks till the changes to the allocation +// have occured. func (n *NodeDrainer) handleJobAllocDrain(req *DrainRequest) { // This should be syncronous index, err := n.batchDrainAllocs(req.Allocs) req.Resp.Respond(index, err) } +// handleMigratedAllocs checks to see if any nodes can be considered done +// draining based on the set of allocations that have migrated because of an +// ongoing drain for a job. func (n *NodeDrainer) handleMigratedAllocs(allocs []*structs.Allocation) { // Determine the set of nodes that were effected nodes := make(map[string]struct{}) @@ -251,7 +295,7 @@ func (n *NodeDrainer) handleMigratedAllocs(allocs []*structs.Allocation) { } n.l.RUnlock() - // TODO This should probably be a single Raft transaction + // TODO(alex) This should probably be a single Raft transaction for _, doneNode := range done { index, err := n.raft.NodeDrainComplete(doneNode) if err != nil { @@ -262,6 +306,8 @@ func (n *NodeDrainer) handleMigratedAllocs(allocs []*structs.Allocation) { } } +// batchDrainAllocs is used to batch the draining of allocations. It will block +// until the batch is complete. func (n *NodeDrainer) batchDrainAllocs(allocs []*structs.Allocation) (uint64, error) { // Add this to the batch n.batcher.Lock() @@ -296,8 +342,11 @@ func (n *NodeDrainer) batchDrainAllocs(allocs []*structs.Allocation) (uint64, er return future.Index(), nil } +// drainAllocs is a non batch, marking of the desired transistion to migrate for +// the set of allocations. It will also create the necessary evaluations for the +// affected jobs. func (n *NodeDrainer) drainAllocs(future *structs.BatchFuture, allocs []*structs.Allocation) { - // TODO This should shard to limit the size of the transaction. + // TODO(alex) This should shard to limit the size of the transaction. // Compute the effected jobs and make the transistion map jobs := make(map[string]*structs.Allocation, 4) diff --git a/nomad/drainerv2/draining_node.go b/nomad/drainerv2/draining_node.go index 93b3e5fb31ca..0f13a1b74a77 100644 --- a/nomad/drainerv2/draining_node.go +++ b/nomad/drainerv2/draining_node.go @@ -81,6 +81,7 @@ func (n *drainingNode) IsDone() (bool, error) { return true, nil } +// TODO test that we return the right thing given the strategies // DeadlineAllocs returns the set of allocations that should be drained given a // node is at its deadline func (n *drainingNode) DeadlineAllocs() ([]*structs.Allocation, error) { diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainerv2/watch_jobs.go index b0235e6ccda2..0644f347f0f8 100644 --- a/nomad/drainerv2/watch_jobs.go +++ b/nomad/drainerv2/watch_jobs.go @@ -322,6 +322,7 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, var drainable []*structs.Allocation for _, alloc := range allocs { + // TODO Remove at the end/when no more bugs fmt.Printf("--- Looking at alloc %q\n", alloc.ID) // Check if the alloc is on a draining node. 
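A rough sketch of the per-pass drain budget computed at the end of handleTaskGroup above, assuming thresholdCount is derived as the group count minus the migrate stanza's max_parallel (that assignment falls outside these hunks); with Count=4, MaxParallel=1 and four healthy allocations, only one allocation is released per pass:

func drainBudget(count, maxParallel, healthy, drainable int) int {
	// Keep at least (count - maxParallel) healthy allocations at all times.
	thresholdCount := count - maxParallel
	numToDrain := healthy - thresholdCount
	if drainable < numToDrain {
		numToDrain = drainable
	}
	if numToDrain <= 0 {
		return 0
	}
	return numToDrain
}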
diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainerv2/watch_nodes.go index 289767d344e7..34cc7a9c97d3 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainerv2/watch_nodes.go @@ -78,6 +78,8 @@ func (n *NodeDrainer) Update(node *structs.Node) { n.jobWatcher.RegisterJob(job) } + // TODO Test at this layer as well that a node drain on a node without + // allocs immediately gets unmarked as draining // Check if the node is done such that if an operator drains a node with // nothing on it we unset drain done, err := draining.IsDone() @@ -176,6 +178,8 @@ func (w *nodeDrainWatcher) watch() { default: w.logger.Printf("[TRACE] nomad.drain.node_watcher: node %q at index %v: tracked %v, draining %v", nodeID, node.ModifyIndex, tracked, newDraining) } + + // TODO(schmichael) handle the case of a lost node } for nodeID := range tracked { From bd701979b8f9ee4b7d5c24abc5dd8b7f165c7495 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 7 Mar 2018 16:51:57 -0800 Subject: [PATCH 45/79] spelling fixes --- nomad/drainerv2/drainer.go | 19 +++++++++---------- nomad/drainerv2/watch_jobs.go | 6 +++--- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index 787f65bfd6aa..3ab57a7c5c22 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -124,7 +124,7 @@ type NodeDrainer struct { // nodes is the set of draining nodes nodes map[string]*drainingNode - // nodeWatcher watches for nodes to transistion in and out of drain state. + // nodeWatcher watches for nodes to transition in and out of drain state. nodeWatcher DrainingNodeWatcher nodeFactory DrainingNodeWatcherFactory @@ -158,7 +158,7 @@ type NodeDrainer struct { // NewNodeDrainer returns a new new node drainer. The node drainer is // responsible for marking allocations on draining nodes with a desired -// migration transistion, updating the drain strategy on nodes when they are +// migration transition, updating the drain strategy on nodes when they are // complete and creating evaluations for the system to react to these changes. func NewNodeDrainer(c *NodeDrainerConfig) *NodeDrainer { return &NodeDrainer{ @@ -254,10 +254,9 @@ func (n *NodeDrainer) handleDeadlinedNodes(nodes []string) { } // handleJobAllocDrain handles marking a set of allocations as having a desired -// transistion to drain. The handler blocks till the changes to the allocation -// have occured. +// transition to drain. The handler blocks till the changes to the allocation +// have occurred. func (n *NodeDrainer) handleJobAllocDrain(req *DrainRequest) { - // This should be syncronous index, err := n.batchDrainAllocs(req.Allocs) req.Resp.Respond(index, err) } @@ -342,17 +341,17 @@ func (n *NodeDrainer) batchDrainAllocs(allocs []*structs.Allocation) (uint64, er return future.Index(), nil } -// drainAllocs is a non batch, marking of the desired transistion to migrate for +// drainAllocs is a non batch, marking of the desired transition to migrate for // the set of allocations. It will also create the necessary evaluations for the // affected jobs. func (n *NodeDrainer) drainAllocs(future *structs.BatchFuture, allocs []*structs.Allocation) { // TODO(alex) This should shard to limit the size of the transaction. 
- // Compute the effected jobs and make the transistion map + // Compute the effected jobs and make the transition map jobs := make(map[string]*structs.Allocation, 4) - transistions := make(map[string]*structs.DesiredTransition, len(allocs)) + transitions := make(map[string]*structs.DesiredTransition, len(allocs)) for _, alloc := range allocs { - transistions[alloc.ID] = &structs.DesiredTransition{ + transitions[alloc.ID] = &structs.DesiredTransition{ Migrate: helper.BoolToPtr(true), } jobs[alloc.JobID] = alloc @@ -372,6 +371,6 @@ func (n *NodeDrainer) drainAllocs(future *structs.BatchFuture, allocs []*structs } // Commit this update via Raft - index, err := n.raft.AllocUpdateDesiredTransition(transistions, evals) + index, err := n.raft.AllocUpdateDesiredTransition(transitions, evals) future.Respond(index, err) } diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainerv2/watch_jobs.go index 0644f347f0f8..0f5b04872869 100644 --- a/nomad/drainerv2/watch_jobs.go +++ b/nomad/drainerv2/watch_jobs.go @@ -34,7 +34,7 @@ type DrainingJobWatcher interface { // Drain is used to emit allocations that should be drained. Drain() <-chan *DrainRequest - // Migrated is allocations for draining jobs that have transistioned to + // Migrated is allocations for draining jobs that have transitioned to // stop. There is no guarantee that duplicates won't be published. Migrated() <-chan []*structs.Allocation } @@ -224,7 +224,7 @@ func (w *drainingJobWatcher) watch() { return } - // Wait for the request to be commited + // Wait for the request to be committed select { case <-req.Resp.WaitCh(): case <-w.ctx.Done(): @@ -234,7 +234,7 @@ func (w *drainingJobWatcher) watch() { // See if it successfully committed if err := req.Resp.Error(); err != nil { - w.logger.Printf("[ERR] nomad.drain.job_watcher: failed to transistion allocations: %v", err) + w.logger.Printf("[ERR] nomad.drain.job_watcher: failed to transition allocations: %v", err) } // Wait until the new index From 5b36af986005422dac47b39537a832b27602c360 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 8 Mar 2018 13:25:09 -0800 Subject: [PATCH 46/79] code review --- nomad/drainerv2/drain_heap.go | 36 +++++++++++++++-------------------- nomad/drainerv2/drainer.go | 29 ++++++++++++++-------------- nomad/drainerv2/watch_jobs.go | 4 ++-- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/nomad/drainerv2/drain_heap.go b/nomad/drainerv2/drain_heap.go index b661447e2b12..efde8a92d380 100644 --- a/nomad/drainerv2/drain_heap.go +++ b/nomad/drainerv2/drain_heap.go @@ -31,8 +31,8 @@ type deadlineHeap struct { coalesceWindow time.Duration batch chan []string nodes map[string]time.Time - trigger chan string - l sync.RWMutex + trigger chan struct{} + mu sync.Mutex } // NewDeadlineHeap returns a new deadline heap that coalesces for the given @@ -41,9 +41,9 @@ func NewDeadlineHeap(ctx context.Context, coalesceWindow time.Duration) *deadlin d := &deadlineHeap{ ctx: ctx, coalesceWindow: coalesceWindow, - batch: make(chan []string, 4), + batch: make(chan []string), nodes: make(map[string]time.Time, 64), - trigger: make(chan string, 4), + trigger: make(chan struct{}), } go d.watch() @@ -71,17 +71,18 @@ func (d *deadlineHeap) watch() { continue } - d.l.Lock() + d.mu.Lock() var batch []string for nodeID, nodeDeadline := range d.nodes { if !nodeDeadline.After(nextDeadline) { batch = append(batch, nodeID) + delete(d.nodes, nodeID) } } + d.mu.Unlock() // If there is nothing exit early if len(batch) == 0 { - d.l.Unlock() goto CALC } @@ -89,15 +90,8 @@ func (d 
*deadlineHeap) watch() { select { case d.batch <- batch: case <-d.ctx.Done(): - d.l.Unlock() return } - - // Clean up the nodes - for _, nodeID := range batch { - delete(d.nodes, nodeID) - } - d.l.Unlock() case <-d.trigger: } @@ -117,8 +111,8 @@ func (d *deadlineHeap) watch() { // calculateNextDeadline returns the next deadline in which to scan for // deadlined nodes. It applies the coalesce window. func (d *deadlineHeap) calculateNextDeadline() (time.Time, bool) { - d.l.Lock() - defer d.l.Unlock() + d.mu.Lock() + defer d.mu.Unlock() if len(d.nodes) == 0 { return time.Time{}, false @@ -151,23 +145,23 @@ func (d *deadlineHeap) NextBatch() <-chan []string { } func (d *deadlineHeap) Remove(nodeID string) { - d.l.Lock() - defer d.l.Unlock() + d.mu.Lock() + defer d.mu.Unlock() delete(d.nodes, nodeID) select { - case d.trigger <- nodeID: + case d.trigger <- struct{}{}: default: } } func (d *deadlineHeap) Watch(nodeID string, deadline time.Time) { - d.l.Lock() - defer d.l.Unlock() + d.mu.Lock() + defer d.mu.Unlock() d.nodes[nodeID] = deadline select { - case d.trigger <- nodeID: + case d.trigger <- struct{}{}: default: } } diff --git a/nomad/drainerv2/drainer.go b/nomad/drainerv2/drainer.go index 3ab57a7c5c22..b5842559d0b5 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainerv2/drainer.go @@ -116,7 +116,8 @@ type NodeDrainerConfig struct { BatchUpdateInterval time.Duration } -// TODO(alex) Add stats +// NodeDrainer is used to orchestrate migrating allocations off of draining +// nodes. type NodeDrainer struct { enabled bool logger *log.Logger @@ -180,29 +181,29 @@ func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) { n.l.Lock() defer n.l.Unlock() - wasEnabled := n.enabled + // If we are starting now or have a new state, init state and start the + // run loop n.enabled = enabled - - if state != nil { - n.state = state - } - - // Flush the state to create the necessary objects - n.flush() - - // If we are starting now, launch the watch daemon - if enabled && !wasEnabled { + if enabled { + n.flush(state) go n.run(n.ctx) + } else if !enabled && n.exitFn != nil { + n.exitFn() } } // flush is used to clear the state of the watcher -func (n *NodeDrainer) flush() { - // Kill everything associated with the watcher +func (n *NodeDrainer) flush(state *state.StateStore) { + // Cancel anything that may be running. if n.exitFn != nil { n.exitFn() } + // Store the new state + if state != nil { + n.state = state + } + n.ctx, n.exitFn = context.WithCancel(context.Background()) n.jobWatcher = n.jobFactory(n.ctx, n.queryLimiter, n.state, n.logger) n.nodeWatcher = n.nodeFactory(n.ctx, n.queryLimiter, n.state, n.logger, n) diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainerv2/watch_jobs.go index 0f5b04872869..3a28f647ceff 100644 --- a/nomad/drainerv2/watch_jobs.go +++ b/nomad/drainerv2/watch_jobs.go @@ -81,8 +81,8 @@ func NewDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *st logger: logger, state: state, jobs: make(map[structs.JobNs]struct{}, 64), - drainCh: make(chan *DrainRequest, 8), - migratedCh: make(chan []*structs.Allocation, 8), + drainCh: make(chan *DrainRequest), + migratedCh: make(chan []*structs.Allocation), } go w.watch() From d15371405fb356a0fc244c4cf150d1b97d46e52b Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Thu, 8 Mar 2018 11:06:30 -0800 Subject: [PATCH 47/79] Toggle Drain allows resetting eligibility This PR allows marking a node as eligible for scheduling while toggling drain. 
By default the `nomad node drain -disable` command will mark the node as eligible, but the new `-keep-ineligible` flag maintains the node's scheduling ineligibility.
---
 api/nodes.go                    | 14 +++++++----
 api/nodes_test.go               |  7 ++++--
 command/agent/node_endpoint.go  |  3 ++-
 command/node_drain.go           | 27 ++++++++++++++--------
 nomad/fsm.go                    |  2 +-
 nomad/node_endpoint_test.go     |  2 +-
 nomad/state/state_store.go      |  5 +++-
 nomad/state/state_store_test.go | 41 +++++++++++++++++++++++++++++++--
 nomad/structs/structs.go        |  3 +++
 9 files changed, 82 insertions(+), 22 deletions(-)

diff --git a/api/nodes.go b/api/nodes.go
index a505d9ae369f..9261528544f8 100644
--- a/api/nodes.go
+++ b/api/nodes.go
@@ -51,13 +51,19 @@ type NodeUpdateDrainRequest struct {
 	// DrainSpec is the drain specification to set for the node. A nil DrainSpec
 	// will disable draining.
 	DrainSpec *DrainSpec
+
+	// MarkEligible marks the node as eligible if removing the drain strategy.
+	MarkEligible bool
 }
 
-// UpdateDrain is used to update the drain strategy for a given node.
-func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, q *WriteOptions) (*WriteMeta, error) {
+// UpdateDrain is used to update the drain strategy for a given node. If
+// markEligible is true and the drain is being removed, the node will be marked
+// as eligible for scheduling.
+func (n *Nodes) UpdateDrain(nodeID string, spec *DrainSpec, markEligible bool, q *WriteOptions) (*WriteMeta, error) {
 	req := &NodeUpdateDrainRequest{
-		NodeID:    nodeID,
-		DrainSpec: spec,
+		NodeID:       nodeID,
+		DrainSpec:    spec,
+		MarkEligible: markEligible,
 	}
 
 	wm, err := n.client.write("/v1/node/"+nodeID+"/drain", req, nil, q)
diff --git a/api/nodes_test.go b/api/nodes_test.go
index 22d61c4011af..d2b02b82c243 100644
--- a/api/nodes_test.go
+++ b/api/nodes_test.go
@@ -177,7 +177,7 @@ func TestNodes_ToggleDrain(t *testing.T) {
 	spec := &DrainSpec{
 		Deadline: 10 * time.Second,
 	}
-	wm, err := nodes.UpdateDrain(nodeID, spec, nil)
+	wm, err := nodes.UpdateDrain(nodeID, spec, false, nil)
 	if err != nil {
 		t.Fatalf("err: %s", err)
 	}
@@ -193,7 +193,7 @@ func TestNodes_ToggleDrain(t *testing.T) {
 	}
 
 	// Toggle off again
-	wm, err = nodes.UpdateDrain(nodeID, nil, nil)
+	wm, err = nodes.UpdateDrain(nodeID, nil, true, nil)
 	if err != nil {
 		t.Fatalf("err: %s", err)
 	}
@@ -210,6 +210,9 @@ func TestNodes_ToggleDrain(t *testing.T) {
 	if out.DrainStrategy != nil {
 		t.Fatalf("drain strategy should be unset")
 	}
+	if out.SchedulingEligibility != structs.NodeSchedulingEligible {
+		t.Fatalf("should be eligible")
+	}
 }
 
 func TestNodes_ToggleEligibility(t *testing.T) {
diff --git a/command/agent/node_endpoint.go b/command/agent/node_endpoint.go
index a86df751c1ab..bad4fc445b4d 100644
--- a/command/agent/node_endpoint.go
+++ b/command/agent/node_endpoint.go
@@ -132,7 +132,8 @@ func (s *HTTPServer) nodeToggleDrain(resp http.ResponseWriter, req *http.Request
 	}
 
 	args := structs.NodeUpdateDrainRequest{
-		NodeID: nodeID,
+		NodeID:       nodeID,
+		MarkEligible: drainRequest.MarkEligible,
 	}
 	if drainRequest.DrainSpec != nil {
 		args.DrainStrategy = &structs.DrainStrategy{
diff --git a/command/node_drain.go b/command/node_drain.go
index f6475c7bedd4..9d8326d472a0 100644
--- a/command/node_drain.go
+++ b/command/node_drain.go
@@ -56,6 +56,11 @@ Node Drain Options:
     Ignore system allows the drain to complete without stopping system job
     allocations. By default system jobs are stopped last.
 
+  -keep-ineligible
+    Keep ineligible will maintain the node's scheduling ineligibility even if
+    the drain is being disabled. 
This is useful when an existing drain is being + cancelled but additional scheduling on the node is not desired. + -self Set the drain status of the local node. @@ -72,14 +77,15 @@ func (c *NodeDrainCommand) Synopsis() string { func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-disable": complete.PredictNothing, - "-enable": complete.PredictNothing, - "-deadline": complete.PredictAnything, - "-force": complete.PredictNothing, - "-no-deadline": complete.PredictNothing, - "-ignore-system": complete.PredictNothing, - "-self": complete.PredictNothing, - "-yes": complete.PredictNothing, + "-disable": complete.PredictNothing, + "-enable": complete.PredictNothing, + "-deadline": complete.PredictAnything, + "-force": complete.PredictNothing, + "-no-deadline": complete.PredictNothing, + "-ignore-system": complete.PredictNothing, + "-keep-ineligible": complete.PredictNothing, + "-self": complete.PredictNothing, + "-yes": complete.PredictNothing, }) } @@ -100,7 +106,7 @@ func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor { func (c *NodeDrainCommand) Run(args []string) int { var enable, disable, force, - noDeadline, ignoreSystem, self, autoYes bool + noDeadline, ignoreSystem, keepIneligible, self, autoYes bool var deadline string flags := c.Meta.FlagSet("node-drain", FlagSetClient) @@ -111,6 +117,7 @@ func (c *NodeDrainCommand) Run(args []string) int { flags.BoolVar(&force, "force", false, "Force immediate drain") flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline") flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node") + flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility") flags.BoolVar(&self, "self", false, "") flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.") @@ -252,7 +259,7 @@ func (c *NodeDrainCommand) Run(args []string) int { } // Toggle node draining - if _, err := client.Nodes().UpdateDrain(node.ID, spec, nil); err != nil { + if _, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil); err != nil { c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) return 1 } diff --git a/nomad/fsm.go b/nomad/fsm.go index b377f09b3fef..bc52f256e343 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -330,7 +330,7 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy); err != nil { + if err := n.state.UpdateNodeDrain(index, req.NodeID, req.DrainStrategy, req.MarkEligible); err != nil { n.logger.Printf("[ERR] nomad.fsm: UpdateNodeDrain failed: %v", err) return err } diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 3d98a942f52b..0a18f937cb17 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -2470,7 +2470,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { Deadline: 10 * time.Second, }, } - if err := state.UpdateNodeDrain(3, node.ID, s); err != nil { + if err := state.UpdateNodeDrain(3, node.ID, s, false); err != nil { t.Fatalf("err: %v", err) } }) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index ef6a51754167..5f4564001135 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -618,7 +618,8 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, 
status string) error } // UpdateNodeDrain is used to update the drain of a node -func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs.DrainStrategy) error { +func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, + drain *structs.DrainStrategy, markEligible bool) error { txn := s.db.Txn(true) defer txn.Abort() @@ -641,6 +642,8 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs copyNode.DrainStrategy = drain if drain != nil { copyNode.SchedulingEligibility = structs.NodeSchedulingIneligible + } else if markEligible { + copyNode.SchedulingEligibility = structs.NodeSchedulingEligible } copyNode.ModifyIndex = index diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 1bf1467deda5..7eeb4672e212 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -716,7 +716,7 @@ func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { }, } - require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain)) + require.Nil(state.UpdateNodeDrain(1001, node.ID, expectedDrain, false)) require.True(watchFired(ws)) ws = memdb.NewWatchSet() @@ -822,6 +822,43 @@ func TestStateStore_NodeEvents_RetentionWindow(t *testing.T) { require.Equal(uint64(20), out.Events[len(out.Events)-1].CreateIndex) } +func TestStateStore_UpdateNodeDrain_ResetEligiblity(t *testing.T) { + require := require.New(t) + state := testStateStore(t) + node := mock.Node() + require.Nil(state.UpsertNode(1000, node)) + + // Create a watchset so we can test that update node drain fires the watch + ws := memdb.NewWatchSet() + _, err := state.NodeByID(ws, node.ID) + require.Nil(err) + + drain := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, + }, + } + + require.Nil(state.UpdateNodeDrain(1001, node.ID, drain, false)) + require.True(watchFired(ws)) + + // Remove the drain + require.Nil(state.UpdateNodeDrain(1002, node.ID, nil, true)) + + ws = memdb.NewWatchSet() + out, err := state.NodeByID(ws, node.ID) + require.Nil(err) + require.False(out.Drain) + require.Nil(out.DrainStrategy) + require.Equal(out.SchedulingEligibility, structs.NodeSchedulingEligible) + require.EqualValues(1002, out.ModifyIndex) + + index, err := state.Index("nodes") + require.Nil(err) + require.EqualValues(1002, index) + require.False(watchFired(ws)) +} + func TestStateStore_UpdateNodeEligibility(t *testing.T) { require := require.New(t) state := testStateStore(t) @@ -860,7 +897,7 @@ func TestStateStore_UpdateNodeEligibility(t *testing.T) { Deadline: -1 * time.Second, }, } - require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain)) + require.Nil(state.UpdateNodeDrain(1002, node.ID, expectedDrain, false)) // Try to set the node to eligible err = state.UpdateNodeEligibility(1003, node.ID, structs.NodeSchedulingEligible) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f85a4bf48c3b..04c073946be5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -308,6 +308,9 @@ type NodeUpdateDrainRequest struct { NodeID string Drain bool // TODO Deprecate DrainStrategy *DrainStrategy + + // MarkEligible marks the node as eligible if removing the drain strategy. 
+ MarkEligible bool WriteRequest } From efb6601129d61b11df63ea779f22606c98ad94f6 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 8 Mar 2018 15:08:23 -0800 Subject: [PATCH 48/79] Switch to drainerv2 impl --- nomad/drainer/drain.go | 590 ------------------ nomad/{drainerv2 => drainer}/drain_heap.go | 2 +- .../{drainerv2 => drainer}/drain_heap_test.go | 2 +- nomad/drainer/drain_test.go | 491 --------------- nomad/{drainerv2 => drainer}/drain_testing.go | 2 +- nomad/{drainerv2 => drainer}/drainer.go | 2 +- nomad/{drainerv2 => drainer}/draining_node.go | 2 +- nomad/drainer/job_watcher.go | 140 ----- nomad/drainer/node_watcher.go | 121 ---- nomad/{drainerv2 => drainer}/watch_jobs.go | 2 +- .../{drainerv2 => drainer}/watch_jobs_test.go | 2 +- nomad/{drainerv2 => drainer}/watch_nodes.go | 2 +- .../watch_nodes_test.go | 6 +- nomad/server.go | 18 +- 14 files changed, 20 insertions(+), 1362 deletions(-) delete mode 100644 nomad/drainer/drain.go rename nomad/{drainerv2 => drainer}/drain_heap.go (99%) rename nomad/{drainerv2 => drainer}/drain_heap_test.go (99%) delete mode 100644 nomad/drainer/drain_test.go rename nomad/{drainerv2 => drainer}/drain_testing.go (98%) rename nomad/{drainerv2 => drainer}/drainer.go (99%) rename nomad/{drainerv2 => drainer}/draining_node.go (99%) delete mode 100644 nomad/drainer/job_watcher.go delete mode 100644 nomad/drainer/node_watcher.go rename nomad/{drainerv2 => drainer}/watch_jobs.go (99%) rename nomad/{drainerv2 => drainer}/watch_jobs_test.go (99%) rename nomad/{drainerv2 => drainer}/watch_nodes.go (99%) rename nomad/{drainerv2 => drainer}/watch_nodes_test.go (97%) diff --git a/nomad/drainer/drain.go b/nomad/drainer/drain.go deleted file mode 100644 index 8db56ac7dacc..000000000000 --- a/nomad/drainer/drain.go +++ /dev/null @@ -1,590 +0,0 @@ -package drainer - -import ( - "context" - "log" - "strings" - "time" - - "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" -) - -// jobKey is a tuple of namespace+jobid for use as a map key by job -type jobKey struct { - ns string - jobid string -} - -// runningJob contains the Job and allocations for that job meant to be used -// when collecting all allocations for a job with at least one allocation on a -// draining node. -// -// In order to drain an allocation we must also emit an evaluation for its job, -// so this struct bundles allocations with their job. -type runningJob struct { - job *structs.Job - allocs []*structs.Allocation -} - -// collectResult is the state collected by scanning for drain eligible allocs -type collectResult struct { - // drainableSvcs contains all service jobs and allocs that are - // potentially drainable meaning they have at least one allocation on a - // draining node. - drainableSvcs map[jobKey]*runningJob - - // drainNow contains all batch and system jobs that should be - // immediately drained due to a deadline or in the case of system jobs: - // all other allocs on the node have completed draining. - drainNow map[jobKey]*runningJob - - // upPerTG is a count of running allocs per task group for the - // migration mark phase to use when considering how many allocs can be - // migrated for a given group. - upPerTG map[string]int - - // doneNodes need no coordinating to finish their drain. Either all - // allocs have drained, the node is being force drained, or the drain - // deadline was hit. Any remaining allocs will be migrated via - // drainNow. 
- doneNodes map[string]*structs.Node -} - -// makeTaskGroupKey returns a unique key for an allocation's task group -func makeTaskGroupKey(a *structs.Allocation) string { - return strings.Join([]string{a.Namespace, a.JobID, a.TaskGroup}, "-") -} - -// stopAllocs tracks allocs to drain by a unique TG key along with their jobs -// as we need to emit evaluations for each allocations job -type stopAllocs struct { - allocBatch map[string]*structs.DesiredTransition - - // namespace+jobid -> Job - jobBatch map[jobKey]*structs.Job -} - -// newStopAllocs creates a list of allocs to migrate from an initial list of -// running jobs+allocs that need immediate draining. -func newStopAllocs(initial map[jobKey]*runningJob) *stopAllocs { - s := &stopAllocs{ - allocBatch: make(map[string]*structs.DesiredTransition), - jobBatch: make(map[jobKey]*structs.Job), - } - - // Add initial allocs - for _, drainingJob := range initial { - for _, a := range drainingJob.allocs { - s.add(drainingJob.job, a) - } - } - return s -} - -// add an allocation to be migrated. Its job must also be specified in order to -// emit an evaluation. -func (s *stopAllocs) add(j *structs.Job, a *structs.Allocation) { - // Add the desired migration transition to the batch - s.allocBatch[a.ID] = &structs.DesiredTransition{ - Migrate: helper.BoolToPtr(true), - } - - // Add job to the job batch - s.jobBatch[jobKey{a.Namespace, a.JobID}] = j -} - -// RaftApplier contains methods for applying the raft requests required by the -// NodeDrainer. -type RaftApplier interface { - AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) error - NodeDrainComplete(nodeID string) error -} - -// nodeDrainerState is used to communicate the state set by -// NodeDrainer.SetEnabled to the concurrently executing Run loop. -type nodeDrainerState struct { - enabled bool - state *state.StateStore -} - -// NodeDrainer migrates allocations off of draining nodes. SetEnabled(true) -// should be called when a server establishes leadership and SetEnabled(false) -// called when leadership is lost. -type NodeDrainer struct { - // enabledCh is used by SetEnabled to signal Run when to start/stop the - // nodeDrainer goroutine - enabledCh chan nodeDrainerState - - // raft is a shim around the raft messages necessary for draining - raft RaftApplier - - // shutdownCh is closed when the Server is shutting down the - // NodeDrainer should permanently exit - shutdownCh <-chan struct{} - - logger *log.Logger -} - -// NewNodeDrainer creates a new NodeDrainer which will exit when shutdownCh is -// closed. A RaftApplier shim must be supplied to allow NodeDrainer access to -// the raft messages it sends. -func NewNodeDrainer(logger *log.Logger, shutdownCh <-chan struct{}, raft RaftApplier) *NodeDrainer { - return &NodeDrainer{ - enabledCh: make(chan nodeDrainerState), - raft: raft, - shutdownCh: shutdownCh, - logger: logger, - } -} - -// SetEnabled will start or stop the node draining goroutine depending on the -// enabled boolean. SetEnabled is meant to be called concurrently with Run. -func (n *NodeDrainer) SetEnabled(enabled bool, state *state.StateStore) { - select { - case n.enabledCh <- nodeDrainerState{enabled, state}: - case <-n.shutdownCh: - } -} - -// Run monitors the shutdown chan as well as SetEnabled calls and starts/stops -// the node draining goroutine appropriately. As it blocks it should be called -// in a goroutine. 
-func (n *NodeDrainer) Run() { - running := false - var s nodeDrainerState - ctx, cancel := context.WithCancel(context.Background()) - for { - select { - case s = <-n.enabledCh: - case <-n.shutdownCh: - // Stop drainer and exit - cancel() - return - } - - switch { - case s.enabled && running: - // Already running, must restart to ensure the latest StateStore is used - cancel() - ctx, cancel = context.WithCancel(context.Background()) - go n.nodeDrainer(ctx, s.state) - - case !s.enabled && !running: - // Already stopped; nothing to do - - case !s.enabled && running: - // Stop running node drainer - cancel() - running = false - - case s.enabled && !running: - // Start running node drainer - ctx, cancel = context.WithCancel(context.Background()) - go n.nodeDrainer(ctx, s.state) - running = true - } - } -} - -// getNextDeadline is a helper that takes a set of draining nodes and returns the -// next deadline. It also returns a boolean if there is a deadline. -func getNextDeadline(nodes map[string]*structs.Node) (time.Time, bool) { - var nextDeadline time.Time - found := false - for _, node := range nodes { - inf, d := node.DrainStrategy.DeadlineTime() - if !inf && (nextDeadline.IsZero() || d.Before(nextDeadline)) { - nextDeadline = d - found = true - } - } - - return nextDeadline, found -} - -// nodeDrainer is the core node draining main loop and should be started in a -// goroutine when a server establishes leadership. -func (n *NodeDrainer) nodeDrainer(ctx context.Context, state *state.StateStore) { - nodes, nodesIndex, drainingJobs, allocsIndex := initDrainer(n.logger, state) - - // Wait for a node's drain deadline to expire - nextDeadline, ok := getNextDeadline(nodes) - deadlineTimer := time.NewTimer(time.Until(nextDeadline)) - stopDeadlineTimer := func() { - if !deadlineTimer.Stop() { - select { - case <-deadlineTimer.C: - default: - } - } - } - if !ok { - stopDeadlineTimer() - } - - // Watch for nodes to start or stop draining - nodeWatcher := newNodeWatcher(n.logger, nodes, nodesIndex, state) - go nodeWatcher.run(ctx) - - // Watch for drained allocations to be replaced - // Watch for changes in allocs for jobs with allocs on draining nodes - jobWatcher := newJobWatcher(n.logger, drainingJobs, allocsIndex, state) - go jobWatcher.run(ctx) - - for { - n.logger.Printf("[TRACE] nomad.drain: LOOP next deadline: %s (%s)", nextDeadline, time.Until(nextDeadline)) - select { - case nodes = <-nodeWatcher.nodesCh: - // update draining nodes - n.logger.Printf("[TRACE] nomad.drain: running due to node change (%d nodes draining)", len(nodes)) - - d, ok := getNextDeadline(nodes) - if ok && !nextDeadline.Equal(d) { - nextDeadline = d - n.logger.Printf("[TRACE] nomad.drain: new node deadline: %s", nextDeadline) - stopDeadlineTimer() - deadlineTimer.Reset(time.Until(nextDeadline)) - } else if !ok { - stopDeadlineTimer() - } - - case jobs := <-jobWatcher.WaitCh(): - n.logger.Printf("[TRACE] nomad.drain: running due to alloc change (%d jobs updated)", len(jobs)) - case when := <-deadlineTimer.C: - // deadline for a node was reached - n.logger.Printf("[TRACE] nomad.drain: running due to deadline reached (at %s)", when) - case <-ctx.Done(): - // exit - return - } - - // Capture state (statestore and time) to do consistent comparisons - snapshot, err := state.Snapshot() - if err != nil { - //FIXME - panic(err) - } - now := time.Now() - - // Collect all drainable jobs - result, err := n.collectDrainable(nodes, snapshot, jobWatcher, now) - if err != nil { - //FIXME - panic(err) - } - - // stoplist are the 
allocations to migrate and their jobs to emit - // evaluations for. Initialized with allocations that should be - // immediately drained regardless of MaxParallel - stoplist := newStopAllocs(result.drainNow) - - // build drain list considering deadline & max_parallel - n.markMigrations(stoplist, result.upPerTG, result.drainableSvcs, nodes, now) - - if len(stoplist.allocBatch) > 0 { - if err := n.applyMigrations(stoplist); err != nil { - //FIXME - panic(err) - } - } - - // Unset drain for nodes done draining - for nodeID, node := range result.doneNodes { - if err := n.raft.NodeDrainComplete(nodeID); err != nil { - n.logger.Printf("[ERR] nomad.drain: failed to unset drain for: %v", err) - //FIXME - panic(err) - } - n.logger.Printf("[INFO] nomad.drain: node %s (%s) completed draining", nodeID, node.Name) - delete(nodes, nodeID) - } - } -} - -// collectDrainable scans all nodes and allocs on draining nodes and builds a -// structure of eligible allocs to drain. -func (n *NodeDrainer) collectDrainable(nodes map[string]*structs.Node, state *state.StateSnapshot, - jobWatcher *jobWatcher, now time.Time) (*collectResult, error) { - - svcs := map[jobKey]*runningJob{} - drainNow := map[jobKey]*runningJob{} - upPerTG := map[string]int{} - doneNodes := map[string]*structs.Node{} - - for nodeID, node := range nodes { - allocs, err := state.AllocsByNode(nil, nodeID) - if err != nil { - return nil, err - } - - // drainableSys are allocs for system jobs that should be - // drained if there are no other allocs left - drainableSys := map[jobKey]*runningJob{} - - // track number of allocs left on this node to be drained - allocsLeft := false - inf, deadline := node.DrainStrategy.DeadlineTime() - deadlineReached := !inf && deadline.Before(now) - for _, alloc := range allocs { - // Don't need to consider drained allocs - if alloc.TerminalStatus() { - continue - } - - jobkey := jobKey{alloc.Namespace, alloc.JobID} - - // job does not found yet - job, err := state.JobByID(nil, alloc.Namespace, alloc.JobID) - if err != nil { - return nil, err - } - - // IgnoreSystemJobs if specified in the node's DrainStrategy - if node.DrainStrategy.IgnoreSystemJobs && job.Type == structs.JobTypeSystem { - continue - } - - // When the node deadline is reached all batch - // and service jobs will be drained - if deadlineReached && job.Type != structs.JobTypeService { - n.logger.Printf("[TRACE] nomad.drain: draining alloc %s due to node %s reaching drain deadline", alloc.ID, node.ID) - if j, ok := drainNow[jobkey]; ok { - j.allocs = append(j.allocs, alloc) - } else { - // First alloc for this job, create entry - drainNow[jobkey] = &runningJob{ - job: job, - allocs: []*structs.Allocation{alloc}, - } - } - continue - } - - // If deadline hasn't been reached, system jobs - // may still be drained if there are no other - // allocs left - if !deadlineReached && job.Type == structs.JobTypeSystem { - n.logger.Printf("[TRACE] nomad.drain: system alloc %s will be drained if no other allocs on node %s", alloc.ID, node.ID) - if j, ok := drainableSys[jobkey]; ok { - j.allocs = append(j.allocs, alloc) - } else { - // First alloc for this job, create entry - drainableSys[jobkey] = &runningJob{ - job: job, - allocs: []*structs.Allocation{alloc}, - } - } - continue - } - - // This alloc is still running on a draining - // node, so treat the node as having allocs - // remaining - allocsLeft = true - - jobAllocs, err := state.AllocsByJob(nil, alloc.Namespace, alloc.JobID, true) - if err != nil { - return nil, err - } - - // Count the number of down 
(terminal or nil deployment status) per task group - if job.Type == structs.JobTypeService { - num := 0 - for _, a := range jobAllocs { - if !a.TerminalStatus() && a.DeploymentStatus != nil { - // Not terminal and health updated, count it as up! - upPerTG[makeTaskGroupKey(a)]++ - num++ - } - } - n.logger.Printf("[TRACE] nomad.drain: job %s has %d allocs running", job.Name, num) - } - - svcs[jobkey] = &runningJob{ - job: job, - allocs: jobAllocs, - } - - jobWatcher.watch(jobkey, nodeID) - } - - // if node has no allocs or has hit its deadline, it's done draining! - if !allocsLeft || deadlineReached { - n.logger.Printf("[TRACE] nomad.drain: node %s has no more allocs left to drain or has reached deadline", nodeID) - jobWatcher.nodeDone(nodeID) - doneNodes[nodeID] = node - - // Add all system jobs on this node to the drainNow slice - for k, sysj := range drainableSys { - if j, ok := drainNow[k]; ok { - // Job already has at least one alloc draining, append this one - j.allocs = append(j.allocs, sysj.allocs...) - } else { - // First draining alloc for this job, add the entry - drainNow[k] = sysj - } - } - } - } - - result := &collectResult{ - drainableSvcs: svcs, - drainNow: drainNow, - upPerTG: upPerTG, - doneNodes: doneNodes, - } - return result, nil -} - -// markMigrations marks services to be drained for migration in the stoplist. -func (n *NodeDrainer) markMigrations(stoplist *stopAllocs, upPerTG map[string]int, drainable map[jobKey]*runningJob, nodes map[string]*structs.Node, now time.Time) { - for _, drainingJob := range drainable { - for _, alloc := range drainingJob.allocs { - // Already draining/dead allocs don't need to be drained - if alloc.TerminalStatus() { - continue - } - - node, ok := nodes[alloc.NodeID] - if !ok { - // Alloc's node is not draining so not elligible for draining! 
- continue - } - - tgKey := makeTaskGroupKey(alloc) - - if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) { - n.logger.Printf("[TRACE] nomad.drain: draining job %s alloc %s from node %s due to node's drain deadline", drainingJob.job.Name, alloc.ID[:6], alloc.NodeID[:6]) - // Alloc's Node has reached its deadline - stoplist.add(drainingJob.job, alloc) - upPerTG[tgKey]-- - - continue - } - - // Stop allocs with count=1, max_parallel==0, or draining node.ID - jobs := map[jobKey]string{} - - for { - raw := iter.Next() - if raw == nil { - break - } - - // Filter on datacenter and status - node := raw.(*structs.Node) - if !node.Drain { - continue - } - - // Track draining node - nodes[node.ID] = node - - // No point in tracking draining allocs as the deadline has been reached - if inf, d := node.DrainStrategy.DeadlineTime(); !inf && d.Before(now) { - continue - } - - allocs, err := snapshot.AllocsByNode(nil, node.ID) - if err != nil { - logger.Printf("[ERR] nomad.drain: error iterating allocs for node %q: %v", node.ID, err) - panic(err) //FIXME - } - - for _, alloc := range allocs { - jobs[jobKey{alloc.Namespace, alloc.JobID}] = node.ID - } - } - - nodesIndex, _ := snapshot.Index("nodes") - if nodesIndex == 0 { - nodesIndex = 1 - } - allocsIndex, _ := snapshot.Index("allocs") - if allocsIndex == 0 { - allocsIndex = 1 - } - return nodes, nodesIndex, jobs, allocsIndex -} diff --git a/nomad/drainerv2/drain_heap.go b/nomad/drainer/drain_heap.go similarity index 99% rename from nomad/drainerv2/drain_heap.go rename to nomad/drainer/drain_heap.go index efde8a92d380..1a6c23f13cf9 100644 --- a/nomad/drainerv2/drain_heap.go +++ b/nomad/drainer/drain_heap.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "context" diff --git a/nomad/drainerv2/drain_heap_test.go b/nomad/drainer/drain_heap_test.go similarity index 99% rename from nomad/drainerv2/drain_heap_test.go rename to nomad/drainer/drain_heap_test.go index a47a98ff7473..147ad9192eff 100644 --- a/nomad/drainerv2/drain_heap_test.go +++ b/nomad/drainer/drain_heap_test.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "context" diff --git a/nomad/drainer/drain_test.go b/nomad/drainer/drain_test.go deleted file mode 100644 index 993a65fcd0ed..000000000000 --- a/nomad/drainer/drain_test.go +++ /dev/null @@ -1,491 +0,0 @@ -package drainer_test - -import ( - "fmt" - "net" - "net/rpc" - "sort" - "strings" - "testing" - "time" - - "github.com/hashicorp/nomad/client" - "github.com/hashicorp/nomad/client/config" - "github.com/hashicorp/nomad/helper/pool" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/nomad" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" - "github.com/hashicorp/nomad/testutil/rpcapi" - "github.com/kr/pretty" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -// rpcClient is a test helper method to return a ClientCodec to use to make rpc -// calls to the passed server. -func rpcClient(t *testing.T, conf *nomad.Config) rpc.ClientCodec { - addr := conf.RPCAddr - conn, err := net.DialTimeout("tcp", addr.String(), time.Second) - if err != nil { - t.Fatalf("err: %v", err) - } - // Write the Nomad RPC byte to set the mode - conn.Write([]byte{byte(pool.RpcNomad)}) - return pool.NewClientCodec(conn) -} - -// TestNodeDrainer_SimpleDrain asserts that draining when there are two nodes -// moves allocs from the draining node to the other node. 
-func TestNodeDrainer_SimpleDrain(t *testing.T) { - assert := assert.New(t) - require := require.New(t) - - // Capture test servers config - var serverConfig *nomad.Config - server := nomad.TestServer(t, func(c *nomad.Config) { - serverConfig = c - }) - defer server.Shutdown() - - testutil.WaitForLeader(t, server.RPC) - - // Setup 2 Nodes: A & B; A has allocs and is draining - - // Create mock jobs - state := server.State() - - serviceJob := mock.Job() - serviceJob.Name = "service-job" - serviceJob.Type = structs.JobTypeService - serviceJob.Constraints = nil - serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{ - MaxParallel: 1, - HealthCheck: structs.MigrateStrategyHealthStates, - MinHealthyTime: time.Millisecond, - HealthyDeadline: 2 * time.Second, - } - serviceJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" - serviceJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() - serviceJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ - "run_for": "10m", - "kill_after": "1ms", - } - serviceJob.TaskGroups[0].Tasks[0].Services = nil - - systemJob := mock.SystemJob() - systemJob.Name = "system-job" - systemJob.Type = structs.JobTypeSystem - systemJob.Constraints = nil - systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" - systemJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ - "run_for": "10m", - "kill_after": "1ms", - } - systemJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() - systemJob.TaskGroups[0].Tasks[0].Services = nil - - // Batch job will run until the node's drain deadline is reached - batchJob := mock.Job() - batchJob.Name = "batch-job" - batchJob.Type = structs.JobTypeBatch - batchJob.Constraints = nil - batchJob.TaskGroups[0].Name = "batch-group" - batchJob.TaskGroups[0].Migrate = nil - batchJob.TaskGroups[0].Tasks[0].Name = "batch-task" - batchJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" - batchJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ - "run_for": "10m", - "kill_after": "1ms", - "exit_code": 13, // set nonzero exit code to cause rescheduling - } - batchJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() - batchJob.TaskGroups[0].Tasks[0].Services = nil - - // Start node 1 - c1 := client.TestClient(t, func(conf *config.Config) { - conf.LogOutput = testlog.NewWriter(t) - conf.Servers = []string{serverConfig.RPCAddr.String()} - }) - defer c1.Shutdown() - node1ID := c1.NodeID() - - // Start jobs so they all get placed on node 1 - codec := rpcClient(t, serverConfig) - rpc := rpcapi.NewRPC(codec) - for _, job := range []*structs.Job{systemJob, serviceJob, batchJob} { - resp, err := rpc.JobRegister(job) - require.Nil(err) - require.NotZero(resp.Index) - } - - // Wait for jobs to start on c1 - testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(node1ID) - if err != nil { - return false, err - } - - system, batch, service := 0, 0, 0 - for _, alloc := range resp.Allocs { - if alloc.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) - } - switch alloc.JobID { - case batchJob.ID: - batch++ - case serviceJob.ID: - service++ - case systemJob.ID: - system++ - } - } - // 1 system + 10 batch + 10 service = 21 - if system+batch+service != 21 { - return false, fmt.Errorf("wrong number of allocs: system %d/1, batch %d/10, service %d/10", system, batch, service) - } - return true, nil - }, func(err error) { - if resp, err := rpc.NodeGetAllocs(node1ID); err == nil { - for i, 
alloc := range resp.Allocs { - t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) - } - } - if resp, err := rpc.EvalList(); err == nil { - for _, eval := range resp.Evaluations { - t.Logf("% #v\n", pretty.Formatter(eval)) - } - } - t.Fatalf("failed waiting for all allocs to start: %v", err) - }) - - // Start draining node 1 with no deadline - strategy := &structs.DrainStrategy{ - DrainSpec: structs.DrainSpec{ - Deadline: -1 * time.Second, - }, - } - node1Resp, err := rpc.NodeGet(node1ID) - require.Nil(err) - node1 := node1Resp.Node - require.Nil(state.UpdateNodeDrain(node1.ModifyIndex+1, node1ID, strategy)) - - // Start node 2 - c2 := client.TestClient(t, func(conf *config.Config) { - conf.LogOutput = testlog.NewWriter(t) - conf.Servers = []string{serverConfig.RPCAddr.String()} - }) - defer c2.Shutdown() - node2ID := c2.NodeID() - - // Wait for services to be migrated - testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(node2ID) - if err != nil { - return false, err - } - - system, batch, service := 0, 0, 0 - for _, alloc := range resp.Allocs { - if alloc.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) - } - switch alloc.JobID { - case batchJob.ID: - batch++ - case serviceJob.ID: - service++ - case systemJob.ID: - system++ - } - } - // 1 system + 10 batch + 10 service = 21 - if system+batch+service != 21 { - return false, fmt.Errorf("wrong number of allocs: system %d/1, batch %d/10, service %d/10", system, batch, service) - } - return true, nil - }, func(err error) { - if resp, err := rpc.NodeGetAllocs(node2ID); err == nil { - for i, alloc := range resp.Allocs { - t.Logf("%d alloc %s job %s status %s prev %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus, alloc.PreviousAllocation) - } - } - t.Errorf("failed waiting for all allocs to migrate: %v", err) - }) - - // Wait for drained services to be dead - testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(c1.NodeID()) - if err != nil { - return false, err - } - - running := make([]string, 0, len(resp.Allocs)) - for _, alloc := range resp.Allocs { - if alloc.ClientStatus == structs.AllocClientStatusRunning { - running = append(running, alloc.ID[:6]) - } - } - - if len(running) > 0 { - return false, fmt.Errorf("%d alloc(s) on draining node %s still running: %s", len(running), c1.NodeID()[:6], running) - } - return true, nil - }, func(err error) { - t.Errorf("failed waiting for all draining allocs to stop: %v", err) - }) - - node1Resp, err = rpc.NodeGet(node1ID) - require.Nil(err) - node1 = node1Resp.Node - assert.False(node1.Drain) - assert.Nil(node1.DrainStrategy) - assert.Equal(structs.NodeSchedulingIneligible, node1.SchedulingEligibility) - - jobs, err := rpc.JobList() - require.Nil(err) - t.Logf("--> %d jobs", len(jobs.Jobs)) - for _, job := range jobs.Jobs { - t.Logf("job: %s status: %s %s", job.Name, job.Status, job.StatusDescription) - } - - allocs, err := rpc.AllocAll() - require.Nil(err) - - sort.Slice(allocs, func(i, j int) bool { - r := strings.Compare(allocs[i].Job.Name, allocs[j].Job.Name) - switch { - case r < 0: - return true - case r == 0: - return allocs[i].ModifyIndex < allocs[j].ModifyIndex - case r > 0: - return false - } - panic("unreachable") - }) - - t.Logf("--> %d allocs", len(allocs)) - for _, alloc := range allocs { - t.Logf("job: %s node: %s alloc: %s desired_status: %s desired_transition: %s actual: %s 
replaces: %s", - alloc.Job.Name, alloc.NodeID[:6], alloc.ID[:6], alloc.DesiredStatus, pretty.Sprint(alloc.DesiredTransition.Migrate), alloc.ClientStatus, alloc.PreviousAllocation) - } - - t.Logf("==> PASS") -} - -// TestNodeDrainer_SystemDrain asserts system jobs are drained -func TestNodeDrainer_SystemDrain(t *testing.T) { - assert := assert.New(t) - require := require.New(t) - - // Capture test servers config - var serverConfig *nomad.Config - server := nomad.TestServer(t, func(c *nomad.Config) { - serverConfig = c - }) - defer server.Shutdown() - - testutil.WaitForLeader(t, server.RPC) - - // Setup 2 Nodes: A & B; A has allocs and is draining - - // Create mock jobs - state := server.State() - - serviceJob := mock.Job() - serviceJob.Name = "service-job" - serviceJob.Type = structs.JobTypeService - serviceJob.Constraints = nil - serviceJob.TaskGroups[0].Count = 2 - serviceJob.TaskGroups[0].Migrate = &structs.MigrateStrategy{ - MaxParallel: 1, - HealthCheck: structs.MigrateStrategyHealthStates, - MinHealthyTime: time.Millisecond, - HealthyDeadline: 2 * time.Second, - } - serviceJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" - serviceJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() - serviceJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ - "run_for": "10m", - "kill_after": "1ms", - } - serviceJob.TaskGroups[0].Tasks[0].Services = nil - - systemJob := mock.SystemJob() - systemJob.Name = "system-job" - systemJob.Type = structs.JobTypeSystem - systemJob.Constraints = nil - systemJob.TaskGroups[0].Tasks[0].Driver = "mock_driver" - systemJob.TaskGroups[0].Tasks[0].Config = map[string]interface{}{ - "run_for": "10m", - "kill_after": "1ms", - } - systemJob.TaskGroups[0].Tasks[0].Resources = structs.MinResources() - systemJob.TaskGroups[0].Tasks[0].Services = nil - - // Start node 1 - c1 := client.TestClient(t, func(conf *config.Config) { - conf.LogOutput = testlog.NewWriter(t) - conf.Servers = []string{serverConfig.RPCAddr.String()} - }) - defer c1.Shutdown() - node1ID := c1.NodeID() - - // Start jobs so they all get placed on node 1 - codec := rpcClient(t, serverConfig) - rpc := rpcapi.NewRPC(codec) - for _, job := range []*structs.Job{systemJob, serviceJob} { - resp, err := rpc.JobRegister(job) - require.Nil(err) - require.NotZero(resp.Index) - } - - // Wait for jobs to start on c1 - testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(c1.NodeID()) - if err != nil { - return false, err - } - - system, service := 0, 0 - for _, alloc := range resp.Allocs { - if alloc.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) - } - switch alloc.JobID { - case serviceJob.ID: - service++ - case systemJob.ID: - system++ - default: - return false, fmt.Errorf("unknown job: %s", alloc.Job.Name) - } - } - // 1 system + 2 service = 3 - if system+service != 3 { - return false, fmt.Errorf("wrong number of allocs: system %d/1, service %d/2", system, service) - } - return true, nil - }, func(err error) { - if resp, err := rpc.NodeGetAllocs(c1.NodeID()); err == nil { - for i, alloc := range resp.Allocs { - t.Logf("%d alloc %s job %s status %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus) - } - } - t.Fatalf("failed waiting for all allocs to start: %v", err) - }) - - // Start draining node 1 - strategy := &structs.DrainStrategy{ - DrainSpec: structs.DrainSpec{ - Deadline: 1 * time.Hour, - }, - } - node1Resp, err := rpc.NodeGet(node1ID) - 
require.Nil(err) - node1 := node1Resp.Node - require.Nil(state.UpdateNodeDrain(node1.ModifyIndex+1, node1ID, strategy)) - - // Start node 2 - c2 := client.TestClient(t, func(conf *config.Config) { - conf.LogOutput = testlog.NewWriter(t) - conf.Servers = []string{serverConfig.RPCAddr.String()} - }) - defer c2.Shutdown() - node2ID := c2.NodeID() - - // Wait for services to be migrated - testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(node2ID) - if err != nil { - return false, err - } - - system, service := 0, 0 - for _, alloc := range resp.Allocs { - if alloc.ClientStatus != structs.AllocClientStatusRunning { - return false, fmt.Errorf("alloc %s for job %s not running: %s", alloc.ID, alloc.Job.Name, alloc.ClientStatus) - } - switch alloc.JobID { - case serviceJob.ID: - service++ - case systemJob.ID: - system++ - default: - return false, fmt.Errorf("unknown job: %s", alloc.Job.Name) - } - } - // 1 system + 2 service = 3 - if system+service != 3 { - return false, fmt.Errorf("wrong number of allocs: system %d/1, service %d/2", system, service) - } - return true, nil - }, func(err error) { - if resp, err := rpc.NodeGetAllocs(node2ID); err == nil { - for i, alloc := range resp.Allocs { - t.Logf("%d alloc %s job %s status %s prev %s", i, alloc.ID, alloc.Job.Name, alloc.ClientStatus, alloc.PreviousAllocation) - } - } - t.Errorf("failed waiting for all allocs to migrate: %v", err) - }) - - // Wait for drained services to be dead - testutil.WaitForResult(func() (bool, error) { - resp, err := rpc.NodeGetAllocs(node1ID) - if err != nil { - return false, err - } - - running := make([]string, 0, len(resp.Allocs)) - for _, alloc := range resp.Allocs { - if alloc.ClientStatus == structs.AllocClientStatusRunning { - running = append(running, alloc.ID[:6]) - } - } - - if len(running) > 0 { - return false, fmt.Errorf("%d alloc(s) on draining node %s still running: %s", len(running), node1ID[:6], running) - } - return true, nil - }, func(err error) { - t.Errorf("failed waiting for all draining allocs to stop: %v", err) - }) - - node1Resp, err = rpc.NodeGet(node1ID) - require.Nil(err) - node1 = node1Resp.Node - assert.False(node1.Drain) - assert.Nil(node1.DrainStrategy) - assert.Equal(structs.NodeSchedulingIneligible, node1.SchedulingEligibility) - - jobs, err := rpc.JobList() - require.Nil(err) - t.Logf("--> %d jobs", len(jobs.Jobs)) - for _, job := range jobs.Jobs { - t.Logf("job: %s status: %s %s", job.Name, job.Status, job.StatusDescription) - } - - allocs, err := rpc.AllocAll() - require.Nil(err) - - sort.Slice(allocs, func(i, j int) bool { - r := strings.Compare(allocs[i].Job.Name, allocs[j].Job.Name) - switch { - case r < 0: - return true - case r == 0: - return allocs[i].ModifyIndex < allocs[j].ModifyIndex - case r > 0: - return false - } - panic("unreachable") - }) - - t.Logf("--> %d allocs", len(allocs)) - for _, alloc := range allocs { - t.Logf("job: %s node: %s alloc: %s desired_status: %s desired_transition: %s actual: %s replaces: %s", - alloc.Job.Name, alloc.NodeID[:6], alloc.ID[:6], alloc.DesiredStatus, pretty.Sprint(alloc.DesiredTransition.Migrate), alloc.ClientStatus, alloc.PreviousAllocation) - } -} diff --git a/nomad/drainerv2/drain_testing.go b/nomad/drainer/drain_testing.go similarity index 98% rename from nomad/drainerv2/drain_testing.go rename to nomad/drainer/drain_testing.go index 60d710e4a593..5af351fe819f 100644 --- a/nomad/drainerv2/drain_testing.go +++ b/nomad/drainer/drain_testing.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "sync" 
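For reference, the removed v1 loop above rested on an "earliest deadline wins" timer idiom: scan the draining nodes for the soonest finite deadline and re-arm a single timer against it, draining the channel on Stop so a stale tick is never consumed. A minimal, runnable sketch of that idiom follows; the node struct, the nextDeadline helper, and the use of a zero time to mean "no deadline" are simplifications for illustration only, not the real structs.Node or DrainStrategy API.

package main

import (
	"fmt"
	"time"
)

// node stands in for structs.Node; only the drain deadline matters here.
// A zero deadline models a drain with no deadline.
type node struct {
	id       string
	deadline time.Time
}

// nextDeadline returns the earliest finite deadline across the draining
// nodes and whether any such deadline exists.
func nextDeadline(nodes map[string]*node) (time.Time, bool) {
	var next time.Time
	found := false
	for _, n := range nodes {
		if n.deadline.IsZero() {
			continue // no deadline for this node
		}
		if !found || n.deadline.Before(next) {
			next = n.deadline
			found = true
		}
	}
	return next, found
}

func main() {
	nodes := map[string]*node{
		"n1": {id: "n1", deadline: time.Now().Add(2 * time.Second)},
		"n2": {id: "n2"}, // draining without a deadline
	}

	// One timer tracks the earliest deadline; it is stopped and drained
	// before every Reset, as in the removed nodeDrainer loop.
	deadlineTimer := time.NewTimer(time.Hour)
	stopTimer := func() {
		if !deadlineTimer.Stop() {
			select {
			case <-deadlineTimer.C:
			default:
			}
		}
	}
	stopTimer()

	if next, ok := nextDeadline(nodes); ok {
		deadlineTimer.Reset(time.Until(next))
	}

	<-deadlineTimer.C
	fmt.Println("drain deadline reached; remaining allocs would be force-migrated")
}

In the v2 implementation this responsibility moves out of the main loop and into the dedicated deadline notifier wired up via DrainDeadlineFactory in the server.go change at the end of this patch.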
diff --git a/nomad/drainerv2/drainer.go b/nomad/drainer/drainer.go similarity index 99% rename from nomad/drainerv2/drainer.go rename to nomad/drainer/drainer.go index b5842559d0b5..2b6a328070d0 100644 --- a/nomad/drainerv2/drainer.go +++ b/nomad/drainer/drainer.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "context" diff --git a/nomad/drainerv2/draining_node.go b/nomad/drainer/draining_node.go similarity index 99% rename from nomad/drainerv2/draining_node.go rename to nomad/drainer/draining_node.go index 0f13a1b74a77..078399f049f9 100644 --- a/nomad/drainerv2/draining_node.go +++ b/nomad/drainer/draining_node.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "fmt" diff --git a/nomad/drainer/job_watcher.go b/nomad/drainer/job_watcher.go deleted file mode 100644 index 95a1be5d157e..000000000000 --- a/nomad/drainer/job_watcher.go +++ /dev/null @@ -1,140 +0,0 @@ -package drainer - -import ( - "context" - "log" - "sync" - - memdb "github.com/hashicorp/go-memdb" - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" -) - -// jobWatcher watches allocation changes for jobs with at least one allocation -// on a draining node. -type jobWatcher struct { - // allocsIndex to start watching from - allocsIndex uint64 - - // job -> node.ID - jobs map[jobKey]string - jobsMu sync.Mutex - - jobsCh chan map[jobKey]struct{} - - state *state.StateStore - - logger *log.Logger -} - -func newJobWatcher(logger *log.Logger, jobs map[jobKey]string, allocsIndex uint64, state *state.StateStore) *jobWatcher { - return &jobWatcher{ - allocsIndex: allocsIndex, - logger: logger, - jobs: jobs, - jobsCh: make(chan map[jobKey]struct{}), - state: state, - } -} - -func (j *jobWatcher) watch(k jobKey, nodeID string) { - j.logger.Printf("[TRACE] nomad.drain: watching job %s on draining node %s", k.jobid, nodeID[:6]) - j.jobsMu.Lock() - j.jobs[k] = nodeID - j.jobsMu.Unlock() -} - -func (j *jobWatcher) nodeDone(nodeID string) { - j.jobsMu.Lock() - defer j.jobsMu.Unlock() - for k, v := range j.jobs { - if v == nodeID { - j.logger.Printf("[TRACE] nomad.drain: UNwatching job %s on done draining node %s", k.jobid, nodeID[:6]) - delete(j.jobs, k) - } - } -} - -func (j *jobWatcher) WaitCh() <-chan map[jobKey]struct{} { - return j.jobsCh -} - -func (j *jobWatcher) run(ctx context.Context) { - var resp interface{} - var err error - - for { - //FIXME have watchAllocs create a closure and give it a copy of j.jobs to remove locking? - //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? 
- var newIndex uint64 - resp, newIndex, err = j.state.BlockingQuery(j.watchAllocs, j.allocsIndex, ctx) - if err != nil { - if err == context.Canceled { - j.logger.Printf("[TRACE] nomad.drain: job watcher shutting down") - return - } - j.logger.Printf("[ERR] nomad.drain: error blocking on alloc updates: %v", err) - return - } - - j.logger.Printf("[TRACE] nomad.drain: job watcher old index: %d new index: %d", j.allocsIndex, newIndex) - j.allocsIndex = newIndex - - changedJobs := resp.(map[jobKey]struct{}) - if len(changedJobs) > 0 { - select { - case j.jobsCh <- changedJobs: - case <-ctx.Done(): - return - } - } - } -} - -func (j *jobWatcher) watchAllocs(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { - iter, err := state.Allocs(ws) - if err != nil { - return nil, 0, err - } - - index, err := state.Index("allocs") - if err != nil { - return nil, 0, err - } - - skipped := 0 - - // job ids - resp := map[jobKey]struct{}{} - - for { - raw := iter.Next() - if raw == nil { - break - } - - alloc := raw.(*structs.Allocation) - - j.jobsMu.Lock() - _, ok := j.jobs[jobKey{alloc.Namespace, alloc.JobID}] - j.jobsMu.Unlock() - - if !ok { - // alloc is not part of a draining job - skipped++ - continue - } - - // don't wake drain loop if alloc hasn't updated its health - if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { - j.logger.Printf("[TRACE] nomad.drain: job watcher found alloc %s - deployment status: %t", alloc.ID[:6], *alloc.DeploymentStatus.Healthy) - resp[jobKey{alloc.Namespace, alloc.JobID}] = struct{}{} - } else { - j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring alloc %s - no deployment status", alloc.ID[:6]) - } - } - - j.logger.Printf("[TRACE] nomad.drain: job watcher ignoring %d allocs - not part of draining job at index %d", skipped, index) - - return resp, index, nil -} diff --git a/nomad/drainer/node_watcher.go b/nomad/drainer/node_watcher.go deleted file mode 100644 index 5f419ea2ca91..000000000000 --- a/nomad/drainer/node_watcher.go +++ /dev/null @@ -1,121 +0,0 @@ -package drainer - -import ( - "context" - "log" - - memdb "github.com/hashicorp/go-memdb" - "github.com/hashicorp/nomad/nomad/state" - "github.com/hashicorp/nomad/nomad/structs" -) - -// nodeWatcher watches for nodes to start or stop draining -type nodeWatcher struct { - index uint64 - nodes map[string]*structs.Node - nodesCh chan map[string]*structs.Node - state *state.StateStore - logger *log.Logger -} - -func newNodeWatcher(logger *log.Logger, nodes map[string]*structs.Node, index uint64, state *state.StateStore) *nodeWatcher { - return &nodeWatcher{ - nodes: nodes, - nodesCh: make(chan map[string]*structs.Node), - index: index, - state: state, - logger: logger, - } -} - -func (n *nodeWatcher) run(ctx context.Context) { - // Trigger an initial drain pass if there are already nodes draining - //FIXME this is unneccessary if a node has reached a deadline - n.logger.Printf("[TRACE] nomad.drain: initial draining nodes: %d", len(n.nodes)) - if len(n.nodes) > 0 { - n.nodesCh <- n.nodes - } - - for { - //FIXME it seems possible for this to return a nil error and a 0 index, what to do in that case? 
- resp, index, err := n.state.BlockingQuery(n.queryNodeDrain, n.index, ctx) - if err != nil { - if err == context.Canceled { - n.logger.Printf("[TRACE] nomad.drain: draining node watcher shutting down") - return - } - n.logger.Printf("[ERR] nomad.drain: error blocking on node updates at index %d: %v", n.index, err) - return - } - - // update index for next run - n.index = index - - changed := false - newNodes := resp.([]*structs.Node) - n.logger.Printf("[TRACE] nomad.drain: %d nodes to consider", len(newNodes)) //FIXME remove - for _, newNode := range newNodes { - if existingNode, ok := n.nodes[newNode.ID]; ok { - // Node was draining, see if it has changed - if newNode.DrainStrategy == nil { - // Node stopped draining - delete(n.nodes, newNode.ID) - changed = true - } else if !newNode.DrainStrategy.Equal(existingNode.DrainStrategy) { - // Update deadline - n.nodes[newNode.ID] = newNode - changed = true - } - } else { - // Node was not draining - if newNode.DrainStrategy != nil { - // Node started draining - n.nodes[newNode.ID] = newNode - changed = true - } - } - } - - // Send a copy of the draining nodes if there were changes - if !changed { - continue - } - - nodesCopy := make(map[string]*structs.Node, len(n.nodes)) - for k, v := range n.nodes { - nodesCopy[k] = v - } - - select { - case n.nodesCh <- nodesCopy: - case <-ctx.Done(): - return - } - } -} - -func (n *nodeWatcher) queryNodeDrain(ws memdb.WatchSet, state *state.StateStore) (interface{}, uint64, error) { - iter, err := state.Nodes(ws) - if err != nil { - return nil, 0, err - } - - index, err := state.Index("nodes") - if err != nil { - return nil, 0, err - } - - resp := make([]*structs.Node, 0, 8) - - for { - raw := iter.Next() - if raw == nil { - break - } - - node := raw.(*structs.Node) - resp = append(resp, node) - } - - return resp, index, nil -} diff --git a/nomad/drainerv2/watch_jobs.go b/nomad/drainer/watch_jobs.go similarity index 99% rename from nomad/drainerv2/watch_jobs.go rename to nomad/drainer/watch_jobs.go index 3a28f647ceff..714bac2b7e53 100644 --- a/nomad/drainerv2/watch_jobs.go +++ b/nomad/drainer/watch_jobs.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "context" diff --git a/nomad/drainerv2/watch_jobs_test.go b/nomad/drainer/watch_jobs_test.go similarity index 99% rename from nomad/drainerv2/watch_jobs_test.go rename to nomad/drainer/watch_jobs_test.go index 6d9b1846ec5c..3db5ea0ac4b8 100644 --- a/nomad/drainerv2/watch_jobs_test.go +++ b/nomad/drainer/watch_jobs_test.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "context" diff --git a/nomad/drainerv2/watch_nodes.go b/nomad/drainer/watch_nodes.go similarity index 99% rename from nomad/drainerv2/watch_nodes.go rename to nomad/drainer/watch_nodes.go index 34cc7a9c97d3..738f496fda78 100644 --- a/nomad/drainerv2/watch_nodes.go +++ b/nomad/drainer/watch_nodes.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "context" diff --git a/nomad/drainerv2/watch_nodes_test.go b/nomad/drainer/watch_nodes_test.go similarity index 97% rename from nomad/drainerv2/watch_nodes_test.go rename to nomad/drainer/watch_nodes_test.go index dab304c32c9c..476c7a39bb50 100644 --- a/nomad/drainerv2/watch_nodes_test.go +++ b/nomad/drainer/watch_nodes_test.go @@ -1,4 +1,4 @@ -package drainerv2 +package drainer import ( "context" @@ -97,7 +97,7 @@ func TestNodeDrainWatcher_Remove(t *testing.T) { require.Equal(n, tracked[n.ID]) // Change the node to be not draining and wait for it to be untracked - require.Nil(state.UpdateNodeDrain(101, n.ID, 
nil)) + require.Nil(state.UpdateNodeDrain(101, n.ID, nil, false)) testutil.WaitForResult(func() (bool, error) { return len(m.Events) == 2, nil }, func(err error) { @@ -175,7 +175,7 @@ func TestNodeDrainWatcher_Update(t *testing.T) { // Change the node to have a new spec s2 := n.DrainStrategy.Copy() s2.Deadline += time.Hour - require.Nil(state.UpdateNodeDrain(101, n.ID, s2)) + require.Nil(state.UpdateNodeDrain(101, n.ID, s2, false)) // Wait for it to be updated testutil.WaitForResult(func() (bool, error) { diff --git a/nomad/server.go b/nomad/server.go index afe7ee9871ca..b69e0a022571 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -27,7 +27,7 @@ import ( "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/helper/tlsutil" "github.com/hashicorp/nomad/nomad/deploymentwatcher" - "github.com/hashicorp/nomad/nomad/drainerv2" + "github.com/hashicorp/nomad/nomad/drainer" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" @@ -174,7 +174,7 @@ type Server struct { deploymentWatcher *deploymentwatcher.Watcher // nodeDrainer is used to drain allocations from nodes. - nodeDrainer *drainerv2.NodeDrainer + nodeDrainer *drainer.NodeDrainer // evalBroker is used to manage the in-progress evaluations // that are waiting to be brokered to a sub-scheduler @@ -892,16 +892,16 @@ func (s *Server) setupDeploymentWatcher() error { func (s *Server) setupNodeDrainer() { // Create a shim around Raft requests shim := drainerShim{s} - c := &drainerv2.NodeDrainerConfig{ + c := &drainer.NodeDrainerConfig{ Logger: s.logger, Raft: shim, - JobFactory: drainerv2.GetDrainingJobWatcher, - NodeFactory: drainerv2.GetNodeWatcherFactory(), - DrainDeadlineFactory: drainerv2.GetDeadlineNotifier, - StateQueriesPerSecond: drainerv2.LimitStateQueriesPerSecond, - BatchUpdateInterval: drainerv2.BatchUpdateInterval, + JobFactory: drainer.GetDrainingJobWatcher, + NodeFactory: drainer.GetNodeWatcherFactory(), + DrainDeadlineFactory: drainer.GetDeadlineNotifier, + StateQueriesPerSecond: drainer.LimitStateQueriesPerSecond, + BatchUpdateInterval: drainer.BatchUpdateInterval, } - s.nodeDrainer = drainerv2.NewNodeDrainer(c) + s.nodeDrainer = drainer.NewNodeDrainer(c) } // setupVaultClient is used to set up the Vault API client. From 45e7e885585e4c894e0e86c988751bce49243edf Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 12 Mar 2018 13:44:33 -0700 Subject: [PATCH 49/79] Fix deadline handling --- api/nodes.go | 5 ++- nomad/drainer_int_test.go | 88 +++++++++++++++++++++++++++++++++++++ scheduler/generic_sched.go | 26 +++-------- scheduler/reconcile.go | 26 +---------- scheduler/reconcile_test.go | 86 +++--------------------------------- 5 files changed, 103 insertions(+), 128 deletions(-) diff --git a/api/nodes.go b/api/nodes.go index 9261528544f8..d625629fb5e3 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -183,8 +183,9 @@ type DrainStrategy struct { // DrainSpec is the user declared drain specification DrainSpec - // DeadlineTime is the deadline time for the drain. - DeadlineTime time.Time + // ForceDeadline is the deadline time for the drain after which drains will + // be forced + ForceDeadline time.Time } // DrainSpec describes a Node's drain behavior. 
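The ForceDeadline rename above captures the deadline semantics the rest of this patch relies on: the user supplies a relative DrainSpec.Deadline, while ForceDeadline records the absolute wall-clock time after which any remaining allocations are force-migrated; presumably the server derives the latter from the former when the drain is applied. A small, self-contained sketch of those semantics follows; the local DrainSpec and DrainStrategy types are trimmed mirrors of the api package structs, the forced helper is purely illustrative, and the real code also distinguishes cases such as -force and -no-deadline drains that this sketch ignores.

package main

import (
	"fmt"
	"time"
)

// Trimmed mirrors of the api package types touched by this change.
type DrainSpec struct {
	Deadline         time.Duration
	IgnoreSystemJobs bool
}

type DrainStrategy struct {
	DrainSpec

	// ForceDeadline is the wall-clock time after which remaining
	// allocations are force-migrated.
	ForceDeadline time.Time
}

// forced reports whether the drain deadline has passed; a zero
// ForceDeadline is treated as "no deadline" in this sketch.
func (d *DrainStrategy) forced(now time.Time) bool {
	if d == nil || d.ForceDeadline.IsZero() {
		return false
	}
	return now.After(d.ForceDeadline)
}

func main() {
	// Deriving ForceDeadline from the requested duration at the time the
	// drain is applied (an assumption of this sketch, done inline here;
	// the real server computes it when accepting the drain request).
	spec := DrainSpec{Deadline: 10 * time.Second}
	strategy := &DrainStrategy{
		DrainSpec:     spec,
		ForceDeadline: time.Now().Add(spec.Deadline),
	}

	fmt.Println("forced now?        ", strategy.forced(time.Now()))
	fmt.Println("forced in a minute?", strategy.forced(time.Now().Add(time.Minute)))
}

With the deadline owned end to end by the drainer, the scheduler changes later in this commit drop the stagger-limited migration path: the reconciler marks every migrating allocation at once, and pacing is left to the drainer and the task group's migrate max_parallel rather than to follow-up evaluations.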
diff --git a/nomad/drainer_int_test.go b/nomad/drainer_int_test.go index 39422a5a0ddb..8e03a2ef5ff3 100644 --- a/nomad/drainer_int_test.go +++ b/nomad/drainer_int_test.go @@ -187,6 +187,94 @@ func TestDrainer_Simple_ServiceOnly(t *testing.T) { }) } +func TestDrainer_Simple_ServiceOnly_Deadline(t *testing.T) { + t.Parallel() + require := require.New(t) + s1 := TestServer(t, nil) + defer s1.Shutdown() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Create a node + n1 := mock.Node() + nodeReg := &structs.NodeRegisterRequest{ + Node: n1, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var nodeResp structs.NodeUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", nodeReg, &nodeResp)) + + // Create a job that runs on just one + job := mock.Job() + job.Update = *structs.DefaultUpdateStrategy + job.Update.Stagger = 30 * time.Second + job.TaskGroups[0].Count = 2 + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{ + Region: "global", + Namespace: job.Namespace, + }, + } + + // Fetch the response + var resp structs.JobRegisterResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp)) + require.NotZero(resp.Index) + + // Wait for the two allocations to be placed + state := s1.State() + testutil.WaitForResult(func() (bool, error) { + allocs, err := state.AllocsByJob(nil, job.Namespace, job.ID, false) + if err != nil { + return false, err + } + return len(allocs) == 2, fmt.Errorf("got %d allocs", len(allocs)) + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // Drain the node + drainReq := &structs.NodeUpdateDrainRequest{ + NodeID: n1.ID, + DrainStrategy: &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 1 * time.Second, + }, + }, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var drainResp structs.NodeDrainUpdateResponse + require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateDrain", drainReq, &drainResp)) + + // Wait for the allocs to be stopped + testutil.WaitForResult(func() (bool, error) { + allocs, err := state.AllocsByNode(nil, n1.ID) + if err != nil { + return false, err + } + for _, alloc := range allocs { + if alloc.DesiredStatus != structs.AllocDesiredStatusStop { + return false, fmt.Errorf("got desired status %v", alloc.DesiredStatus) + } + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + // Check that the node drain is removed + testutil.WaitForResult(func() (bool, error) { + node, err := state.NodeByID(nil, n1.ID) + if err != nil { + return false, err + } + return node.DrainStrategy == nil, fmt.Errorf("has drain strategy still set") + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} + func TestDrainer_DrainEmptyNode(t *testing.T) { t.Parallel() require := require.New(t) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index 32758359b8c4..6b812740ce9b 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -76,10 +76,7 @@ type GenericScheduler struct { ctx *EvalContext stack *GenericStack - // Deprecated, was used in pre Nomad 0.7 rolling update stanza and in node draining prior to Nomad 0.8 - followupEvalWait time.Duration - nextEval *structs.Evaluation - followUpEvals []*structs.Evaluation + followUpEvals []*structs.Evaluation deployment *structs.Deployment @@ -125,7 +122,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error { default: desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason", eval.TriggeredBy) - return 
setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, + return setStatus(s.logger, s.planner, s.eval, nil, s.blocked, s.failedTGAllocs, structs.EvalStatusFailed, desc, s.queuedAllocs, s.deployment.GetID()) } @@ -144,7 +141,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error { if err := s.createBlockedEval(true); err != nil { mErr.Errors = append(mErr.Errors, err) } - if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, + if err := setStatus(s.logger, s.planner, s.eval, nil, s.blocked, s.failedTGAllocs, statusErr.EvalStatus, err.Error(), s.queuedAllocs, s.deployment.GetID()); err != nil { mErr.Errors = append(mErr.Errors, err) @@ -166,7 +163,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error { } // Update the status to complete - return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, + return setStatus(s.logger, s.planner, s.eval, nil, s.blocked, s.failedTGAllocs, structs.EvalStatusComplete, "", s.queuedAllocs, s.deployment.GetID()) } @@ -259,16 +256,6 @@ func (s *GenericScheduler) process() (bool, error) { return true, nil } - // If we need a followup eval and we haven't created one, do so. - if s.followupEvalWait != 0 && s.nextEval == nil { - s.nextEval = s.eval.NextRollingEval(s.followupEvalWait) - if err := s.planner.CreateEval(s.nextEval); err != nil { - s.logger.Printf("[ERR] sched: %#v failed to make next eval for rolling migration: %v", s.eval, err) - return false, err - } - s.logger.Printf("[DEBUG] sched: %#v: rolling migration limit reached, next eval '%s' created", s.eval, s.nextEval.ID) - } - // Create follow up evals for any delayed reschedule eligible allocations if len(s.followUpEvals) > 0 { for _, eval := range s.followUpEvals { @@ -353,16 +340,13 @@ func (s *GenericScheduler) computeJobAllocs() error { s.plan.Deployment = results.deployment s.plan.DeploymentUpdates = results.deploymentUpdates - // Store the the follow up eval wait duration. If set this will trigger a - // follow up eval to handle node draining. - s.followupEvalWait = results.followupEvalWait - // Store all the follow up evaluations from rescheduled allocations if len(results.desiredFollowupEvals) > 0 { for _, evals := range results.desiredFollowupEvals { s.followUpEvals = append(s.followUpEvals, evals...) } } + // Update the stored deployment if results.deployment != nil { s.deployment = results.deployment diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index cdc375510750..a4e1d1c06d3f 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -99,11 +99,6 @@ type reconcileResults struct { // task group. desiredTGUpdates map[string]*structs.DesiredUpdates - // followupEvalWait is set if there should be a followup eval run after the - // given duration - // Deprecated, the delay strategy that sets this is not available after nomad 0.7.0 - followupEvalWait time.Duration - // desiredFollowupEvals is the map of follow up evaluations to create per task group // This is used to create a delayed evaluation for rescheduling failed allocations. 
desiredFollowupEvals map[string][]*structs.Evaluation @@ -131,9 +126,6 @@ func (r *reconcileResults) GoString() string { base += fmt.Sprintf("\nDeployment Update for ID %q: Status %q; Description %q", u.DeploymentID, u.Status, u.StatusDescription) } - if r.followupEvalWait != 0 { - base += fmt.Sprintf("\nFollowup Eval in %v", r.followupEvalWait) - } for tg, u := range r.desiredTGUpdates { base += fmt.Sprintf("\nDesired Changes for %q: %#v", tg, u) } @@ -461,16 +453,12 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { // Calculate the allowed number of changes and set the desired changes // accordingly. - min := helper.IntMin(len(migrate), limit) if !a.deploymentFailed && !a.deploymentPaused { - desiredChanges.Migrate += uint64(min) - desiredChanges.Ignore += uint64(len(migrate) - min) + desiredChanges.Migrate += uint64(len(migrate)) } else { desiredChanges.Stop += uint64(len(migrate)) } - followup := false - migrated := 0 for _, alloc := range migrate.nameOrder() { // If the deployment is failed or paused, don't replace it, just mark as stop. if a.deploymentFailed || a.deploymentPaused { @@ -481,12 +469,6 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { continue } - if migrated >= limit { - followup = true - break - } - - migrated++ a.result.stop = append(a.result.stop, allocStopResult{ alloc: alloc, statusDescription: allocMigrating, @@ -499,12 +481,6 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { }) } - // TODO Deprecate - // We need to create a followup evaluation. - if followup && strategy != nil && a.result.followupEvalWait < strategy.Stagger { - a.result.followupEvalWait = strategy.Stagger - } - // Create a new deployment if necessary if !existingDeployment && strategy != nil && dstate.DesiredTotal != 0 { // A previous group may have made the deployment already diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index a00471fba603..604347fa5bd9 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -75,7 +75,6 @@ Update stanza Tests: √ Failed deployment cancels non-promoted task groups √ Failed deployment and updated job works √ Finished deployment gets marked as complete -√ The stagger is correctly calculated when it is applied across multiple task groups. √ Change job change while scaling up √ Update the job when all allocations from the previous job haven't been placed yet. √ Paused or failed deployment doesn't do any rescheduling of failed allocs @@ -306,7 +305,6 @@ type resultExpectation struct { inplace int stop int desiredTGUpdates map[string]*structs.DesiredUpdates - followupEvalWait time.Duration } func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) { @@ -342,9 +340,6 @@ func assertResults(t *testing.T, r *reconcileResults, exp *resultExpectation) { if l := len(r.desiredTGUpdates); l != len(exp.desiredTGUpdates) { t.Fatalf("Expected %d task group desired tg updates annotations; got %d", len(exp.desiredTGUpdates), l) } - if r.followupEvalWait != exp.followupEvalWait { - t.Fatalf("Unexpected followup eval wait time. 
Got %v; want %v", r.followupEvalWait, exp.followupEvalWait) - } // Check the desired updates happened for group, desired := range exp.desiredTGUpdates { @@ -3043,24 +3038,23 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) { assertResults(t, r, &resultExpectation{ createDeployment: nil, deploymentUpdates: nil, - place: 2, + place: 3, destructive: 2, - stop: 2, - followupEvalWait: 31 * time.Second, + stop: 3, desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { Place: 1, // Place the lost Stop: 1, // Stop the lost - Migrate: 1, // Migrate the tainted + Migrate: 2, // Migrate the tainted DestructiveUpdate: 2, - Ignore: 6, + Ignore: 5, }, }, }) assertNamesHaveIndexes(t, intRange(8, 9), destructiveResultsToNames(r.destructiveUpdate)) - assertNamesHaveIndexes(t, intRange(0, 1), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop)) + assertNamesHaveIndexes(t, intRange(0, 2), placeResultsToNames(r.place)) + assertNamesHaveIndexes(t, intRange(0, 2), stopResultsToNames(r.stop)) } // Tests the reconciler handles a failed deployment and only replaces lost @@ -3132,7 +3126,6 @@ func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) { place: 1, // Only replace the lost node inplace: 0, stop: 2, - followupEvalWait: 0, // Since the deployment is failed, there should be no followup desiredTGUpdates: map[string]*structs.DesiredUpdates{ job.TaskGroups[0].Name: { Place: 1, @@ -3413,73 +3406,6 @@ func TestReconciler_MarkDeploymentComplete(t *testing.T) { }) } -// Tests the reconciler picks the maximum of the staggers when multiple task -// groups are under going node drains. -func TestReconciler_TaintedNode_MultiGroups(t *testing.T) { - // Create a job with two task groups - job := mock.Job() - job.TaskGroups[0].Update = noCanaryUpdate - job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) - job.TaskGroups[1].Name = "two" - job.TaskGroups[1].Update.Stagger = 100 * time.Second - - // Create the allocations - var allocs []*structs.Allocation - for j := 0; j < 2; j++ { - for i := 0; i < 10; i++ { - alloc := mock.Alloc() - alloc.Job = job - alloc.JobID = job.ID - alloc.NodeID = uuid.Generate() - alloc.Name = structs.AllocName(job.ID, job.TaskGroups[j].Name, uint(i)) - alloc.TaskGroup = job.TaskGroups[j].Name - allocs = append(allocs, alloc) - } - } - - // Build a map of tainted nodes - tainted := make(map[string]*structs.Node, 15) - for i := 0; i < 15; i++ { - n := mock.Node() - n.ID = allocs[i].NodeID - allocs[i].DesiredTransition.Migrate = helper.BoolToPtr(true) - n.Drain = true - tainted[n.ID] = n - } - - reconciler := NewAllocReconciler(testLogger(), allocUpdateFnIgnore, false, job.ID, job, nil, allocs, tainted) - r := reconciler.Compute() - - // Assert the correct results - assertResults(t, r, &resultExpectation{ - createDeployment: nil, - deploymentUpdates: nil, - place: 8, - inplace: 0, - stop: 8, - followupEvalWait: 100 * time.Second, - desiredTGUpdates: map[string]*structs.DesiredUpdates{ - job.TaskGroups[0].Name: { - Place: 0, - Stop: 0, - Migrate: 4, - DestructiveUpdate: 0, - Ignore: 6, - }, - job.TaskGroups[1].Name: { - Place: 0, - Stop: 0, - Migrate: 4, - DestructiveUpdate: 0, - Ignore: 6, - }, - }, - }) - - assertNamesHaveIndexes(t, intRange(0, 3, 0, 3), placeResultsToNames(r.place)) - assertNamesHaveIndexes(t, intRange(0, 3, 0, 3), stopResultsToNames(r.stop)) -} - // Tests the reconciler handles changing a job such that a deployment is created // while doing a scale up but as the second 
eval. func TestReconciler_JobChange_ScaleUp_SecondEval(t *testing.T) { From ad2f211712e76949c90916ed7ac3a754b0c78d84 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 9 Mar 2018 14:15:21 -0800 Subject: [PATCH 50/79] Batch drain update --- nomad/drainer/drainer.go | 13 +++------ nomad/drainer/watch_nodes.go | 2 +- nomad/drainer_shims.go | 14 ++++++---- nomad/fsm.go | 16 +++++++++++ nomad/fsm_test.go | 41 ++++++++++++++++++++++++++++ nomad/state/state_store.go | 23 +++++++++++++++- nomad/state/state_store_test.go | 47 +++++++++++++++++++++++++++++++++ nomad/structs/structs.go | 18 +++++++++++++ 8 files changed, 158 insertions(+), 16 deletions(-) diff --git a/nomad/drainer/drainer.go b/nomad/drainer/drainer.go index 2b6a328070d0..c8d9abaa5562 100644 --- a/nomad/drainer/drainer.go +++ b/nomad/drainer/drainer.go @@ -36,7 +36,7 @@ const ( // NodeDrainer. type RaftApplier interface { AllocUpdateDesiredTransition(allocs map[string]*structs.DesiredTransition, evals []*structs.Evaluation) (uint64, error) - NodeDrainComplete(nodeID string) (uint64, error) + NodesDrainComplete(nodes []string) (uint64, error) } // NodeTracker is the interface to notify an object that is tracking draining @@ -295,14 +295,9 @@ func (n *NodeDrainer) handleMigratedAllocs(allocs []*structs.Allocation) { } n.l.RUnlock() - // TODO(alex) This should probably be a single Raft transaction - for _, doneNode := range done { - index, err := n.raft.NodeDrainComplete(doneNode) - if err != nil { - n.logger.Printf("[ERR] nomad.drain: failed to unset drain for node %q: %v", doneNode, err) - } else { - n.logger.Printf("[INFO] nomad.drain: node %q completed draining at index %d", doneNode, index) - } + // TODO(alex) Shard + if _, err := n.raft.NodesDrainComplete(done); err != nil { + n.logger.Printf("[ERR] nomad.drain: failed to unset drain for nodes: %v", err) } } diff --git a/nomad/drainer/watch_nodes.go b/nomad/drainer/watch_nodes.go index 738f496fda78..ed99fb6938c5 100644 --- a/nomad/drainer/watch_nodes.go +++ b/nomad/drainer/watch_nodes.go @@ -89,7 +89,7 @@ func (n *NodeDrainer) Update(node *structs.Node) { } if done { - index, err := n.raft.NodeDrainComplete(node.ID) + index, err := n.raft.NodesDrainComplete([]string{node.ID}) if err != nil { n.logger.Printf("[ERR] nomad.drain: failed to unset drain for node %q: %v", node.ID, err) } else { diff --git a/nomad/drainer_shims.go b/nomad/drainer_shims.go index 1c7ffb1a9b76..0eb8c43a27b3 100644 --- a/nomad/drainer_shims.go +++ b/nomad/drainer_shims.go @@ -8,14 +8,18 @@ type drainerShim struct { s *Server } -func (d drainerShim) NodeDrainComplete(nodeID string) (uint64, error) { - args := &structs.NodeUpdateDrainRequest{ - NodeID: nodeID, - Drain: false, +func (d drainerShim) NodesDrainComplete(nodes []string) (uint64, error) { + args := &structs.BatchNodeUpdateDrainRequest{ + Updates: make(map[string]*structs.DrainUpdate, len(nodes)), WriteRequest: structs.WriteRequest{Region: d.s.config.Region}, } - resp, index, err := d.s.raftApply(structs.NodeUpdateDrainRequestType, args) + update := &structs.DrainUpdate{} + for _, node := range nodes { + args.Updates[node] = update + } + + resp, index, err := d.s.raftApply(structs.BatchNodeUpdateDrainRequestType, args) return d.convertApplyErrors(resp, index, err) } diff --git a/nomad/fsm.go b/nomad/fsm.go index bc52f256e343..afe726eede39 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -244,6 +244,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyAllocUpdateDesiredTransition(buf[1:], log.Index) case 
structs.NodeUpdateEligibilityRequestType: return n.applyNodeEligibilityUpdate(buf[1:], log.Index) + case structs.BatchNodeUpdateDrainRequestType: + return n.applyBatchDrainUpdate(buf[1:], log.Index) } // Check enterprise only message types. @@ -337,6 +339,20 @@ func (n *nomadFSM) applyDrainUpdate(buf []byte, index uint64) interface{} { return nil } +func (n *nomadFSM) applyBatchDrainUpdate(buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "batch_node_drain_update"}, time.Now()) + var req structs.BatchNodeUpdateDrainRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.BatchUpdateNodeDrain(index, req.Updates); err != nil { + n.logger.Printf("[ERR] nomad.fsm: BatchUpdateNodeDrain failed: %v", err) + return err + } + return nil +} + func (n *nomadFSM) applyNodeEligibilityUpdate(buf []byte, index uint64) interface{} { defer metrics.MeasureSince([]string{"nomad", "fsm", "node_eligibility_update"}, time.Now()) var req structs.NodeUpdateEligibilityRequest diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 9f8ed205a77e..6d4aaf968fb6 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -278,6 +278,47 @@ func TestFSM_UpdateNodeStatus(t *testing.T) { }) } +func TestFSM_BatchUpdateNodeDrain(t *testing.T) { + t.Parallel() + require := require.New(t) + fsm := testFSM(t) + + node := mock.Node() + req := structs.NodeRegisterRequest{ + Node: node, + } + buf, err := structs.Encode(structs.NodeRegisterRequestType, req) + require.Nil(err) + + resp := fsm.Apply(makeLog(buf)) + require.Nil(resp) + + strategy := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: 10 * time.Second, + }, + } + req2 := structs.BatchNodeUpdateDrainRequest{ + Updates: map[string]*structs.DrainUpdate{ + node.ID: &structs.DrainUpdate{ + DrainStrategy: strategy, + }, + }, + } + buf, err = structs.Encode(structs.BatchNodeUpdateDrainRequestType, req2) + require.Nil(err) + + resp = fsm.Apply(makeLog(buf)) + require.Nil(resp) + + // Verify we are NOT registered + ws := memdb.NewWatchSet() + node, err = fsm.State().NodeByID(ws, req.Node.ID) + require.Nil(err) + require.True(node.Drain) + require.Equal(node.DrainStrategy, strategy) +} + func TestFSM_UpdateNodeDrain(t *testing.T) { t.Parallel() require := require.New(t) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 5f4564001135..6e4f3978db65 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -617,12 +617,34 @@ func (s *StateStore) UpdateNodeStatus(index uint64, nodeID, status string) error return nil } +// BatchUpdateNodeDrain is used to update the drain of a node set of nodes +func (s *StateStore) BatchUpdateNodeDrain(index uint64, updates map[string]*structs.DrainUpdate) error { + txn := s.db.Txn(true) + defer txn.Abort() + for node, update := range updates { + if err := s.updateNodeDrainImpl(txn, index, node, update.DrainStrategy, update.MarkEligible); err != nil { + return err + } + } + txn.Commit() + return nil +} + // UpdateNodeDrain is used to update the drain of a node func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, drain *structs.DrainStrategy, markEligible bool) error { txn := s.db.Txn(true) defer txn.Abort() + if err := s.updateNodeDrainImpl(txn, index, nodeID, drain, markEligible); err != nil { + return err + } + txn.Commit() + return nil +} + +func (s *StateStore) updateNodeDrainImpl(txn *memdb.Txn, index uint64, nodeID string, + drain 
*structs.DrainStrategy, markEligible bool) error { // Lookup the node existing, err := txn.First("nodes", "id", nodeID) @@ -656,7 +678,6 @@ func (s *StateStore) UpdateNodeDrain(index uint64, nodeID string, return fmt.Errorf("index update failed: %v", err) } - txn.Commit() return nil } diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 7eeb4672e212..20ebbe88fcd2 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -698,6 +698,53 @@ func TestStateStore_UpdateNodeStatus_Node(t *testing.T) { } } +func TestStateStore_BatchUpdateNodeDrain(t *testing.T) { + require := require.New(t) + state := testStateStore(t) + + n1, n2 := mock.Node(), mock.Node() + require.Nil(state.UpsertNode(1000, n1)) + require.Nil(state.UpsertNode(1001, n2)) + + // Create a watchset so we can test that update node drain fires the watch + ws := memdb.NewWatchSet() + _, err := state.NodeByID(ws, n1.ID) + require.Nil(err) + + expectedDrain := &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: -1 * time.Second, + }, + } + + update := map[string]*structs.DrainUpdate{ + n1.ID: &structs.DrainUpdate{ + DrainStrategy: expectedDrain, + }, + n2.ID: &structs.DrainUpdate{ + DrainStrategy: expectedDrain, + }, + } + + require.Nil(state.BatchUpdateNodeDrain(1002, update)) + require.True(watchFired(ws)) + + ws = memdb.NewWatchSet() + for _, id := range []string{n1.ID, n2.ID} { + out, err := state.NodeByID(ws, id) + require.Nil(err) + require.True(out.Drain) + require.NotNil(out.DrainStrategy) + require.Equal(out.DrainStrategy, expectedDrain) + require.EqualValues(1002, out.ModifyIndex) + } + + index, err := state.Index("nodes") + require.Nil(err) + require.EqualValues(1002, index) + require.False(watchFired(ws)) +} + func TestStateStore_UpdateNodeDrain_Node(t *testing.T) { require := require.New(t) state := testStateStore(t) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 04c073946be5..72f2c0a31948 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -80,6 +80,7 @@ const ( JobBatchDeregisterRequestType AllocUpdateDesiredTransitionRequestType NodeUpdateEligibilityRequestType + BatchNodeUpdateDrainRequestType ) const ( @@ -314,6 +315,23 @@ type NodeUpdateDrainRequest struct { WriteRequest } +// BatchNodeUpdateDrainRequest is used for updating the drain strategy for a +// batch of nodes +type BatchNodeUpdateDrainRequest struct { + // Updates is a mapping of nodes to their updated drain strategy + Updates map[string]*DrainUpdate + WriteRequest +} + +// DrainUpdate is used to update the drain of a node +type DrainUpdate struct { + // DrainStrategy is the new strategy for the node + DrainStrategy *DrainStrategy + + // MarkEligible marks the node as eligible if removing the drain strategy. 
+ MarkEligible bool +} + // NodeUpdateEligibilityRequest is used for updating the scheduling eligibility type NodeUpdateEligibilityRequest struct { NodeID string From 5324e56e1c0604ebb597e249a71114e50d71c988 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Fri, 9 Mar 2018 16:10:38 -0800 Subject: [PATCH 51/79] sharding --- nomad/drainer/drainer.go | 27 ++++++--- nomad/drainer/drainer_util.go | 93 ++++++++++++++++++++++++++++++ nomad/drainer/drainer_util_test.go | 54 +++++++++++++++++ 3 files changed, 165 insertions(+), 9 deletions(-) create mode 100644 nomad/drainer/drainer_util.go create mode 100644 nomad/drainer/drainer_util_test.go diff --git a/nomad/drainer/drainer.go b/nomad/drainer/drainer.go index c8d9abaa5562..98c52479a865 100644 --- a/nomad/drainer/drainer.go +++ b/nomad/drainer/drainer.go @@ -295,9 +295,12 @@ func (n *NodeDrainer) handleMigratedAllocs(allocs []*structs.Allocation) { } n.l.RUnlock() - // TODO(alex) Shard - if _, err := n.raft.NodesDrainComplete(done); err != nil { - n.logger.Printf("[ERR] nomad.drain: failed to unset drain for nodes: %v", err) + // Submit the node transistions in a sharded form to ensure a reasonable + // Raft transaction size. + for _, nodes := range partitionIds(done) { + if _, err := n.raft.NodesDrainComplete(nodes); err != nil { + n.logger.Printf("[ERR] nomad.drain: failed to unset drain for nodes: %v", err) + } } } @@ -341,13 +344,11 @@ func (n *NodeDrainer) batchDrainAllocs(allocs []*structs.Allocation) (uint64, er // the set of allocations. It will also create the necessary evaluations for the // affected jobs. func (n *NodeDrainer) drainAllocs(future *structs.BatchFuture, allocs []*structs.Allocation) { - // TODO(alex) This should shard to limit the size of the transaction. - // Compute the effected jobs and make the transition map jobs := make(map[string]*structs.Allocation, 4) - transitions := make(map[string]*structs.DesiredTransition, len(allocs)) + transistions := make(map[string]*structs.DesiredTransition, len(allocs)) for _, alloc := range allocs { - transitions[alloc.ID] = &structs.DesiredTransition{ + transistions[alloc.ID] = &structs.DesiredTransition{ Migrate: helper.BoolToPtr(true), } jobs[alloc.JobID] = alloc @@ -367,6 +368,14 @@ func (n *NodeDrainer) drainAllocs(future *structs.BatchFuture, allocs []*structs } // Commit this update via Raft - index, err := n.raft.AllocUpdateDesiredTransition(transitions, evals) - future.Respond(index, err) + var finalIndex uint64 + for _, u := range partitionAllocDrain(transistions, evals) { + index, err := n.raft.AllocUpdateDesiredTransition(u.Transistions, u.Evals) + if err != nil { + future.Respond(index, err) + } + finalIndex = index + } + + future.Respond(finalIndex, nil) } diff --git a/nomad/drainer/drainer_util.go b/nomad/drainer/drainer_util.go new file mode 100644 index 000000000000..09d026235aa0 --- /dev/null +++ b/nomad/drainer/drainer_util.go @@ -0,0 +1,93 @@ +package drainer + +import ( + "github.com/hashicorp/nomad/nomad/structs" +) + +var ( + // maxIdsPerTxn is the maximum number of IDs that can be included in a + // single Raft transaction. This is to ensure that the Raft message does not + // become too large. + maxIdsPerTxn = (1024 * 256) / 36 // 0.25 MB of ids. +) + +// partitionIds takes a set of IDs and returns a partitioned view of them such +// that no batch would result in an overly large raft transaction. 
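Editor's note, not part of the patch: a minimal sketch of the sizing arithmetic behind maxIdsPerTxn above, assuming IDs are 36-byte UUID strings as the "0.25 MB of ids" comment implies. The node count used here is hypothetical; partitionIds itself follows immediately below.

package main

import "fmt"

func main() {
	maxIdsPerTxn := (1024 * 256) / 36 // ~0.25 MB of 36-byte UUID strings, 7281 IDs per Raft txn
	doneNodes := 20000                // hypothetical number of nodes finishing their drain at once
	batches := (doneNodes + maxIdsPerTxn - 1) / maxIdsPerTxn
	fmt.Println(batches) // 3, so NodesDrainComplete would be applied as three Raft transactions
}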
+func partitionIds(ids []string) [][]string { + index := 0 + total := len(ids) + var partitions [][]string + for remaining := total - index; remaining > 0; remaining = total - index { + if remaining < maxIdsPerTxn { + partitions = append(partitions, ids[index:]) + break + } else { + partitions = append(partitions, ids[index:index+maxIdsPerTxn]) + index += maxIdsPerTxn + } + } + + return partitions +} + +// transistionTuple is used to group desired transistions and evals +type transistionTuple struct { + Transistions map[string]*structs.DesiredTransition + Evals []*structs.Evaluation +} + +// partitionAllocDrain returns a list of alloc transistions and evals to apply +// in a single raft transaction.This is necessary to ensure that the Raft +// transaction does not become too large. +func partitionAllocDrain(transistions map[string]*structs.DesiredTransition, + evals []*structs.Evaluation) []*transistionTuple { + + // Determine a stable ordering of the transistioning allocs + allocs := make([]string, 0, len(transistions)) + for id := range transistions { + allocs = append(allocs, id) + } + + var requests []*transistionTuple + submittedEvals, submittedTrans := 0, 0 + for submittedEvals != len(evals) || submittedTrans != len(transistions) { + req := &transistionTuple{ + Transistions: make(map[string]*structs.DesiredTransition), + } + requests = append(requests, req) + available := maxIdsPerTxn + + // Add the allocs first + if remaining := len(allocs) - submittedTrans; remaining > 0 { + if remaining <= available { + for _, id := range allocs[submittedTrans:] { + req.Transistions[id] = transistions[id] + } + available -= remaining + submittedTrans += remaining + } else { + for _, id := range allocs[submittedTrans : submittedTrans+available] { + req.Transistions[id] = transistions[id] + } + submittedTrans += available + + // Exhausted space so skip adding evals + continue + } + + } + + // Add the evals + if remaining := len(evals) - submittedEvals; remaining > 0 { + if remaining <= available { + req.Evals = evals[submittedEvals:] + submittedEvals += remaining + } else { + req.Evals = evals[submittedEvals : submittedEvals+available] + submittedEvals += available + } + } + } + + return requests +} diff --git a/nomad/drainer/drainer_util_test.go b/nomad/drainer/drainer_util_test.go new file mode 100644 index 000000000000..ee2f4a79f508 --- /dev/null +++ b/nomad/drainer/drainer_util_test.go @@ -0,0 +1,54 @@ +package drainer + +import ( + "testing" + + "github.com/hashicorp/nomad/nomad/structs" + "github.com/stretchr/testify/require" +) + +func TestDrainer_PartitionAllocDrain(t *testing.T) { + // Set the max ids per reap to something lower. + old := maxIdsPerTxn + defer func() { maxIdsPerTxn = old }() + maxIdsPerTxn = 2 + + require := require.New(t) + transistions := map[string]*structs.DesiredTransition{"a": nil, "b": nil, "c": nil} + evals := []*structs.Evaluation{nil, nil, nil} + requests := partitionAllocDrain(transistions, evals) + require.Len(requests, 3) + + first := requests[0] + require.Len(first.Transistions, 2) + require.Len(first.Evals, 0) + + second := requests[1] + require.Len(second.Transistions, 1) + require.Len(second.Evals, 1) + + third := requests[2] + require.Len(third.Transistions, 0) + require.Len(third.Evals, 2) +} + +func TestDrainer_PartitionIds(t *testing.T) { + require := require.New(t) + + // Set the max ids per reap to something lower. 
+ old := maxIdsPerTxn + defer func() { maxIdsPerTxn = old }() + maxIdsPerTxn = 2 + + ids := []string{"1", "2", "3", "4", "5"} + requests := partitionIds(ids) + require.Len(requests, 3) + require.Len(requests[0], 2) + require.Len(requests[1], 2) + require.Len(requests[2], 1) + require.Equal(requests[0][0], ids[0]) + require.Equal(requests[0][1], ids[1]) + require.Equal(requests[1][0], ids[2]) + require.Equal(requests[1][1], ids[3]) + require.Equal(requests[2][0], ids[4]) +} From 270699bab27bf73ec7cb2181d0432a429f5a0fa7 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Wed, 14 Mar 2018 16:38:19 -0700 Subject: [PATCH 52/79] fix comment --- nomad/fsm_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 6d4aaf968fb6..1a7b08a4bd60 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -311,7 +311,7 @@ func TestFSM_BatchUpdateNodeDrain(t *testing.T) { resp = fsm.Apply(makeLog(buf)) require.Nil(resp) - // Verify we are NOT registered + // Verify drain is set ws := memdb.NewWatchSet() node, err = fsm.State().NodeByID(ws, req.Node.ID) require.Nil(err) From fb6c821526ee7555fae795db0dc2cae9286f8a79 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 6 Mar 2018 13:12:36 -0800 Subject: [PATCH 53/79] Fix node eligibility test --- command/node_eligibility_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/command/node_eligibility_test.go b/command/node_eligibility_test.go index 3129fe86a19b..6fbb3c91d8ff 100644 --- a/command/node_eligibility_test.go +++ b/command/node_eligibility_test.go @@ -37,8 +37,9 @@ func TestNodeEligibilityCommand_Fails(t *testing.T) { if code := cmd.Run([]string{"-address=nope", "-enable", "12345678-abcd-efab-cdef-123456789abc"}); code != 1 { t.Fatalf("expected exit code 1, got: %d", code) } - if out := ui.ErrorWriter.String(); !strings.Contains(out, "Error toggling") { - t.Fatalf("expected failed toggle error, got: %s", out) + expected := "Error updating scheduling eligibility" + if out := ui.ErrorWriter.String(); !strings.Contains(out, expected) { + t.Fatalf("expected %q, got: %s", expected, out) } ui.ErrorWriter.Reset() From 6347baec55774aa011201e5c22356636be6822f2 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 6 Mar 2018 16:23:21 -0800 Subject: [PATCH 54/79] Add DesiredTransition.ShouldMigrate to api pkg --- api/allocations.go | 5 +++++ api/allocations_test.go | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/api/allocations.go b/api/allocations.go index c3759806741f..fc035ebb16ce 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -215,3 +215,8 @@ type DesiredTransition struct { // migrated to another node. Migrate *bool } + +// ShouldMigrate returns whether the transition object dictates a migration. 
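Editor's note, not part of the patch: a small sketch of how this helper ties into the rest of the series. The drainer marks allocations with DesiredTransition{Migrate: true}, and consumers such as the CLI monitor added in the next commit branch on ShouldMigrate. Only identifiers already used elsewhere in this series appear here.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/helper"
)

func main() {
	t := api.DesiredTransition{Migrate: helper.BoolToPtr(true)}
	fmt.Println(t.ShouldMigrate()) // true: this alloc has been asked to migrate off its node
}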
+func (d DesiredTransition) ShouldMigrate() bool { + return d.Migrate != nil && *d.Migrate +} diff --git a/api/allocations_test.go b/api/allocations_test.go index dd5ae333bd1a..5eb5508bb69f 100644 --- a/api/allocations_test.go +++ b/api/allocations_test.go @@ -239,3 +239,10 @@ func TestAllocations_RescheduleInfo(t *testing.T) { } } + +func TestAllocations_ShouldMigrate(t *testing.T) { + t.Parallel() + require.True(t, DesiredTransition{Migrate: helper.BoolToPtr(true)}.ShouldMigrate()) + require.False(t, DesiredTransition{}.ShouldMigrate()) + require.False(t, DesiredTransition{Migrate: helper.BoolToPtr(false)}.ShouldMigrate()) +} From 11d0eae5eddd58b5d1437dd50bdadcf6f15e73f3 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 6 Mar 2018 14:16:20 -0800 Subject: [PATCH 55/79] Monitor node drains until completion in CLI allow -detach like other commands --- command/node_drain.go | 134 ++++++++++++++++++++++++++++- command/node_drain_test.go | 171 +++++++++++++++++++++++++++++++++++++ 2 files changed, 303 insertions(+), 2 deletions(-) diff --git a/command/node_drain.go b/command/node_drain.go index 9d8326d472a0..b4a2ebad7369 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -45,6 +45,9 @@ Node Drain Options: Remaining allocations after the deadline are forced removed from the node. If unspecified, a default deadline of one hour is applied. + -detach + Return immediately instead of entering monitor mode. + -force Force remove allocations off the node immediately. @@ -80,6 +83,7 @@ func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags { "-disable": complete.PredictNothing, "-enable": complete.PredictNothing, "-deadline": complete.PredictAnything, + "-detach": complete.PredictNothing, "-force": complete.PredictNothing, "-no-deadline": complete.PredictNothing, "-ignore-system": complete.PredictNothing, @@ -105,7 +109,7 @@ func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor { } func (c *NodeDrainCommand) Run(args []string) int { - var enable, disable, force, + var enable, disable, detach, force, noDeadline, ignoreSystem, keepIneligible, self, autoYes bool var deadline string @@ -114,6 +118,7 @@ func (c *NodeDrainCommand) Run(args []string) int { flags.BoolVar(&enable, "enable", false, "Enable drain mode") flags.BoolVar(&disable, "disable", false, "Disable drain mode") flags.StringVar(&deadline, "deadline", "", "Deadline after which allocations are force stopped") + flags.BoolVar(&detach, "detach", false, "") flags.BoolVar(&force, "force", false, "Force immediate drain") flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline") flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node") @@ -259,11 +264,136 @@ func (c *NodeDrainCommand) Run(args []string) int { } // Toggle node draining - if _, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil); err != nil { + meta, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil) + if err != nil { c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err)) return 1 } c.Ui.Output(fmt.Sprintf("Node %q drain strategy set", node.ID)) + + if enable && !detach { + if err := monitorDrain(c.Ui.Output, client.Nodes(), node.ID, meta.LastIndex); err != nil { + c.Ui.Error(fmt.Sprintf("Error monitoring drain: %v", err)) + return 1 + } + + c.Ui.Output(fmt.Sprintf("Node %q drain complete", nodeID)) + } + return 0 } + +// monitorDrain monitors the node being drained and exits when the node has +// finished draining. 
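Editor's note, not part of the patch: monitorDrain below leans on Nomad's blocking-query idiom. Here is a minimal standalone sketch of that loop, using only calls that appear in this change (Nodes().Info with api.QueryOptions); the wrapper name waitForDrain is hypothetical.

package main

import "github.com/hashicorp/nomad/api"

// waitForDrain is a hedged sketch: pass the last index seen as WaitIndex so
// Info blocks server-side until the node changes, then advance the index
// from the returned QueryMeta.
func waitForDrain(nodeClient *api.Nodes, nodeID string, index uint64) error {
	for {
		q := api.QueryOptions{AllowStale: true, WaitIndex: index}
		node, meta, err := nodeClient.Info(nodeID, &q)
		if err != nil {
			return err
		}
		if node.DrainStrategy == nil {
			return nil // drain strategy cleared, the node has finished draining
		}
		index = meta.LastIndex // next call blocks until something newer is written
	}
}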
+func monitorDrain(output func(string), nodeClient *api.Nodes, nodeID string, index uint64) error { + doneCh := make(chan struct{}) + defer close(doneCh) + + // Errors from either goroutine are sent here + errCh := make(chan error, 1) + + // Monitor node changes and close chan when drain is complete + nodeCh := make(chan struct{}) + go func() { + for { + q := api.QueryOptions{ + AllowStale: true, + WaitIndex: index, + } + node, meta, err := nodeClient.Info(nodeID, &q) + if err != nil { + select { + case errCh <- err: + case <-doneCh: + } + return + } + + if node.DrainStrategy == nil { + close(nodeCh) + return + } + + // Drain still ongoing + index = meta.LastIndex + } + }() + + // Monitor alloc changes + allocCh := make(chan string, 1) + go func() { + allocs, meta, err := nodeClient.Allocations(nodeID, nil) + if err != nil { + select { + case errCh <- err: + case <-doneCh: + } + return + } + + initial := make(map[string]*api.Allocation, len(allocs)) + for _, a := range allocs { + initial[a.ID] = a + } + + for { + q := api.QueryOptions{ + AllowStale: true, + WaitIndex: meta.LastIndex, + } + + allocs, meta, err = nodeClient.Allocations(nodeID, &q) + if err != nil { + select { + case errCh <- err: + case <-doneCh: + } + return + } + + for _, a := range allocs { + // Get previous version of alloc + orig, ok := initial[a.ID] + + // Update local alloc state + initial[a.ID] = a + + msg := "" + switch { + case !ok: + // Should only be possible if response + // from initial Allocations call was + // stale. No need to output + + case orig.ClientStatus != a.ClientStatus: + // Alloc status has changed; output + msg = fmt.Sprintf("status %s -> %s", orig.ClientStatus, a.ClientStatus) + + case !orig.DesiredTransition.ShouldMigrate() && a.DesiredTransition.ShouldMigrate(): + // Alloc marked for migration + msg = "draining" + } + + if msg != "" { + select { + case allocCh <- fmt.Sprintf("Alloc %q %s", a.ID, msg): + case <-doneCh: + return + } + } + } + } + }() + + for { + select { + case err := <-errCh: + return err + case <-nodeCh: + return nil + case msg := <-allocCh: + output(msg) + } + } +} diff --git a/command/node_drain_test.go b/command/node_drain_test.go index 20f63d95f571..1207047454a3 100644 --- a/command/node_drain_test.go +++ b/command/node_drain_test.go @@ -4,11 +4,16 @@ import ( "fmt" "strings" "testing" + "time" + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/command/agent" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/testutil" "github.com/mitchellh/cli" "github.com/posener/complete" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestNodeDrainCommand_Implements(t *testing.T) { @@ -16,6 +21,172 @@ func TestNodeDrainCommand_Implements(t *testing.T) { var _ cli.Command = &NodeDrainCommand{} } +func TestNodeDrainCommand_Detach(t *testing.T) { + t.Parallel() + require := require.New(t) + server, client, url := testServer(t, true, func(c *agent.Config) { + c.NodeName = "drain_detach_node" + }) + defer server.Shutdown() + + // Wait for a node to appear + var nodeID string + testutil.WaitForResult(func() (bool, error) { + nodes, _, err := client.Nodes().List(nil) + if err != nil { + return false, err + } + if len(nodes) == 0 { + return false, fmt.Errorf("missing node") + } + nodeID = nodes[0].ID + return true, nil + }, func(err error) { + t.Fatalf("err: %s", err) + }) + + // Register a job to create an alloc to drain that will block draining + job := &api.Job{ + ID: helper.StringToPtr("mock_service"), + Name: 
helper.StringToPtr("mock_service"), + Datacenters: []string{"dc1"}, + TaskGroups: []*api.TaskGroup{ + { + Name: helper.StringToPtr("mock_group"), + Tasks: []*api.Task{ + { + Name: "mock_task", + Driver: "mock_driver", + Config: map[string]interface{}{ + "run_for": "10m", + "exit_after": "10m", + }, + }, + }, + }, + }, + } + + _, _, err := client.Jobs().Register(job, nil) + require.Nil(err) + + testutil.WaitForResult(func() (bool, error) { + allocs, _, err := client.Nodes().Allocations(nodeID, nil) + if err != nil { + return false, err + } + return len(allocs) > 0, fmt.Errorf("no allocs") + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + ui := new(cli.MockUi) + cmd := &NodeDrainCommand{Meta: Meta{Ui: ui}} + if code := cmd.Run([]string{"-address=" + url, "-self", "-enable", "-detach"}); code != 0 { + t.Fatalf("expected exit 0, got: %d", code) + } + + out := ui.OutputWriter.String() + expected := "drain strategy set" + require.Contains(out, expected) + + node, _, err := client.Nodes().Info(nodeID, nil) + require.Nil(err) + require.NotNil(node.DrainStrategy) +} + +func TestNodeDrainCommand_Monitor(t *testing.T) { + t.Parallel() + require := require.New(t) + server, client, url := testServer(t, true, func(c *agent.Config) { + c.NodeName = "drain_monitor_node" + }) + defer server.Shutdown() + + // Wait for a node to appear + var nodeID string + testutil.WaitForResult(func() (bool, error) { + nodes, _, err := client.Nodes().List(nil) + if err != nil { + return false, err + } + if len(nodes) == 0 { + return false, fmt.Errorf("missing node") + } + nodeID = nodes[0].ID + return true, nil + }, func(err error) { + t.Fatalf("err: %s", err) + }) + + // Register a job to create an alloc to drain + count := 3 + job := &api.Job{ + ID: helper.StringToPtr("mock_service"), + Name: helper.StringToPtr("mock_service"), + Datacenters: []string{"dc1"}, + TaskGroups: []*api.TaskGroup{ + { + Name: helper.StringToPtr("mock_group"), + Count: &count, + Migrate: &api.MigrateStrategy{ + MaxParallel: helper.IntToPtr(1), + HealthCheck: helper.StringToPtr("task_states"), + MinHealthyTime: helper.TimeToPtr(10 * time.Millisecond), + HealthyDeadline: helper.TimeToPtr(5 * time.Minute), + }, + Tasks: []*api.Task{ + { + Name: "mock_task", + Driver: "mock_driver", + Config: map[string]interface{}{ + "run_for": "10m", + }, + }, + }, + }, + }, + } + + _, _, err := client.Jobs().Register(job, nil) + require.Nil(err) + + var allocs []*api.Allocation + testutil.WaitForResult(func() (bool, error) { + allocs, _, err = client.Nodes().Allocations(nodeID, nil) + if err != nil { + return false, err + } + if len(allocs) != count { + return false, fmt.Errorf("number of allocs %d != count (%d)", len(allocs), count) + } + for _, a := range allocs { + if a.ClientStatus != "running" { + return false, fmt.Errorf("alloc %q still not running: %s", a.ID, a.ClientStatus) + } + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) + + ui := new(cli.MockUi) + cmd := &NodeDrainCommand{Meta: Meta{Ui: ui}} + args := []string{"-address=" + url, "-self", "-enable", "-deadline", "1s"} + t.Logf("Running: %v", args) + if code := cmd.Run(args); code != 0 { + t.Fatalf("expected exit 0, got: %d", code) + } + + out := ui.OutputWriter.String() + t.Logf("Output:\n%s", out) + + require.Contains(out, "drain complete") + for _, a := range allocs { + require.Contains(out, fmt.Sprintf("Alloc %q draining", a.ID)) + } +} + func TestNodeDrainCommand_Fails(t *testing.T) { t.Parallel() srv, _, url := testServer(t, false, nil) From 
e669e8213a7f41335c179158b710aba80c26f469 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 16 Mar 2018 10:43:28 -0700 Subject: [PATCH 56/79] Improve drain log messages Also delay "node complete" after the node has been marked complete to capture a few more alloc events. There are other ways to implement this that could trade off correctness for responsiveness as technically a node is considered drained when all of its allocs have been marked to stop and not when they've actually stopped (which may not happen for a long time). --- command/node_drain.go | 39 ++++++++++++++++++++++++++++++++++---- command/node_drain_test.go | 1 + 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/command/node_drain.go b/command/node_drain.go index b4a2ebad7369..9f170c76e082 100644 --- a/command/node_drain.go +++ b/command/node_drain.go @@ -7,6 +7,7 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" + "github.com/hashicorp/nomad/nomad/structs" "github.com/posener/complete" ) @@ -359,6 +360,8 @@ func monitorDrain(output func(string), nodeClient *api.Nodes, nodeID string, ind // Update local alloc state initial[a.ID] = a + migrating := a.DesiredTransition.ShouldMigrate() + msg := "" switch { case !ok: @@ -370,9 +373,15 @@ func monitorDrain(output func(string), nodeClient *api.Nodes, nodeID string, ind // Alloc status has changed; output msg = fmt.Sprintf("status %s -> %s", orig.ClientStatus, a.ClientStatus) - case !orig.DesiredTransition.ShouldMigrate() && a.DesiredTransition.ShouldMigrate(): - // Alloc marked for migration + case migrating && !orig.DesiredTransition.ShouldMigrate(): + // Alloc was marked for migration + msg = "marked for migration" + case migrating && (orig.DesiredStatus != a.DesiredStatus) && a.DesiredStatus == structs.AllocDesiredStatusStop: + // Alloc has already been marked for migration and is now being stopped msg = "draining" + case a.NextAllocation != "" && orig.NextAllocation == "": + // Alloc has been replaced by another allocation + msg = fmt.Sprintf("replaced by allocation %q", a.NextAllocation) } if msg != "" { @@ -386,14 +395,36 @@ func monitorDrain(output func(string), nodeClient *api.Nodes, nodeID string, ind } }() - for { + done := false + for !done { select { case err := <-errCh: return err case <-nodeCh: - return nil + done = true + case msg := <-allocCh: + output(msg) + } + } + + // Loop on alloc messages for a bit longer as we may have gotten the + // "node done" first (since the watchers run concurrently the events + // may be received out of order) + deadline := 250 * time.Millisecond + timer := time.NewTimer(deadline) + for { + select { + case err := <-errCh: + return err case msg := <-allocCh: output(msg) + if !timer.Stop() { + <-timer.C + } + timer.Reset(deadline) + case <-timer.C: + // No events within deadline, exit + return nil } } } diff --git a/command/node_drain_test.go b/command/node_drain_test.go index 1207047454a3..01c8b12532bd 100644 --- a/command/node_drain_test.go +++ b/command/node_drain_test.go @@ -183,6 +183,7 @@ func TestNodeDrainCommand_Monitor(t *testing.T) { require.Contains(out, "drain complete") for _, a := range allocs { + require.Contains(out, fmt.Sprintf("Alloc %q marked for migration", a.ID)) require.Contains(out, fmt.Sprintf("Alloc %q draining", a.ID)) } } From 0a1f1d2c561a92366c0b3645b8b21392f06d26d7 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 8 Mar 2018 16:08:21 -0800 Subject: [PATCH 57/79] Fix deadline heap triggering Chan must be buffered to avoid skipping triggering 
altogether Also made timing in a test a bit more lenient --- nomad/drainer/drain_heap.go | 34 +++++++++++++++----------------- nomad/drainer/drain_heap_test.go | 4 ++-- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/nomad/drainer/drain_heap.go b/nomad/drainer/drain_heap.go index 1a6c23f13cf9..1642b0fdb330 100644 --- a/nomad/drainer/drain_heap.go +++ b/nomad/drainer/drain_heap.go @@ -43,7 +43,7 @@ func NewDeadlineHeap(ctx context.Context, coalesceWindow time.Duration) *deadlin coalesceWindow: coalesceWindow, batch: make(chan []string), nodes: make(map[string]time.Time, 64), - trigger: make(chan struct{}), + trigger: make(chan struct{}, 1), } go d.watch() @@ -51,12 +51,11 @@ func NewDeadlineHeap(ctx context.Context, coalesceWindow time.Duration) *deadlin } func (d *deadlineHeap) watch() { - timer := time.NewTimer(0 * time.Millisecond) - if !timer.Stop() { - select { - case <-timer.C: - default: - } + timer := time.NewTimer(0) + timer.Stop() + select { + case <-timer.C: + default: } var nextDeadline time.Time @@ -71,8 +70,9 @@ func (d *deadlineHeap) watch() { continue } - d.mu.Lock() var batch []string + + d.mu.Lock() for nodeID, nodeDeadline := range d.nodes { if !nodeDeadline.After(nextDeadline) { batch = append(batch, nodeID) @@ -81,21 +81,19 @@ func (d *deadlineHeap) watch() { } d.mu.Unlock() - // If there is nothing exit early - if len(batch) == 0 { - goto CALC + if len(batch) > 0 { + // Send the batch + select { + case d.batch <- batch: + case <-d.ctx.Done(): + return + } } - // Send the batch - select { - case d.batch <- batch: - case <-d.ctx.Done(): - return - } case <-d.trigger: } - CALC: + // Calculate the next deadline deadline, ok := d.calculateNextDeadline() if !ok { continue diff --git a/nomad/drainer/drain_heap_test.go b/nomad/drainer/drain_heap_test.go index 147ad9192eff..02108e1dfa0e 100644 --- a/nomad/drainer/drain_heap_test.go +++ b/nomad/drainer/drain_heap_test.go @@ -95,7 +95,7 @@ func TestDeadlineHeap_MultiwatchAndDelete(t *testing.T) { func TestDeadlineHeap_WatchCoalesce(t *testing.T) { t.Parallel() require := require.New(t) - h := NewDeadlineHeap(context.Background(), 250*time.Millisecond) + h := NewDeadlineHeap(context.Background(), 100*time.Millisecond) now := time.Now() @@ -107,7 +107,7 @@ func TestDeadlineHeap_WatchCoalesce(t *testing.T) { } group2 := map[string]time.Time{ - "10": now.Add(355 * time.Millisecond), + "10": now.Add(350 * time.Millisecond), "11": now.Add(360 * time.Millisecond), } From 8217ebf11e2b7585bef61b142550fa9bd3dfcb0e Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 9 Mar 2018 16:25:46 -0800 Subject: [PATCH 58/79] drainer: RegisterJob -> RegisterJobs Test job watcher --- nomad/drainer/drainer.go | 1 - nomad/drainer/watch_jobs.go | 38 +- nomad/drainer/watch_jobs_test.go | 714 +++++++++++++++++++++---------- nomad/drainer/watch_nodes.go | 4 +- nomad/structs/structs.go | 4 +- 5 files changed, 505 insertions(+), 256 deletions(-) diff --git a/nomad/drainer/drainer.go b/nomad/drainer/drainer.go index 98c52479a865..46dcad696d4c 100644 --- a/nomad/drainer/drainer.go +++ b/nomad/drainer/drainer.go @@ -332,7 +332,6 @@ func (n *NodeDrainer) batchDrainAllocs(allocs []*structs.Allocation) (uint64, er } n.batcher.Unlock() - // Wait for the future if err := future.Wait(); err != nil { return 0, err } diff --git a/nomad/drainer/watch_jobs.go b/nomad/drainer/watch_jobs.go index 714bac2b7e53..61a615646019 100644 --- a/nomad/drainer/watch_jobs.go +++ b/nomad/drainer/watch_jobs.go @@ -29,7 +29,7 @@ func NewDrainRequest(allocs 
[]*structs.Allocation) *DrainRequest { // DrainingJobWatcher is the interface for watching a job drain type DrainingJobWatcher interface { // RegisterJob is used to start watching a draining job - RegisterJob(job structs.JobNs) + RegisterJobs(job []structs.JobNs) // Drain is used to emit allocations that should be drained. Drain() <-chan *DrainRequest @@ -90,21 +90,28 @@ func NewDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *st } // RegisterJob marks the given job as draining and adds it to being watched. -func (w *drainingJobWatcher) RegisterJob(job structs.JobNs) { +func (w *drainingJobWatcher) RegisterJobs(jobs []structs.JobNs) { w.l.Lock() defer w.l.Unlock() - if _, ok := w.jobs[job]; ok { - return + updated := false + for _, jns := range jobs { + if _, ok := w.jobs[jns]; ok { + continue + } + + // Add the job and cancel the context + w.logger.Printf("[TRACE] nomad.drain.job_watcher: registering job %v", jns) + w.jobs[jns] = struct{}{} + updated = true } - // Add the job and cancel the context - w.logger.Printf("[TRACE] nomad.drain.job_watcher: registering job %v", job) - w.jobs[job] = struct{}{} - w.queryCancel() + if updated { + w.queryCancel() - // Create a new query context - w.queryCtx, w.queryCancel = context.WithCancel(w.ctx) + // Create a new query context + w.queryCtx, w.queryCancel = context.WithCancel(w.ctx) + } } // Drain returns the channel that emits allocations to drain. @@ -160,7 +167,6 @@ func (w *drainingJobWatcher) watch() { } } - // update index for next run lastHandled := waitIndex waitIndex = index @@ -184,7 +190,7 @@ func (w *drainingJobWatcher) watch() { // Lookup the job job, err := w.state.JobByID(nil, jns.Namespace, jns.ID) - if err != nil { + if err != nil || job == nil { w.logger.Printf("[WARN] nomad.drain.job_watcher: failed to lookup job %v: %v", jns, err) continue } @@ -268,7 +274,8 @@ type jobResult struct { done bool } -// newJobResult returns an initialized jobResult +// newJobResult returns a jobResult with done=true. It is the responsibility of +// callers to set done=false when a remaining drainable alloc is found. func newJobResult() *jobResult { return &jobResult{ done: true, @@ -390,10 +397,13 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, numToDrain := healthy - thresholdCount numToDrain = helper.IntMin(len(drainable), numToDrain) if numToDrain <= 0 { - fmt.Printf("------- Not draining any allocs\n") + fmt.Printf("------- Not draining any allocs: drainable:%d healthy:%d thresholdCount:%d\n", + len(drainable), healthy, thresholdCount) return nil } + fmt.Printf("------- DRAINing allocs: n: %d drainable:%d healthy:%d thresholdCount:%d\n", + numToDrain, len(drainable), healthy, thresholdCount) result.drain = append(result.drain, drainable[0:numToDrain]...) 
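// Editor's note, not part of the patch: a worked example of the calculation
// above, assuming thresholdCount is the task group count minus
// migrate.max_parallel (consistent with the DrainJobs test below, which uses
// count=8 and max_parallel=3):
//
//	healthy        = 8           // every alloc healthy, all on the draining node
//	thresholdCount = 8 - 3 = 5   // healthy allocs that must keep running
//	numToDrain     = 8 - 5 = 3   // then capped by min(len(drainable), 3) = 3
//
// so each pass drains at most max_parallel allocs and waits for replacements
// to become healthy before draining more.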
return nil } diff --git a/nomad/drainer/watch_jobs_test.go b/nomad/drainer/watch_jobs_test.go index 3db5ea0ac4b8..078e5316ec32 100644 --- a/nomad/drainer/watch_jobs_test.go +++ b/nomad/drainer/watch_jobs_test.go @@ -2,7 +2,6 @@ package drainer import ( "context" - "fmt" "testing" "time" @@ -11,309 +10,552 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/time/rate" ) -func testDrainingJobWatcher(t *testing.T) (*drainingJobWatcher, *state.StateStore) { +func testNodes(t *testing.T, state *state.StateStore) (drainingNode, runningNode *structs.Node) { + n1 := mock.Node() + n1.Name = "draining" + n1.DrainStrategy = &structs.DrainStrategy{ + DrainSpec: structs.DrainSpec{ + Deadline: time.Minute, + }, + ForceDeadline: time.Now().Add(time.Minute), + } + require.Nil(t, state.UpsertNode(100, n1)) + + // Create a non-draining node + n2 := mock.Node() + n2.Name = "running" + require.Nil(t, state.UpsertNode(101, n2)) + return n1, n2 +} + +func testDrainingJobWatcher(t *testing.T, state *state.StateStore) (*drainingJobWatcher, context.CancelFunc) { t.Helper() - state := state.TestStateStore(t) limiter := rate.NewLimiter(100.0, 100) logger := testlog.Logger(t) - w := NewDrainingJobWatcher(context.Background(), limiter, state, logger) - return w, state + ctx, cancel := context.WithCancel(context.Background()) + w := NewDrainingJobWatcher(ctx, limiter, state, logger) + return w, cancel } +// TestDrainingJobWatcher_Interface is a compile-time assertion that we +// implement the intended interface. func TestDrainingJobWatcher_Interface(t *testing.T) { - t.Parallel() - require := require.New(t) - w, _ := testDrainingJobWatcher(t) - require.Implements((*DrainingJobWatcher)(nil), w) + w, cancel := testDrainingJobWatcher(t, state.TestStateStore(t)) + cancel() + var _ DrainingJobWatcher = w } -// DrainingJobWatcher tests: -// TODO Test that several jobs allocation changes get batched -// TODO Test that jobs are deregistered when they have no more to migrate -// TODO Test that the watcher gets triggered on alloc changes -// TODO Test that the watcher cancels its query when a new job is registered - -func TestHandleTaskGroup_AllDone(t *testing.T) { +// TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches +// allocation changes from multiple jobs. 
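Editor's note, not part of the patch: a compressed sketch of the producer/consumer contract this test exercises, using only identifiers introduced in this series (RegisterJobs, Drain, Migrated, DrainRequest). jobWatcher, job, and index are assumed to be in scope as they are in the test; the loop is illustrative rather than a complete driver.

// Hedged sketch of how a consumer services the watcher.
jobWatcher.RegisterJobs([]structs.JobNs{structs.NewJobNs(job.Namespace, job.ID)})
for {
	select {
	case req := <-jobWatcher.Drain():
		// Persist DesiredTransition{Migrate: true} for req.Allocs via Raft,
		// then acknowledge with the index that write committed at.
		req.Resp.Respond(index, nil)
	case migrated := <-jobWatcher.Migrated():
		// Allocations that have left the draining node; once a job has no
		// drainable allocs remaining the watcher stops tracking it.
		_ = migrated
	}
}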
+func TestDrainingJobWatcher_DrainJobs(t *testing.T) { t.Parallel() require := require.New(t) - // Create a non-draining node state := state.TestStateStore(t) - n := mock.Node() - require.Nil(state.UpsertNode(100, n)) + jobWatcher, cancelWatcher := testDrainingJobWatcher(t, state) + defer cancelWatcher() + drainingNode, runningNode := testNodes(t, state) - job := mock.Job() - require.Nil(state.UpsertJob(101, job)) + var index uint64 = 101 + count := 8 - // Create 10 running allocs on the healthy node - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { + newAlloc := func(node *structs.Node, job *structs.Job) *structs.Allocation { a := mock.Alloc() + a.JobID = job.ID a.Job = job a.TaskGroup = job.TaskGroups[0].Name - a.NodeID = n.ID - a.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), + a.NodeID = node.ID + return a + } + + // 2 jobs with count 10, max parallel 3 + jnss := make([]structs.JobNs, 2) + jobs := make([]*structs.Job, 2) + for i := 0; i < 2; i++ { + job := mock.Job() + jobs[i] = job + jnss[i] = structs.NewJobNs(job.Namespace, job.ID) + job.TaskGroups[0].Migrate.MaxParallel = 3 + job.TaskGroups[0].Count = count + require.Nil(state.UpsertJob(index, job)) + index++ + + var allocs []*structs.Allocation + for i := 0; i < count; i++ { + a := newAlloc(drainingNode, job) + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(true), + } + allocs = append(allocs, a) } - allocs = append(allocs, a) + + require.Nil(state.UpsertAllocs(index, allocs)) + index++ + } - require.Nil(state.UpsertAllocs(102, allocs)) - snap, err := state.Snapshot() - require.Nil(err) + // Only register jobs with watcher after creating all data models as + // once the watcher starts we need to track the index carefully for + // updating the batch future + jobWatcher.RegisterJobs(jnss) + + // assertOps asserts how many allocs should be drained and migrated. + // The drains and migrations - if any - are returned. 
+ assertOps := func(drained, migrated int) (drains *DrainRequest, migrations []*structs.Allocation) { + t.Helper() + var drainsChecked, migrationsChecked bool + for { + select { + case drains = <-jobWatcher.Drain(): + ids := make([]string, len(drains.Allocs)) + for i, a := range drains.Allocs { + ids[i] = a.JobID[:6] + ":" + a.ID[:6] + } + t.Logf("draining %d allocs: %v", len(ids), ids) + require.False(drainsChecked, "drains already received") + drainsChecked = true + require.Lenf(drains.Allocs, drained, + "expected %d drains but found %d", drained, len(drains.Allocs)) + case migrations = <-jobWatcher.Migrated(): + ids := make([]string, len(migrations)) + for i, a := range migrations { + ids[i] = a.JobID[:6] + ":" + a.ID[:6] + } + t.Logf("migrating %d allocs: %v", len(ids), ids) + require.False(migrationsChecked, "migrations already received") + migrationsChecked = true + require.Lenf(migrations, migrated, + "expected %d migrations but found %d", migrated, len(migrations)) + case <-time.After(10 * time.Millisecond): + if !drainsChecked && drained > 0 { + t.Fatalf("expected %d drains but none happened", drained) + } + if !migrationsChecked && migrated > 0 { + t.Fatalf("expected %d migrations but none happened", migrated) + } + return drains, migrations + } + } + } - res := &jobResult{} - require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res)) - require.Empty(res.drain) - require.Empty(res.migrated) - require.True(res.done) -} + // Expect a first batch of MaxParallel allocs from each job + drains, _ := assertOps(6, 0) -func TestHandleTaskGroup_AllOnDrainingNodes(t *testing.T) { - t.Parallel() - require := require.New(t) + // Fake migrating the drained allocs by starting new ones and stopping + // the old ones + drainedAllocs := make([]*structs.Allocation, len(drains.Allocs)) + for i, a := range drains.Allocs { + a.DesiredTransition.Migrate = helper.BoolToPtr(true) - // The loop value sets the max parallel for the drain strategy - for i := 1; i < 8; i++ { - // Create a draining node - state := state.TestStateStore(t) - n := mock.Node() - n.DrainStrategy = &structs.DrainStrategy{ - DrainSpec: structs.DrainSpec{ - Deadline: 5 * time.Minute, - }, - ForceDeadline: time.Now().Add(1 * time.Minute), + // create a copy so we can reuse this slice + drainedAllocs[i] = a.Copy() + } + require.Nil(state.UpsertAllocs(index, drainedAllocs)) + drains.Resp.Respond(index, nil) + index++ + + // Just setting ShouldMigrate should not cause any further drains + assertOps(0, 0) + + // Proceed our fake migration along by creating new allocs and stopping + // old ones + replacements := make([]*structs.Allocation, len(drainedAllocs)) + updates := make([]*structs.Allocation, 0, len(drainedAllocs)*2) + for i, a := range drainedAllocs { + // Stop drained allocs + a.DesiredTransition.Migrate = nil + a.DesiredStatus = structs.AllocDesiredStatusStop + + // Create a replacement + replacement := mock.Alloc() + replacement.JobID = a.Job.ID + replacement.Job = a.Job + replacement.TaskGroup = a.TaskGroup + replacement.NodeID = runningNode.ID + // start in pending state with no health status + + updates = append(updates, a, replacement) + replacements[i] = replacement.Copy() + } + require.Nil(state.UpsertAllocs(index, updates)) + index++ + + // The drained allocs stopping cause migrations but no new drains + // because the replacements have not started + assertOps(0, 6) + + // Finally kickoff further drain activity by "starting" replacements + for _, a := range replacements { + a.ClientStatus = 
structs.AllocClientStatusRunning + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(true), } - require.Nil(state.UpsertNode(100, n)) + } + require.Nil(state.UpsertAllocs(index, replacements)) + index++ - job := mock.Job() - job.TaskGroups[0].Migrate.MaxParallel = i - require.Nil(state.UpsertJob(101, job)) + require.NotEmpty(jobWatcher.drainingJobs()) - // Create 10 running allocs on the draining node - var allocs []*structs.Allocation - for i := 0; i < 10; i++ { - a := mock.Alloc() - a.Job = job - a.TaskGroup = job.TaskGroups[0].Name - a.NodeID = n.ID - a.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), - } - allocs = append(allocs, a) + // 6 new drains + drains, _ = assertOps(6, 0) + + // Fake migrations once more to finish the drain + drainedAllocs = make([]*structs.Allocation, len(drains.Allocs)) + for i, a := range drains.Allocs { + a.DesiredTransition.Migrate = helper.BoolToPtr(true) + + // create a copy so we can reuse this slice + drainedAllocs[i] = a.Copy() + } + require.Nil(state.UpsertAllocs(index, drainedAllocs)) + drains.Resp.Respond(index, nil) + index++ + + assertOps(0, 0) + + replacements = make([]*structs.Allocation, len(drainedAllocs)) + updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2) + for i, a := range drainedAllocs { + a.DesiredTransition.Migrate = nil + a.DesiredStatus = structs.AllocDesiredStatusStop + + replacement := newAlloc(runningNode, a.Job) + updates = append(updates, a, replacement) + replacements[i] = replacement.Copy() + } + require.Nil(state.UpsertAllocs(index, updates)) + index++ + + assertOps(0, 6) + + for _, a := range replacements { + a.ClientStatus = structs.AllocClientStatusRunning + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(true), } - require.Nil(state.UpsertAllocs(102, allocs)) + } + require.Nil(state.UpsertAllocs(index, replacements)) + index++ + + require.NotEmpty(jobWatcher.drainingJobs()) - snap, err := state.Snapshot() - require.Nil(err) + // Final 4 new drains + drains, _ = assertOps(4, 0) - res := &jobResult{} - require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res)) - require.Len(res.drain, i) - require.Empty(res.migrated) - require.False(res.done) + // Fake migrations once more to finish the drain + drainedAllocs = make([]*structs.Allocation, len(drains.Allocs)) + for i, a := range drains.Allocs { + a.DesiredTransition.Migrate = helper.BoolToPtr(true) + + // create a copy so we can reuse this slice + drainedAllocs[i] = a.Copy() } + require.Nil(state.UpsertAllocs(index, drainedAllocs)) + drains.Resp.Respond(index, nil) + index++ + + assertOps(0, 0) + + replacements = make([]*structs.Allocation, len(drainedAllocs)) + updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2) + for i, a := range drainedAllocs { + a.DesiredTransition.Migrate = nil + a.DesiredStatus = structs.AllocDesiredStatusStop + + replacement := newAlloc(runningNode, a.Job) + updates = append(updates, a, replacement) + replacements[i] = replacement.Copy() + } + require.Nil(state.UpsertAllocs(index, updates)) + index++ + + assertOps(0, 4) + + for _, a := range replacements { + a.ClientStatus = structs.AllocClientStatusRunning + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(true), + } + } + require.Nil(state.UpsertAllocs(index, replacements)) + index++ + + // No jobs should be left! 
+ require.Empty(jobWatcher.drainingJobs()) +} + +// DrainingJobWatcher tests: +// TODO Test that the watcher cancels its query when a new job is registered + +// handleTaskGroupTestCase is the test case struct for TestHandleTaskGroup +// +// Two nodes will be initialized: one draining and one running. +type handleTaskGroupTestCase struct { + // Name of test + Name string + + // Expectations + ExpectedDrained int + ExpectedMigrated int + ExpectedDone bool + + // Count overrides the default count of 10 if set + Count int + + // MaxParallel overrides the default max_parallel of 1 if set + MaxParallel int + + // AddAlloc will be called 10 times to create test allocs + // + // Allocs default to be healthy on the draining node + AddAlloc func(i int, a *structs.Allocation, drainingID, runningID string) } -func TestHandleTaskGroup_MixedHealth(t *testing.T) { - cases := []struct { - maxParallel int - drainingNodeAllocs int - healthSet int - healthUnset int - expectedDrain int - expectedMigrated int - expectedDone bool - }{ +func TestHandeTaskGroup_Table(t *testing.T) { + cases := []handleTaskGroupTestCase{ { - maxParallel: 2, - drainingNodeAllocs: 10, - healthSet: 0, - healthUnset: 0, - expectedDrain: 2, - expectedMigrated: 0, - expectedDone: false, + // All allocs on draining node + Name: "AllDraining", + ExpectedDrained: 1, + ExpectedMigrated: 0, + ExpectedDone: false, }, { - maxParallel: 2, - drainingNodeAllocs: 9, - healthSet: 0, - healthUnset: 0, - expectedDrain: 1, - expectedMigrated: 1, - expectedDone: false, + // All allocs on non-draining node + Name: "AllNonDraining", + ExpectedDrained: 0, + ExpectedMigrated: 0, + ExpectedDone: true, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + a.NodeID = runningID + }, }, { - maxParallel: 5, - drainingNodeAllocs: 9, - healthSet: 0, - healthUnset: 0, - expectedDrain: 4, - expectedMigrated: 1, - expectedDone: false, + // Some allocs on non-draining node but not healthy + Name: "SomeNonDrainingUnhealthy", + ExpectedDrained: 0, + ExpectedMigrated: 0, + ExpectedDone: false, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + if i%2 == 0 { + a.NodeID = runningID + a.DeploymentStatus = nil + } + }, }, { - maxParallel: 2, - drainingNodeAllocs: 5, - healthSet: 2, - healthUnset: 0, - expectedDrain: 0, - expectedMigrated: 5, - expectedDone: false, + // One draining, other allocs on non-draining node and healthy + Name: "OneDraining", + ExpectedDrained: 1, + ExpectedMigrated: 0, + ExpectedDone: false, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + if i != 0 { + a.NodeID = runningID + } + }, }, { - maxParallel: 2, - drainingNodeAllocs: 5, - healthSet: 3, - healthUnset: 0, - expectedDrain: 0, - expectedMigrated: 5, - expectedDone: false, + // One already draining, other allocs on non-draining node and healthy + Name: "OneAlreadyDraining", + ExpectedDrained: 0, + ExpectedMigrated: 0, + ExpectedDone: false, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + if i == 0 { + a.DesiredTransition.Migrate = helper.BoolToPtr(true) + return + } + a.NodeID = runningID + }, }, { - maxParallel: 2, - drainingNodeAllocs: 5, - healthSet: 4, - healthUnset: 0, - expectedDrain: 1, - expectedMigrated: 5, - expectedDone: false, + // One already drained, other allocs on non-draining node and healthy + Name: "OneAlreadyDrained", + ExpectedDrained: 0, + ExpectedMigrated: 1, + ExpectedDone: true, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID 
string) { + if i == 0 { + a.DesiredStatus = structs.AllocDesiredStatusStop + return + } + a.NodeID = runningID + }, }, { - maxParallel: 2, - drainingNodeAllocs: 5, - healthSet: 4, - healthUnset: 1, - expectedDrain: 1, - expectedMigrated: 5, - expectedDone: false, + // All allocs are terminl, nothing to be drained + Name: "AllMigrating", + ExpectedDrained: 0, + ExpectedMigrated: 10, + ExpectedDone: true, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + a.DesiredStatus = structs.AllocDesiredStatusStop + }, }, { - maxParallel: 1, - drainingNodeAllocs: 5, - healthSet: 4, - healthUnset: 1, - expectedDrain: 0, - expectedMigrated: 5, - expectedDone: false, + // All allocs may be drained at once + Name: "AllAtOnce", + ExpectedDrained: 10, + ExpectedMigrated: 0, + ExpectedDone: false, + MaxParallel: 10, }, { - maxParallel: 3, - drainingNodeAllocs: 5, - healthSet: 3, - healthUnset: 0, - expectedDrain: 1, - expectedMigrated: 5, - expectedDone: false, + // Drain 2 + Name: "Drain2", + ExpectedDrained: 2, + ExpectedMigrated: 0, + ExpectedDone: false, + MaxParallel: 2, }, { - maxParallel: 3, - drainingNodeAllocs: 0, - healthSet: 10, - healthUnset: 0, - expectedDrain: 0, - expectedMigrated: 10, - expectedDone: true, + // One on new node, one drained, and one draining + ExpectedDrained: 1, + ExpectedMigrated: 1, + MaxParallel: 2, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + switch i { + case 0: + // One alloc on running node + a.NodeID = runningID + case 1: + // One alloc already migrated + a.DesiredStatus = structs.AllocDesiredStatusStop + } + }, }, { - // Is the case where deadline is hit and all 10 are just marked - // stopped. We should detect the job as done. - maxParallel: 3, - drainingNodeAllocs: 0, - healthSet: 0, - healthUnset: 0, - expectedDrain: 0, - expectedMigrated: 10, - expectedDone: true, + // 8 on new node, one drained, and one draining + ExpectedDrained: 1, + ExpectedMigrated: 1, + MaxParallel: 2, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + switch i { + case 0, 1, 2, 3, 4, 5, 6, 7: + a.NodeID = runningID + case 8: + a.DesiredStatus = structs.AllocDesiredStatusStop + } + }, + }, + { + // 5 on new node, two drained, and three draining + ExpectedDrained: 3, + ExpectedMigrated: 2, + MaxParallel: 5, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + switch i { + case 0, 1, 2, 3, 4: + a.NodeID = runningID + case 8, 9: + a.DesiredStatus = structs.AllocDesiredStatusStop + } + }, + }, + { + // Not all on new node have health set + Name: "PendingHealth", + ExpectedDrained: 1, + ExpectedMigrated: 1, + MaxParallel: 3, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + switch i { + case 0: + // Deployment status UNset for 1 on new node + a.NodeID = runningID + a.DeploymentStatus = nil + case 1, 2, 3, 4: + // Deployment status set for 4 on new node + a.NodeID = runningID + case 9: + a.DesiredStatus = structs.AllocDesiredStatusStop + } + }, + }, + { + // 5 max parallel - 1 migrating - 2 with unset health = 2 drainable + Name: "PendingHealthHigherMax", + ExpectedDrained: 2, + ExpectedMigrated: 1, + MaxParallel: 5, + AddAlloc: func(i int, a *structs.Allocation, drainingID, runningID string) { + switch i { + case 0, 1: + // Deployment status UNset for 2 on new node + a.NodeID = runningID + a.DeploymentStatus = nil + case 2, 3, 4: + // Deployment status set for 3 on new node + a.NodeID = runningID + case 9: + a.DesiredStatus = 
structs.AllocDesiredStatusStop + } + }, }, } - for cnum, c := range cases { - t.Run(fmt.Sprintf("%d", cnum), func(t *testing.T) { - require := require.New(t) + for _, testCase := range cases { + t.Run(testCase.Name, func(t *testing.T) { + testHandleTaskGroup(t, testCase) + }) + } +} - // Create a draining node - state := state.TestStateStore(t) +func testHandleTaskGroup(t *testing.T, tc handleTaskGroupTestCase) { + t.Parallel() + require := require.New(t) + assert := assert.New(t) - drainingNode := mock.Node() - drainingNode.DrainStrategy = &structs.DrainStrategy{ - DrainSpec: structs.DrainSpec{ - Deadline: 5 * time.Minute, - }, - ForceDeadline: time.Now().Add(1 * time.Minute), - } - require.Nil(state.UpsertNode(100, drainingNode)) - - healthyNode := mock.Node() - require.Nil(state.UpsertNode(101, healthyNode)) - - job := mock.Job() - job.TaskGroups[0].Migrate.MaxParallel = c.maxParallel - require.Nil(state.UpsertJob(101, job)) - - // Create running allocs on the draining node with health set - var allocs []*structs.Allocation - for i := 0; i < c.drainingNodeAllocs; i++ { - a := mock.Alloc() - a.Job = job - a.TaskGroup = job.TaskGroups[0].Name - a.NodeID = drainingNode.ID - a.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), - } - allocs = append(allocs, a) - } + // Create nodes + state := state.TestStateStore(t) + drainingNode, runningNode := testNodes(t, state) - // Create stopped allocs on the draining node - for i := 10 - c.drainingNodeAllocs; i > 0; i-- { - a := mock.Alloc() - a.Job = job - a.TaskGroup = job.TaskGroups[0].Name - a.NodeID = drainingNode.ID - a.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), - } - a.DesiredStatus = structs.AllocDesiredStatusStop - allocs = append(allocs, a) - } + job := mock.Job() + job.TaskGroups[0].Count = 10 + if tc.Count > 0 { + job.TaskGroups[0].Count = tc.Count + } + if tc.MaxParallel > 0 { + job.TaskGroups[0].Migrate.MaxParallel = tc.MaxParallel + } + require.Nil(state.UpsertJob(102, job)) - // Create allocs on the healthy node with health set - for i := 0; i < c.healthSet; i++ { - a := mock.Alloc() - a.Job = job - a.TaskGroup = job.TaskGroups[0].Name - a.NodeID = healthyNode.ID - a.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), - } - allocs = append(allocs, a) - } + var allocs []*structs.Allocation + for i := 0; i < 10; i++ { + a := mock.Alloc() + a.JobID = job.ID + a.Job = job + a.TaskGroup = job.TaskGroups[0].Name - // Create allocs on the healthy node with health not set - for i := 0; i < c.healthUnset; i++ { - a := mock.Alloc() - a.Job = job - a.TaskGroup = job.TaskGroups[0].Name - a.NodeID = healthyNode.ID - allocs = append(allocs, a) - } - require.Nil(state.UpsertAllocs(103, allocs)) + // Default to being healthy on the draining node + a.NodeID = drainingNode.ID + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Healthy: helper.BoolToPtr(true), + } + if tc.AddAlloc != nil { + tc.AddAlloc(i, a, drainingNode.ID, runningNode.ID) + } + allocs = append(allocs, a) + } - snap, err := state.Snapshot() - require.Nil(err) + require.Nil(state.UpsertAllocs(103, allocs)) + snap, err := state.Snapshot() + require.Nil(err) - res := &jobResult{} - require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res)) - require.Len(res.drain, c.expectedDrain) - require.Len(res.migrated, c.expectedMigrated) - require.Equal(c.expectedDone, res.done) - }) - } + res := newJobResult() + require.Nil(handleTaskGroup(snap, job.TaskGroups[0], 
allocs, 102, res)) + assert.Lenf(res.drain, tc.ExpectedDrained, "Drain expected %d but found: %d", + tc.ExpectedDrained, len(res.drain)) + assert.Lenf(res.migrated, tc.ExpectedMigrated, "Migrate expected %d but found: %d", + tc.ExpectedMigrated, len(res.migrated)) + assert.Equal(tc.ExpectedDone, res.done) } func TestHandleTaskGroup_Migrations(t *testing.T) { diff --git a/nomad/drainer/watch_nodes.go b/nomad/drainer/watch_nodes.go index ed99fb6938c5..97c6cf8b24ce 100644 --- a/nomad/drainer/watch_nodes.go +++ b/nomad/drainer/watch_nodes.go @@ -74,9 +74,7 @@ func (n *NodeDrainer) Update(node *structs.Node) { return } n.logger.Printf("[TRACE] nomad.drain: node %q has %d services on it", node.ID, len(jobs)) - for _, job := range jobs { - n.jobWatcher.RegisterJob(job) - } + n.jobWatcher.RegisterJobs(jobs) // TODO Test at this layer as well that a node drain on a node without // allocs immediately gets unmarked as draining diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 72f2c0a31948..fa16284abf1d 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1794,13 +1794,13 @@ func (n *NetworkResource) PortLabels() map[string]int { // JobNs is a Job.ID and Namespace tuple type JobNs struct { - ID, Namespace string + Namespace, ID string } func NewJobNs(namespace, id string) JobNs { return JobNs{ - ID: id, Namespace: namespace, + ID: id, } } From 74dc8fd46076dbc6e351b6fb472a481289e1da11 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 19 Mar 2018 10:12:12 -0700 Subject: [PATCH 59/79] JobNs -> NamespacedID Also drop the New func as it's easy to swap the order of arguments since they're both strings. --- nomad/drainer/draining_node.go | 8 ++++---- nomad/drainer/watch_jobs.go | 20 ++++++++++---------- nomad/drainer/watch_jobs_test.go | 4 ++-- nomad/structs/structs.go | 20 ++++---------------- 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/nomad/drainer/draining_node.go b/nomad/drainer/draining_node.go index 078399f049f9..af5c094b8089 100644 --- a/nomad/drainer/draining_node.go +++ b/nomad/drainer/draining_node.go @@ -125,7 +125,7 @@ func (n *drainingNode) DeadlineAllocs() ([]*structs.Allocation, error) { } // RunningServices returns the set of jobs on the node -func (n *drainingNode) RunningServices() ([]structs.JobNs, error) { +func (n *drainingNode) RunningServices() ([]structs.NamespacedID, error) { n.l.RLock() defer n.l.RUnlock() @@ -135,14 +135,14 @@ func (n *drainingNode) RunningServices() ([]structs.JobNs, error) { return nil, err } - jobIDs := make(map[structs.JobNs]struct{}) - var jobs []structs.JobNs + jobIDs := make(map[structs.NamespacedID]struct{}) + var jobs []structs.NamespacedID for _, alloc := range allocs { if alloc.TerminalStatus() || alloc.Job.Type != structs.JobTypeService { continue } - jns := structs.NewJobNs(alloc.Namespace, alloc.JobID) + jns := structs.NamespacedID{Namespace: alloc.Namespace, ID: alloc.JobID} if _, ok := jobIDs[jns]; ok { continue } diff --git a/nomad/drainer/watch_jobs.go b/nomad/drainer/watch_jobs.go index 61a615646019..181871b204db 100644 --- a/nomad/drainer/watch_jobs.go +++ b/nomad/drainer/watch_jobs.go @@ -29,7 +29,7 @@ func NewDrainRequest(allocs []*structs.Allocation) *DrainRequest { // DrainingJobWatcher is the interface for watching a job drain type DrainingJobWatcher interface { // RegisterJob is used to start watching a draining job - RegisterJobs(job []structs.JobNs) + RegisterJobs(job []structs.NamespacedID) // Drain is used to emit allocations that should be drained. 
Drain() <-chan *DrainRequest @@ -52,7 +52,7 @@ type drainingJobWatcher struct { limiter *rate.Limiter // jobs is the set of tracked jobs. - jobs map[structs.JobNs]struct{} + jobs map[structs.NamespacedID]struct{} // queryCtx is used to cancel a blocking query. queryCtx context.Context @@ -80,7 +80,7 @@ func NewDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *st limiter: limiter, logger: logger, state: state, - jobs: make(map[structs.JobNs]struct{}, 64), + jobs: make(map[structs.NamespacedID]struct{}, 64), drainCh: make(chan *DrainRequest), migratedCh: make(chan []*structs.Allocation), } @@ -90,7 +90,7 @@ func NewDrainingJobWatcher(ctx context.Context, limiter *rate.Limiter, state *st } // RegisterJob marks the given job as draining and adds it to being watched. -func (w *drainingJobWatcher) RegisterJobs(jobs []structs.JobNs) { +func (w *drainingJobWatcher) RegisterJobs(jobs []structs.NamespacedID) { w.l.Lock() defer w.l.Unlock() @@ -129,7 +129,7 @@ func (w *drainingJobWatcher) Migrated() <-chan []*structs.Allocation { func (w *drainingJobWatcher) deregisterJob(jobID, namespace string) { w.l.Lock() defer w.l.Unlock() - jns := structs.JobNs{ + jns := structs.NamespacedID{ ID: jobID, Namespace: namespace, } @@ -409,7 +409,7 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, } // getJobAllocs returns all allocations for draining jobs -func (w *drainingJobWatcher) getJobAllocs(ctx context.Context, minIndex uint64) (map[structs.JobNs][]*structs.Allocation, uint64, error) { +func (w *drainingJobWatcher) getJobAllocs(ctx context.Context, minIndex uint64) (map[structs.NamespacedID][]*structs.Allocation, uint64, error) { if err := w.limiter.Wait(ctx); err != nil { return nil, 0, err } @@ -422,7 +422,7 @@ func (w *drainingJobWatcher) getJobAllocs(ctx context.Context, minIndex uint64) return nil, index, nil } - return resp.(map[structs.JobNs][]*structs.Allocation), index, nil + return resp.(map[structs.NamespacedID][]*structs.Allocation), index, nil } // getJobAllocsImpl returns a map of draining jobs to their allocations. @@ -440,7 +440,7 @@ func (w *drainingJobWatcher) getJobAllocsImpl(ws memdb.WatchSet, state *state.St } // Capture the allocs for each draining job. - resp := make(map[structs.JobNs][]*structs.Allocation, l) + resp := make(map[structs.NamespacedID][]*structs.Allocation, l) for jns := range draining { allocs, err := state.AllocsByJob(ws, jns.Namespace, jns.ID, false) if err != nil { @@ -454,7 +454,7 @@ func (w *drainingJobWatcher) getJobAllocsImpl(ws memdb.WatchSet, state *state.St } // drainingJobs captures the set of draining jobs. 
-func (w *drainingJobWatcher) drainingJobs() map[structs.JobNs]struct{} { +func (w *drainingJobWatcher) drainingJobs() map[structs.NamespacedID]struct{} { w.l.RLock() defer w.l.RUnlock() @@ -463,7 +463,7 @@ func (w *drainingJobWatcher) drainingJobs() map[structs.JobNs]struct{} { return nil } - draining := make(map[structs.JobNs]struct{}, l) + draining := make(map[structs.NamespacedID]struct{}, l) for k := range w.jobs { draining[k] = struct{}{} } diff --git a/nomad/drainer/watch_jobs_test.go b/nomad/drainer/watch_jobs_test.go index 078e5316ec32..399ee46a16ec 100644 --- a/nomad/drainer/watch_jobs_test.go +++ b/nomad/drainer/watch_jobs_test.go @@ -75,12 +75,12 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { } // 2 jobs with count 10, max parallel 3 - jnss := make([]structs.JobNs, 2) + jnss := make([]structs.NamespacedID, 2) jobs := make([]*structs.Job, 2) for i := 0; i < 2; i++ { job := mock.Job() jobs[i] = job - jnss[i] = structs.NewJobNs(job.Namespace, job.ID) + jnss[i] = structs.NamespacedID{Namespace: job.Namespace, ID: job.ID} job.TaskGroups[0].Migrate.MaxParallel = 3 job.TaskGroups[0].Count = count require.Nil(state.UpsertJob(index, job)) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index fa16284abf1d..307cce7faf0c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -156,6 +156,10 @@ type NamespacedID struct { Namespace string } +func (n NamespacedID) String() string { + return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID) +} + // RPCInfo is used to describe common information about query type RPCInfo interface { RequestRegion() string @@ -1792,22 +1796,6 @@ func (n *NetworkResource) PortLabels() map[string]int { return labelValues } -// JobNs is a Job.ID and Namespace tuple -type JobNs struct { - Namespace, ID string -} - -func NewJobNs(namespace, id string) JobNs { - return JobNs{ - Namespace: namespace, - ID: id, - } -} - -func (j JobNs) String() string { - return fmt.Sprintf("<ns: %q, id: %q>", j.Namespace, j.ID) -} - const ( // JobTypeNomad is reserved for internal system tasks and is // always handled by the CoreScheduler. 
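Reviewer note on PATCH 59: the JobNs -> NamespacedID rename is worth a brief illustration. Because NamespacedID is a comparable struct of two strings, it can key a Go map directly, which is how the job watcher tracks the set of draining jobs; dropping the positional constructor forces call sites to name both fields, so namespace and ID cannot be silently swapped. The standalone sketch below is not part of the patch series and is illustrative only: the namespace and job ID are made up, and the String format is a reconstruction of the helper added above.

package main

import "fmt"

// NamespacedID mirrors the struct in the patch above: a comparable
// Namespace/ID tuple that can be used directly as a map key.
type NamespacedID struct {
	ID        string
	Namespace string
}

// String mirrors the formatting helper added in PATCH 59
// (format string reconstructed; treat as illustrative).
func (n NamespacedID) String() string {
	return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID)
}

func main() {
	// Track draining jobs the way the watcher does: a set keyed by the
	// namespaced ID, so duplicate registrations collapse naturally.
	draining := make(map[NamespacedID]struct{})

	// Hypothetical namespace and job ID, for illustration only.
	jns := NamespacedID{Namespace: "default", ID: "example-job"}
	draining[jns] = struct{}{}

	if _, ok := draining[jns]; ok {
		fmt.Println("tracking", jns)
	}
}

Printing the value goes through the String method, so log lines read as a namespaced tuple rather than a raw struct dump.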
From 8ef7863bed8271769d11c09fefb01f6b4d48cb95 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 19 Mar 2018 10:18:20 -0700 Subject: [PATCH 60/79] Deregister garbage collected jobs --- nomad/drainer/watch_jobs.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/nomad/drainer/watch_jobs.go b/nomad/drainer/watch_jobs.go index 181871b204db..388bf9a7f7b4 100644 --- a/nomad/drainer/watch_jobs.go +++ b/nomad/drainer/watch_jobs.go @@ -189,12 +189,19 @@ func (w *drainingJobWatcher) watch() { w.logger.Printf("[TRACE] nomad.drain.job_watcher: handling job %v", jns) // Lookup the job - job, err := w.state.JobByID(nil, jns.Namespace, jns.ID) - if err != nil || job == nil { + job, err := snap.JobByID(nil, jns.Namespace, jns.ID) + if err != nil { w.logger.Printf("[WARN] nomad.drain.job_watcher: failed to lookup job %v: %v", jns, err) continue } + // Ignore purged jobs + if job == nil { + w.logger.Printf("[TRACE] nomad.drain.job_watcher: ignoring garbage collected job %q", jns) + w.deregisterJob(jns.ID, jns.Namespace) + continue + } + // Ignore all non-service jobs if job.Type != structs.JobTypeService { w.deregisterJob(job.ID, job.Namespace) From e003b0534b46db55097144f7f565f2f21b9356a0 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 19 Mar 2018 10:23:45 -0700 Subject: [PATCH 61/79] Remove debug prints --- nomad/drainer/watch_jobs.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/nomad/drainer/watch_jobs.go b/nomad/drainer/watch_jobs.go index 388bf9a7f7b4..93232aeb40e9 100644 --- a/nomad/drainer/watch_jobs.go +++ b/nomad/drainer/watch_jobs.go @@ -336,9 +336,6 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, var drainable []*structs.Allocation for _, alloc := range allocs { - // TODO Remove at the end/when no more bugs - fmt.Printf("--- Looking at alloc %q\n", alloc.ID) - // Check if the alloc is on a draining node. onDrainingNode, ok := drainingNodes[alloc.NodeID] if !ok { @@ -360,7 +357,6 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, onDrainingNode && alloc.ModifyIndex > lastHandledIndex { result.migrated = append(result.migrated, alloc) - fmt.Printf("------- Alloc %q marked as migrated\n", alloc.ID) continue } @@ -369,7 +365,6 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, if !alloc.TerminalStatus() && alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil { - fmt.Printf("------- Alloc %q considered as healthy\n", alloc.ID) healthy++ } @@ -377,7 +372,6 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, // - It isn't on a draining node // - It is already terminal if !onDrainingNode || alloc.TerminalStatus() { - fmt.Printf("------- Alloc %q not drainable\n", alloc.ID) continue } @@ -389,13 +383,11 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, // it as eligible for draining. 
if !alloc.DesiredTransition.ShouldMigrate() { drainable = append(drainable, alloc) - fmt.Printf("------- Alloc %q drainable\n", alloc.ID) } } // Update the done status if remainingDrainingAlloc { - fmt.Printf("------- Job has remaining allocs to drain\n") result.done = false } @@ -404,13 +396,9 @@ func handleTaskGroup(snap *state.StateSnapshot, tg *structs.TaskGroup, numToDrain := healthy - thresholdCount numToDrain = helper.IntMin(len(drainable), numToDrain) if numToDrain <= 0 { - fmt.Printf("------- Not draining any allocs: drainable:%d healthy:%d thresholdCount:%d\n", - len(drainable), healthy, thresholdCount) return nil } - fmt.Printf("------- DRAINing allocs: n: %d drainable:%d healthy:%d thresholdCount:%d\n", - numToDrain, len(drainable), healthy, thresholdCount) result.drain = append(result.drain, drainable[0:numToDrain]...) return nil } From 08c9116f2d563729ec212c125c6fdc1d0de023b8 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Mon, 19 Mar 2018 10:36:31 -0700 Subject: [PATCH 62/79] Refactor assertOps into a helper func --- nomad/drainer/watch_jobs_test.go | 101 ++++++++++++++++--------------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/nomad/drainer/watch_jobs_test.go b/nomad/drainer/watch_jobs_test.go index 399ee46a16ec..3a73938d713b 100644 --- a/nomad/drainer/watch_jobs_test.go +++ b/nomad/drainer/watch_jobs_test.go @@ -51,6 +51,50 @@ func TestDrainingJobWatcher_Interface(t *testing.T) { var _ DrainingJobWatcher = w } +// asertJobWatcherOps asserts a certain number of allocs are drained and/or +// migrated by the job watcher. +func assertJobWatcherOps(t *testing.T, jw DrainingJobWatcher, drained, migrated int) ( + *DrainRequest, []*structs.Allocation) { + t.Helper() + var ( + drains *DrainRequest + migrations []*structs.Allocation + drainsChecked, migrationsChecked bool + ) + for { + select { + case drains = <-jw.Drain(): + ids := make([]string, len(drains.Allocs)) + for i, a := range drains.Allocs { + ids[i] = a.JobID[:6] + ":" + a.ID[:6] + } + t.Logf("draining %d allocs: %v", len(ids), ids) + require.False(t, drainsChecked, "drains already received") + drainsChecked = true + require.Lenf(t, drains.Allocs, drained, + "expected %d drains but found %d", drained, len(drains.Allocs)) + case migrations = <-jw.Migrated(): + ids := make([]string, len(migrations)) + for i, a := range migrations { + ids[i] = a.JobID[:6] + ":" + a.ID[:6] + } + t.Logf("migrating %d allocs: %v", len(ids), ids) + require.False(t, migrationsChecked, "migrations already received") + migrationsChecked = true + require.Lenf(t, migrations, migrated, + "expected %d migrations but found %d", migrated, len(migrations)) + case <-time.After(10 * time.Millisecond): + if !drainsChecked && drained > 0 { + t.Fatalf("expected %d drains but none happened", drained) + } + if !migrationsChecked && migrated > 0 { + t.Fatalf("expected %d migrations but none happened", migrated) + } + return drains, migrations + } + } +} + // TestDrainingJobWatcher_DrainJobs asserts DrainingJobWatcher batches // allocation changes from multiple jobs. func TestDrainingJobWatcher_DrainJobs(t *testing.T) { @@ -105,47 +149,8 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { // updating the batch future jobWatcher.RegisterJobs(jnss) - // assertOps asserts how many allocs should be drained and migrated. - // The drains and migrations - if any - are returned. 
- assertOps := func(drained, migrated int) (drains *DrainRequest, migrations []*structs.Allocation) { - t.Helper() - var drainsChecked, migrationsChecked bool - for { - select { - case drains = <-jobWatcher.Drain(): - ids := make([]string, len(drains.Allocs)) - for i, a := range drains.Allocs { - ids[i] = a.JobID[:6] + ":" + a.ID[:6] - } - t.Logf("draining %d allocs: %v", len(ids), ids) - require.False(drainsChecked, "drains already received") - drainsChecked = true - require.Lenf(drains.Allocs, drained, - "expected %d drains but found %d", drained, len(drains.Allocs)) - case migrations = <-jobWatcher.Migrated(): - ids := make([]string, len(migrations)) - for i, a := range migrations { - ids[i] = a.JobID[:6] + ":" + a.ID[:6] - } - t.Logf("migrating %d allocs: %v", len(ids), ids) - require.False(migrationsChecked, "migrations already received") - migrationsChecked = true - require.Lenf(migrations, migrated, - "expected %d migrations but found %d", migrated, len(migrations)) - case <-time.After(10 * time.Millisecond): - if !drainsChecked && drained > 0 { - t.Fatalf("expected %d drains but none happened", drained) - } - if !migrationsChecked && migrated > 0 { - t.Fatalf("expected %d migrations but none happened", migrated) - } - return drains, migrations - } - } - } - // Expect a first batch of MaxParallel allocs from each job - drains, _ := assertOps(6, 0) + drains, _ := assertJobWatcherOps(t, jobWatcher, 6, 0) // Fake migrating the drained allocs by starting new ones and stopping // the old ones @@ -161,7 +166,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { index++ // Just setting ShouldMigrate should not cause any further drains - assertOps(0, 0) + assertJobWatcherOps(t, jobWatcher, 0, 0) // Proceed our fake migration along by creating new allocs and stopping // old ones @@ -188,7 +193,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { // The drained allocs stopping cause migrations but no new drains // because the replacements have not started - assertOps(0, 6) + assertJobWatcherOps(t, jobWatcher, 0, 6) // Finally kickoff further drain activity by "starting" replacements for _, a := range replacements { @@ -203,7 +208,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { require.NotEmpty(jobWatcher.drainingJobs()) // 6 new drains - drains, _ = assertOps(6, 0) + drains, _ = assertJobWatcherOps(t, jobWatcher, 6, 0) // Fake migrations once more to finish the drain drainedAllocs = make([]*structs.Allocation, len(drains.Allocs)) @@ -217,7 +222,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { drains.Resp.Respond(index, nil) index++ - assertOps(0, 0) + assertJobWatcherOps(t, jobWatcher, 0, 0) replacements = make([]*structs.Allocation, len(drainedAllocs)) updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2) @@ -232,7 +237,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { require.Nil(state.UpsertAllocs(index, updates)) index++ - assertOps(0, 6) + assertJobWatcherOps(t, jobWatcher, 0, 6) for _, a := range replacements { a.ClientStatus = structs.AllocClientStatusRunning @@ -246,7 +251,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { require.NotEmpty(jobWatcher.drainingJobs()) // Final 4 new drains - drains, _ = assertOps(4, 0) + drains, _ = assertJobWatcherOps(t, jobWatcher, 4, 0) // Fake migrations once more to finish the drain drainedAllocs = make([]*structs.Allocation, len(drains.Allocs)) @@ -260,7 +265,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { drains.Resp.Respond(index, nil) index++ - assertOps(0, 0) + 
assertJobWatcherOps(t, jobWatcher, 0, 0) replacements = make([]*structs.Allocation, len(drainedAllocs)) updates = make([]*structs.Allocation, 0, len(drainedAllocs)*2) @@ -275,7 +280,7 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { require.Nil(state.UpsertAllocs(index, updates)) index++ - assertOps(0, 4) + assertJobWatcherOps(t, jobWatcher, 0, 4) for _, a := range replacements { a.ClientStatus = structs.AllocClientStatusRunning From 98935b82e04829759db8e8aa20913b22e19bb4f8 Mon Sep 17 00:00:00 2001 From: Alex Dadgar Date: Mon, 19 Mar 2018 15:19:57 -0700 Subject: [PATCH 63/79] fix race in drain integration tests --- nomad/drainer_int_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nomad/drainer_int_test.go b/nomad/drainer_int_test.go index 8e03a2ef5ff3..f71363a0d03b 100644 --- a/nomad/drainer_int_test.go +++ b/nomad/drainer_int_test.go @@ -60,7 +60,13 @@ func allocPromoter(t *testing.T, ctx context.Context, WriteRequest: structs.WriteRequest{Region: "global"}, } var resp structs.NodeAllocsResponse - require.Nil(t, msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", req, &resp)) + if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", req, &resp); err != nil { + if ctx.Err() == context.Canceled { + return + } else { + require.Nil(t, err) + } + } } } From 4efbc349a408fe280ded52a775f321e971db72c2 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 10:54:55 -0700 Subject: [PATCH 64/79] rpcapi: remove; unused --- testutil/rpcapi/rcpapi.go | 160 -------------------------------------- 1 file changed, 160 deletions(-) delete mode 100644 testutil/rpcapi/rcpapi.go diff --git a/testutil/rpcapi/rcpapi.go b/testutil/rpcapi/rcpapi.go deleted file mode 100644 index 1eafabccbdb3..000000000000 --- a/testutil/rpcapi/rcpapi.go +++ /dev/null @@ -1,160 +0,0 @@ -package rpcapi - -import ( - "net/rpc" - - msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" - "github.com/hashicorp/nomad/nomad/structs" -) - -type RPC struct { - Region string - Namespace string - codec rpc.ClientCodec -} - -func NewRPC(codec rpc.ClientCodec) *RPC { - return &RPC{ - Region: "global", - Namespace: structs.DefaultNamespace, - codec: codec, - } -} - -// AllocAll calls Alloc.List + Alloc.GetAllocs to return all allocs. 
-func (r *RPC) AllocAll() ([]*structs.Allocation, error) { - listResp, err := r.AllocList() - if err != nil { - return nil, err - } - - ids := make([]string, 0, len(listResp.Allocations)) - for _, a := range listResp.Allocations { - ids = append(ids, a.ID) - } - - allocsResp, err := r.AllocGetAllocs(ids) - if err != nil { - return nil, err - } - return allocsResp.Allocs, nil -} - -// Alloc.List RPC -func (r *RPC) AllocList() (*structs.AllocListResponse, error) { - get := &structs.AllocListRequest{ - QueryOptions: structs.QueryOptions{ - Region: r.Region, - Namespace: r.Namespace, - }, - } - - var resp structs.AllocListResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Alloc.List", get, &resp); err != nil { - return nil, err - } - return &resp, nil -} - -// Alloc.GetAllocs RPC -func (r *RPC) AllocGetAllocs(ids []string) (*structs.AllocsGetResponse, error) { - get := &structs.AllocsGetRequest{ - AllocIDs: ids, - QueryOptions: structs.QueryOptions{ - Region: r.Region, - Namespace: r.Namespace, - }, - } - var resp structs.AllocsGetResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Alloc.GetAllocs", get, &resp); err != nil { - return nil, err - } - return &resp, nil -} - -// Eval.List RPC -func (r *RPC) EvalList() (*structs.EvalListResponse, error) { - get := &structs.EvalListRequest{ - QueryOptions: structs.QueryOptions{ - Region: r.Region, - Namespace: r.Namespace, - }, - } - var resp structs.EvalListResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Eval.List", get, &resp); err != nil { - return nil, err - } - return &resp, nil -} - -// Job.List RPC -func (r *RPC) JobList() (*structs.JobListResponse, error) { - get := &structs.JobListRequest{ - QueryOptions: structs.QueryOptions{ - Region: r.Region, - Namespace: r.Namespace, - }, - } - - var resp structs.JobListResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Job.List", get, &resp); err != nil { - return nil, err - } - return &resp, nil -} - -// Job.Register RPC -func (r *RPC) JobRegister(j *structs.Job) (*structs.JobRegisterResponse, error) { - req := &structs.JobRegisterRequest{ - Job: j.Copy(), - WriteRequest: structs.WriteRequest{ - Region: r.Region, - Namespace: j.Namespace, - }, - } - - // Fetch the response - var resp structs.JobRegisterResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Job.Register", req, &resp); err != nil { - return nil, err - } - return &resp, nil -} - -// Node.List RPC -func (r *RPC) NodeList() (*structs.NodeListResponse, error) { - get := &structs.NodeListRequest{ - QueryOptions: structs.QueryOptions{Region: r.Region}, - } - var resp structs.NodeListResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Node.List", get, &resp); err != nil { - return nil, err - } - return &resp, nil -} - -// Node.GetAllocs RPC -func (r *RPC) NodeGetAllocs(nodeID string) (*structs.NodeAllocsResponse, error) { - get := &structs.NodeSpecificRequest{ - NodeID: nodeID, - QueryOptions: structs.QueryOptions{Region: r.Region}, - } - var resp structs.NodeAllocsResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Node.GetAllocs", get, &resp); err != nil { - return nil, err - } - return &resp, nil -} - -// Node.GetNode RPC -func (r *RPC) NodeGet(nodeID string) (*structs.SingleNodeResponse, error) { - get := &structs.NodeSpecificRequest{ - NodeID: nodeID, - QueryOptions: structs.QueryOptions{Region: r.Region}, - } - var resp structs.SingleNodeResponse - if err := msgpackrpc.CallWithCodec(r.codec, "Node.GetNode", get, &resp); err != nil { - return nil, err - } - return &resp, nil -} From 
aab1fb76721a3f6ed11fe72b655d351e4f76f518 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 11:31:55 -0700 Subject: [PATCH 65/79] Fix linting errors --- command/agent/node_endpoint_test.go | 2 +- nomad/drainer/watch_jobs_test.go | 1 - nomad/fsm_test.go | 2 +- nomad/state/state_store_test.go | 4 ++-- scheduler/reconcile.go | 1 - scheduler/reconcile_util_test.go | 6 +++--- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/command/agent/node_endpoint_test.go b/command/agent/node_endpoint_test.go index 19ff6e64cc1e..6b3d96c44ed4 100644 --- a/command/agent/node_endpoint_test.go +++ b/command/agent/node_endpoint_test.go @@ -292,7 +292,7 @@ func TestHTTP_NodeDrain(t *testing.T) { respW = httptest.NewRecorder() // Make the request - obj, err = s.Server.NodeSpecificRequest(respW, req) + _, err = s.Server.NodeSpecificRequest(respW, req) require.Nil(err) out, err = state.NodeByID(nil, node.ID) diff --git a/nomad/drainer/watch_jobs_test.go b/nomad/drainer/watch_jobs_test.go index 3a73938d713b..32d97d1040ac 100644 --- a/nomad/drainer/watch_jobs_test.go +++ b/nomad/drainer/watch_jobs_test.go @@ -289,7 +289,6 @@ func TestDrainingJobWatcher_DrainJobs(t *testing.T) { } } require.Nil(state.UpsertAllocs(index, replacements)) - index++ // No jobs should be left! require.Empty(jobWatcher.drainingJobs()) diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 1a7b08a4bd60..ed8cf2df5944 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -300,7 +300,7 @@ func TestFSM_BatchUpdateNodeDrain(t *testing.T) { } req2 := structs.BatchNodeUpdateDrainRequest{ Updates: map[string]*structs.DrainUpdate{ - node.ID: &structs.DrainUpdate{ + node.ID: { DrainStrategy: strategy, }, }, diff --git a/nomad/state/state_store_test.go b/nomad/state/state_store_test.go index 20ebbe88fcd2..9f13dd10bf36 100644 --- a/nomad/state/state_store_test.go +++ b/nomad/state/state_store_test.go @@ -718,10 +718,10 @@ func TestStateStore_BatchUpdateNodeDrain(t *testing.T) { } update := map[string]*structs.DrainUpdate{ - n1.ID: &structs.DrainUpdate{ + n1.ID: { DrainStrategy: expectedDrain, }, - n2.ID: &structs.DrainUpdate{ + n2.ID: { DrainStrategy: expectedDrain, }, } diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index a4e1d1c06d3f..b7b936defdca 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -436,7 +436,6 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool { if deploymentPlaceReady { // Do all destructive updates min := helper.IntMin(len(destructive), limit) - limit -= min desiredChanges.DestructiveUpdate += uint64(min) desiredChanges.Ignore += uint64(len(destructive) - min) for _, alloc := range destructive.nameOrder()[:min] { diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 6d85dfb811ed..6905b26fbbd9 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -36,16 +36,16 @@ func TestAllocSet_filterByTainted(t *testing.T) { require := require.New(t) nodes := map[string]*structs.Node{ - "draining": &structs.Node{ + "draining": { ID: "draining", Drain: true, }, - "lost": &structs.Node{ + "lost": { ID: "lost", Status: structs.NodeStatusDown, }, "nil": nil, - "normal": &structs.Node{ + "normal": { ID: "normal", Status: structs.NodeStatusReady, }, From 2bb18741b0fb997304b740f59bd609ab4c00154a Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 12:11:08 -0700 Subject: [PATCH 66/79] api: fix tests to expect default migrate strategy --- api/jobs_test.go | 5 +++++ 
api/nodes_test.go | 21 ++++++--------------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index edf045a3cde1..194470494b84 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -141,6 +141,7 @@ func TestJobs_Canonicalize(t *testing.T) { MaxDelay: helper.TimeToPtr(1 * time.Hour), Unlimited: helper.BoolToPtr(true), }, + Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ { KillTimeout: helper.TimeToPtr(5 * time.Second), @@ -211,6 +212,7 @@ func TestJobs_Canonicalize(t *testing.T) { MaxDelay: helper.TimeToPtr(1 * time.Hour), Unlimited: helper.BoolToPtr(true), }, + Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ { Name: "task1", @@ -363,6 +365,7 @@ func TestJobs_Canonicalize(t *testing.T) { AutoRevert: helper.BoolToPtr(false), Canary: helper.IntToPtr(0), }, + Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ { Name: "redis", @@ -576,6 +579,7 @@ func TestJobs_Canonicalize(t *testing.T) { AutoRevert: helper.BoolToPtr(true), Canary: helper.IntToPtr(1), }, + Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ { Name: "task1", @@ -616,6 +620,7 @@ func TestJobs_Canonicalize(t *testing.T) { AutoRevert: helper.BoolToPtr(false), Canary: helper.IntToPtr(0), }, + Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ { Name: "task1", diff --git a/api/nodes_test.go b/api/nodes_test.go index d2b02b82c243..4945b3f99c76 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -142,6 +142,7 @@ func TestNodes_Info(t *testing.T) { func TestNodes_ToggleDrain(t *testing.T) { t.Parallel() + require := require.New(t) c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) @@ -166,9 +167,7 @@ func TestNodes_ToggleDrain(t *testing.T) { // Check for drain mode out, _, err := nodes.Info(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + require.Nil(err) if out.Drain { t.Fatalf("drain mode should be off") } @@ -178,32 +177,24 @@ func TestNodes_ToggleDrain(t *testing.T) { Deadline: 10 * time.Second, } wm, err := nodes.UpdateDrain(nodeID, spec, false, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + require.Nil(err) assertWriteMeta(t, wm) // Check again out, _, err = nodes.Info(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + require.Nil(err) if out.SchedulingEligibility != structs.NodeSchedulingIneligible { t.Fatalf("bad eligibility: %v vs %v", out.SchedulingEligibility, structs.NodeSchedulingIneligible) } // Toggle off again wm, err = nodes.UpdateDrain(nodeID, nil, true, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + require.Nil(err) assertWriteMeta(t, wm) // Check again out, _, err = nodes.Info(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + require.Nil(err) if out.Drain { t.Fatalf("drain mode should be off") } From 1537061ebc4594abec57d7c7f036b649ef422ea3 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 14:15:35 -0700 Subject: [PATCH 67/79] alloc_runner: watch health for deployed batch jobs --- client/alloc_runner_health_watcher.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/client/alloc_runner_health_watcher.go b/client/alloc_runner_health_watcher.go index bdb7eaa82261..93d2553324b9 100644 --- a/client/alloc_runner_health_watcher.go +++ b/client/alloc_runner_health_watcher.go @@ -31,18 +31,25 @@ func (r *AllocRunner) watchHealth(ctx context.Context) { // See if we should watch the allocs health alloc := r.Alloc() - if alloc.Job.Type != structs.JobTypeService { - // No need to watch non-service jos + if 
alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { + // No need to watch health as it's already set return } - if alloc.DeploymentStatus.IsHealthy() || alloc.DeploymentStatus.IsUnhealthy() { - // No need to watch health as it's already set + // Neither deployments nor migrations care about system jobs so never + // watch their health + if alloc.Job.Type == structs.JobTypeSystem { return } isDeploy := alloc.DeploymentID != "" + // Migrations don't consider the health of batch jobs so only watch + // batch health during deployments + if !isDeploy && alloc.Job.Type == structs.JobTypeBatch { + return + } + tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) if tg == nil { r.logger.Printf("[ERR] client.alloc_watcher: failed to lookup allocation's task group. Exiting watcher") From 9b88749ced575a061afbcd4932d19469f5466194 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 14:49:17 -0700 Subject: [PATCH 68/79] mock: add BatchJob() helper --- nomad/mock/mock.go | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index aef9c475f011..fc12adbb1618 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -169,6 +169,72 @@ func Job() *structs.Job { return job } +func BatchJob() *structs.Job { + job := &structs.Job{ + Region: "global", + ID: uuid.Generate(), + Name: "batch-job", + Namespace: structs.DefaultNamespace, + Type: structs.JobTypeBatch, + Priority: 50, + AllAtOnce: false, + Datacenters: []string{"dc1"}, + TaskGroups: []*structs.TaskGroup{ + { + Name: "worker", + Count: 10, + EphemeralDisk: &structs.EphemeralDisk{ + SizeMB: 150, + }, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + Mode: structs.RestartPolicyModeDelay, + }, + ReschedulePolicy: &structs.ReschedulePolicy{ + Attempts: 2, + Interval: 10 * time.Minute, + Delay: 5 * time.Second, + DelayFunction: "linear", + }, + Tasks: []*structs.Task{ + { + Name: "worker", + Driver: "mock_driver", + Config: map[string]interface{}{ + "run_for": "500ms", + }, + Env: map[string]string{ + "FOO": "bar", + }, + LogConfig: structs.DefaultLogConfig(), + Resources: &structs.Resources{ + CPU: 100, + MemoryMB: 100, + Networks: []*structs.NetworkResource{ + { + MBits: 50, + }, + }, + }, + Meta: map[string]string{ + "foo": "bar", + }, + }, + }, + }, + }, + Status: structs.JobStatusPending, + Version: 0, + CreateIndex: 43, + ModifyIndex: 99, + JobModifyIndex: 99, + } + job.Canonicalize() + return job +} + func SystemJob() *structs.Job { job := &structs.Job{ Region: "global", From 17161ec5f9c9f8230ada7b92e80c3e57a29bfa06 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 14:49:29 -0700 Subject: [PATCH 69/79] tests: use mock.BatchJob to fix tests --- nomad/job_endpoint_test.go | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 7d0d0e770831..9182cc872754 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -421,8 +421,7 @@ func TestJobEndpoint_Register_ParameterizedJob(t *testing.T) { testutil.WaitForLeader(t, s1.RPC) // Create the register request for a parameterized job. 
- job := mock.Job() - job.Type = structs.JobTypeBatch + job := mock.BatchJob() job.ParameterizedJob = &structs.ParameterizedJobConfig{} req := &structs.JobRegisterRequest{ Job: job, @@ -1423,8 +1422,7 @@ func TestJobEndpoint_Evaluate_ParameterizedJob(t *testing.T) { testutil.WaitForLeader(t, s1.RPC) // Create the register request - job := mock.Job() - job.Type = structs.JobTypeBatch + job := mock.BatchJob() job.ParameterizedJob = &structs.ParameterizedJobConfig{} req := &structs.JobRegisterRequest{ Job: job, @@ -1751,8 +1749,7 @@ func TestJobEndpoint_Deregister_ParameterizedJob(t *testing.T) { testutil.WaitForLeader(t, s1.RPC) // Create the register request - job := mock.Job() - job.Type = structs.JobTypeBatch + job := mock.BatchJob() job.ParameterizedJob = &structs.ParameterizedJobConfig{} reg := &structs.JobRegisterRequest{ Job: job, @@ -3958,8 +3955,7 @@ func TestJobEndpoint_Dispatch_ACL(t *testing.T) { state := s1.fsm.State() // Create a parameterized job - job := mock.Job() - job.Type = structs.JobTypeBatch + job := mock.BatchJob() job.ParameterizedJob = &structs.ParameterizedJobConfig{} err := state.UpsertJob(400, job) require.Nil(err) @@ -4027,34 +4023,29 @@ func TestJobEndpoint_Dispatch(t *testing.T) { t.Parallel() // No requirements - d1 := mock.Job() - d1.Type = structs.JobTypeBatch + d1 := mock.BatchJob() d1.ParameterizedJob = &structs.ParameterizedJobConfig{} // Require input data - d2 := mock.Job() - d2.Type = structs.JobTypeBatch + d2 := mock.BatchJob() d2.ParameterizedJob = &structs.ParameterizedJobConfig{ Payload: structs.DispatchPayloadRequired, } // Disallow input data - d3 := mock.Job() - d3.Type = structs.JobTypeBatch + d3 := mock.BatchJob() d3.ParameterizedJob = &structs.ParameterizedJobConfig{ Payload: structs.DispatchPayloadForbidden, } // Require meta - d4 := mock.Job() - d4.Type = structs.JobTypeBatch + d4 := mock.BatchJob() d4.ParameterizedJob = &structs.ParameterizedJobConfig{ MetaRequired: []string{"foo", "bar"}, } // Optional meta - d5 := mock.Job() - d5.Type = structs.JobTypeBatch + d5 := mock.BatchJob() d5.ParameterizedJob = &structs.ParameterizedJobConfig{ MetaOptional: []string{"foo", "bar"}, } @@ -4063,8 +4054,7 @@ func TestJobEndpoint_Dispatch(t *testing.T) { d6 := mock.PeriodicJob() d6.ParameterizedJob = &structs.ParameterizedJobConfig{} - d7 := mock.Job() - d7.Type = structs.JobTypeBatch + d7 := mock.BatchJob() d7.ParameterizedJob = &structs.ParameterizedJobConfig{} d7.Stop = true From 80885623c1df75160a7b1c3a6e7283708da0a96a Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 15:19:47 -0700 Subject: [PATCH 70/79] test: don't call t.Fatal from within a goroutine --- command/agent/fs_endpoint_test.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/command/agent/fs_endpoint_test.go b/command/agent/fs_endpoint_test.go index f59bbd953b0f..9be39497ce9d 100644 --- a/command/agent/fs_endpoint_test.go +++ b/command/agent/fs_endpoint_test.go @@ -437,11 +437,10 @@ func TestHTTP_FS_Logs_Follow(t *testing.T) { req, err := http.NewRequest("GET", path, p) require.Nil(err) respW := httptest.NewRecorder() - doneCh := make(chan struct{}) + errCh := make(chan error) go func() { - _, err = s.Server.Logs(respW, req) - require.Nil(err) - close(doneCh) + _, err := s.Server.Logs(respW, req) + errCh <- err }() out := "" @@ -458,8 +457,8 @@ func TestHTTP_FS_Logs_Follow(t *testing.T) { }) select { - case <-doneCh: - t.Fatal("shouldn't close") + case err := <-errCh: + t.Fatalf("shouldn't exit: %v", err) case <-time.After(1 * 
time.Second): } From 50a94d73c9ae27d9c2a5efe19ebbf47f93d5c710 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 15:28:18 -0700 Subject: [PATCH 71/79] test: try to prevent flakiness on travis --- client/alloc_runner_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index 0ade0ba39dba..b2927f86eb9f 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -168,7 +168,7 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Deadline(t *testing.T) { // Make the task block task := ar.alloc.Job.TaskGroups[0].Tasks[0] task.Driver = "mock_driver" - task.Config["start_block_for"] = "2s" + task.Config["start_block_for"] = "4s" task.Config["run_for"] = "10s" // Make the alloc be part of a deployment From b8b1922b9ba8a6cf4d99d456560d2e4f589f6d75 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 15:28:38 -0700 Subject: [PATCH 72/79] test: fix by using mock.BatchJob --- command/agent/job_endpoint_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 57b5d1869d24..3e730a96957d 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -942,8 +942,7 @@ func TestHTTP_JobDispatch(t *testing.T) { t.Parallel() httpTest(t, nil, func(s *TestAgent) { // Create the parameterized job - job := mock.Job() - job.Type = "batch" + job := mock.BatchJob() job.ParameterizedJob = &structs.ParameterizedJobConfig{} args := structs.JobRegisterRequest{ From e8673b14ef5e2a64be46f68d547c5109387248f9 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 15:42:04 -0700 Subject: [PATCH 73/79] test: disable drain during fsm test drainer was unsetting drain before fsm could read written value --- nomad/node_endpoint_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 0a18f937cb17..26888d830541 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -753,6 +753,9 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) + // Disable drainer to prevent drain from completing during test + s1.nodeDrainer.SetEnabled(false, nil) + // Create the register request node := mock.Node() reg := &structs.NodeRegisterRequest{ @@ -764,6 +767,7 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { var resp structs.NodeUpdateResponse require.Nil(msgpackrpc.CallWithCodec(codec, "Node.Register", reg, &resp)) + beforeUpdate := time.Now() strategy := &structs.DrainStrategy{ DrainSpec: structs.DrainSpec{ Deadline: 10 * time.Second, @@ -786,7 +790,11 @@ func TestClientEndpoint_UpdateDrain(t *testing.T) { out, err := state.NodeByID(ws, node.ID) require.Nil(err) require.True(out.Drain) - require.Equal(strategy, out.DrainStrategy) + require.Equal(strategy.Deadline, out.DrainStrategy.Deadline) + // before+deadline should be before the forced deadline + require.True(beforeUpdate.Add(strategy.Deadline).Before(out.DrainStrategy.ForceDeadline)) + // now+deadline should be after the forced deadline + require.True(time.Now().Add(strategy.Deadline).After(out.DrainStrategy.ForceDeadline)) } func TestClientEndpoint_UpdateDrain_ACL(t *testing.T) { From 636693830fda3f4b08a87f079748475eb6c85d35 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 15:51:58 -0700 Subject: [PATCH 74/79] test: disable 
node drainer during tests Node drainer would throw off the index checks --- nomad/node_endpoint_test.go | 39 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index 26888d830541..4c278766cbc7 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -2439,15 +2439,18 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { codec := rpcClient(t, s1) testutil.WaitForLeader(t, s1.RPC) + // Disable drainer to prevent drain from completing during test + s1.nodeDrainer.SetEnabled(false, nil) + // Create the node node := mock.Node() // Node upsert triggers watches - time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpsertNode(2, node); err != nil { - t.Fatalf("err: %v", err) - } + errCh := make(chan error, 1) + timer := time.AfterFunc(100*time.Millisecond, func() { + errCh <- state.UpsertNode(2, node) }) + defer timer.Stop() req := &structs.NodeListRequest{ QueryOptions: structs.QueryOptions{ @@ -2461,6 +2464,10 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } + if err := <-errCh; err != nil { + t.Fatalf("error from timer: %v", err) + } + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp) } @@ -2478,9 +2485,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { Deadline: 10 * time.Second, }, } - if err := state.UpdateNodeDrain(3, node.ID, s, false); err != nil { - t.Fatalf("err: %v", err) - } + errCh <- state.UpdateNodeDrain(3, node.ID, s, false) }) req.MinQueryIndex = 2 @@ -2490,6 +2495,10 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } + if err := <-errCh; err != nil { + t.Fatalf("error from timer: %v", err) + } + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp2) } @@ -2502,9 +2511,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { // Node status update triggers watches time.AfterFunc(100*time.Millisecond, func() { - if err := state.UpdateNodeStatus(40, node.ID, structs.NodeStatusDown); err != nil { - t.Fatalf("err: %v", err) - } + errCh <- state.UpdateNodeStatus(40, node.ID, structs.NodeStatusDown) }) req.MinQueryIndex = 38 @@ -2514,6 +2521,10 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } + if err := <-errCh; err != nil { + t.Fatalf("error from timer: %v", err) + } + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp3) } @@ -2526,9 +2537,7 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { // Node delete triggers watches. 
time.AfterFunc(100*time.Millisecond, func() { - if err := state.DeleteNode(50, node.ID); err != nil { - t.Fatalf("err: %v", err) - } + errCh <- state.DeleteNode(50, node.ID) }) req.MinQueryIndex = 45 @@ -2538,6 +2547,10 @@ func TestClientEndpoint_ListNodes_Blocking(t *testing.T) { t.Fatalf("err: %v", err) } + if err := <-errCh; err != nil { + t.Fatalf("error from timer: %v", err) + } + if elapsed := time.Since(start); elapsed < 100*time.Millisecond { t.Fatalf("should block (returned in %s) %#v", elapsed, resp4) } From ec09ea61be811290afa83126b399bb5dd4f34ead Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 16:27:24 -0700 Subject: [PATCH 75/79] test: must initialize jobResults with new func --- nomad/drainer/watch_jobs_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nomad/drainer/watch_jobs_test.go b/nomad/drainer/watch_jobs_test.go index 32d97d1040ac..be90ed13d42f 100644 --- a/nomad/drainer/watch_jobs_test.go +++ b/nomad/drainer/watch_jobs_test.go @@ -604,13 +604,13 @@ func TestHandleTaskGroup_Migrations(t *testing.T) { require.Nil(err) // Handle before and after indexes - res := &jobResult{} + res := newJobResult() require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 101, res)) require.Empty(res.drain) require.Len(res.migrated, 10) require.True(res.done) - res = &jobResult{} + res = newJobResult() require.Nil(handleTaskGroup(snap, job.TaskGroups[0], allocs, 103, res)) require.Empty(res.drain) require.Empty(res.migrated) From b58a22c2e9cadbabe061904e7c1e1c00fd88db46 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 20 Mar 2018 17:25:28 -0700 Subject: [PATCH 76/79] remove spurious TODOs and FIXMEs --- client/alloc_runner_health_watcher.go | 13 +++++-------- nomad/drainer/drain_heap.go | 3 --- nomad/structs/structs.go | 1 - 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/client/alloc_runner_health_watcher.go b/client/alloc_runner_health_watcher.go index 93d2553324b9..b57f9c46e94f 100644 --- a/client/alloc_runner_health_watcher.go +++ b/client/alloc_runner_health_watcher.go @@ -196,12 +196,11 @@ func newAllocHealthTracker(parentCtx context.Context, logger *log.Logger, alloc minHealthyTime time.Duration, useChecks bool) *allocHealthTracker { a := &allocHealthTracker{ - logger: logger, - healthy: make(chan bool, 1), - allocStopped: make(chan struct{}), - alloc: alloc, - tg: alloc.Job.LookupTaskGroup(alloc.TaskGroup), - //FIXME should i wrap all these parameters up in a struct? + logger: logger, + healthy: make(chan bool, 1), + allocStopped: make(chan struct{}), + alloc: alloc, + tg: alloc.Job.LookupTaskGroup(alloc.TaskGroup), minHealthyTime: minHealthyTime, useChecks: useChecks, allocUpdates: allocUpdates, @@ -260,7 +259,6 @@ func (a *allocHealthTracker) TaskEvents() map[string]string { // Go through are task information and build the event map for task, state := range a.taskHealth { - //FIXME skip this for migrations? 
useChecks := a.tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks if e, ok := state.event(deadline, a.tg.Update.MinHealthyTime, useChecks); ok { events[task] = e @@ -542,7 +540,6 @@ func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration } // We are running so check if we have been running long enough - //FIXME need minHealthyTime here if t.state.StartedAt.Add(minHealthyTime).After(deadline) { return fmt.Sprintf("Task not running for min_healthy_time of %v by deadline", minHealthyTime), true } diff --git a/nomad/drainer/drain_heap.go b/nomad/drainer/drain_heap.go index 1642b0fdb330..2d0a1506e052 100644 --- a/nomad/drainer/drain_heap.go +++ b/nomad/drainer/drain_heap.go @@ -20,9 +20,6 @@ type DrainDeadlineNotifier interface { Watch(nodeID string, deadline time.Time) } -// TODO Make any of what I just wrote true :) Initially it is just a simple -// implementation. - // deadlineHeap implements the DrainDeadlineNotifier and is backed by a min-heap // to efficiently determine the next deadlining node. It also supports // coalescing several deadlines into a single emission. diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 307cce7faf0c..b96eef738825 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1193,7 +1193,6 @@ const ( // ShouldDrainNode checks if a given node status should trigger an // evaluation. Some states don't require any further action. -//TODO(schmichael) Update for drainv2?! func ShouldDrainNode(status string) bool { switch status { case NodeStatusInit, NodeStatusReady: From 07fe87918ad8a3f8a79b87ee60fee07804cfae77 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 21 Mar 2018 10:13:26 -0700 Subject: [PATCH 77/79] test: index no longer guaranteed on job list Also switch to require and add t.Helper to appropriate funcs. 
--- api/jobs_test.go | 27 ++++++++++----------------- api/util_test.go | 2 ++ 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/api/jobs_test.go b/api/jobs_test.go index 194470494b84..f14bd7b49fd8 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -12,41 +12,34 @@ import ( "github.com/hashicorp/nomad/testutil" "github.com/kr/pretty" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestJobs_Register(t *testing.T) { t.Parallel() + require := require.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Listing jobs before registering returns nothing resp, qm, err := jobs.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } + require.Nil(err) assertQueryMeta(t, qm) - if n := len(resp); n != 0 { - t.Fatalf("expected 0 jobs, got: %d", n) - } + require.Emptyf(resp, "expected 0 jobs, got: %d", len(resp)) // Create a job and attempt to register it job := testJob() resp2, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if resp2 == nil || resp2.EvalID == "" { - t.Fatalf("missing eval id") - } + require.Nil(err) + require.NotNil(resp2) + require.NotEmpty(resp2.EvalID) assertWriteMeta(t, wm) // Query the jobs back out again - resp, qm, err = jobs.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } - assertQueryMeta(t, qm) + resp, _, err = jobs.List(nil) + require.Nil(err) // Check that we got the expected response if len(resp) != 1 || resp[0].ID != *job.ID { diff --git a/api/util_test.go b/api/util_test.go index 9aceee0bfdad..c6f99018e4ce 100644 --- a/api/util_test.go +++ b/api/util_test.go @@ -7,6 +7,7 @@ import ( ) func assertQueryMeta(t *testing.T, qm *QueryMeta) { + t.Helper() if qm.LastIndex == 0 { t.Fatalf("bad index: %d", qm.LastIndex) } @@ -16,6 +17,7 @@ func assertQueryMeta(t *testing.T, qm *QueryMeta) { } func assertWriteMeta(t *testing.T, wm *WriteMeta) { + t.Helper() if wm.LastIndex == 0 { t.Fatalf("bad index: %d", wm.LastIndex) } From 3496bcf76670b1fdae3ce543841af0cecc9fa354 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 21 Mar 2018 10:41:06 -0700 Subject: [PATCH 78/79] docs: improve DrainRequest.MarkEligible comment --- api/nodes.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/nodes.go b/api/nodes.go index d625629fb5e3..76e25594f775 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -52,7 +52,8 @@ type NodeUpdateDrainRequest struct { // will disable draining. DrainSpec *DrainSpec - // MarkEligible marks the node as eligible if removing the drain strategy. + // MarkEligible marks the node as eligible for scheduling if removing + // the drain strategy. MarkEligible bool } From e10883ca2bc38ad49a1f008ee59dbf040a7986e4 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 21 Mar 2018 10:44:17 -0700 Subject: [PATCH 79/79] eligbile -> eligible --- nomad/structs/structs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index b96eef738825..5f455e58d3e9 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -1218,7 +1218,7 @@ const ( // NodeSchedulingEligible and Ineligible marks the node as eligible or not, // respectively, for receiving allocations. This is orthoginal to the node // status being ready. - NodeSchedulingEligible = "eligbile" + NodeSchedulingEligible = "eligible" NodeSchedulingIneligible = "ineligible" )
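Closing note on the drain API these patches exercise: a node drain is enabled by submitting a DrainSpec with a deadline and disabled by submitting a nil spec, with MarkEligible controlling whether the node becomes eligible for scheduling again (see PATCH 78). The rough client sketch below mirrors the UpdateDrain and Info calls made in TestNodes_ToggleDrain; it is illustrative only, the client setup and node ID are placeholders not taken from the patches, and error handling is reduced to log.Fatal.

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Connect to a local agent; DefaultConfig reads NOMAD_ADDR and falls
	// back to http://127.0.0.1:4646.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}
	nodes := client.Nodes()

	// Placeholder: substitute a real node ID, e.g. from nodes.List.
	nodeID := "REPLACE-WITH-NODE-ID"

	// Enable draining with a 10s deadline, as in TestNodes_ToggleDrain.
	spec := &api.DrainSpec{Deadline: 10 * time.Second}
	if _, err := nodes.UpdateDrain(nodeID, spec, false, nil); err != nil {
		log.Fatal(err)
	}

	// Disable draining and mark the node eligible for scheduling again.
	if _, err := nodes.UpdateDrain(nodeID, nil, true, nil); err != nil {
		log.Fatal(err)
	}

	info, _, err := nodes.Info(nodeID, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("drain:", info.Drain, "eligibility:", info.SchedulingEligibility)
}

Passing a nil spec with MarkEligible set to false should cancel the drain but leave the node ineligible for new placements, which is the distinction the MarkEligible comment in PATCH 78 documents.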