diff --git a/CHANGELOG.md b/CHANGELOG.md
index 64db8227437b..812e2bee3859 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,8 @@ IMPROVEMENTS:
     isolate canary instances during deployments [[GH-4259](https://github.com/hashicorp/nomad/issues/4259)]
  * core: Emit Node events for drain and eligibility operations as well as for
    missed heartbeats [[GH-4284](https://github.com/hashicorp/nomad/issues/4284)], [[GH-4291](https://github.com/hashicorp/nomad/issues/4291)], [[GH-4292](https://github.com/hashicorp/nomad/issues/4292)]
+ * cli: Add node drain monitoring with new `-monitor` flag on node drain
+   command [[GH-4260](https://github.com/hashicorp/nomad/issues/4260)]
  * cli: Add node drain details to node status [[GH-4247](https://github.com/hashicorp/nomad/issues/4247)]
  * client: Avoid splitting log line across two files [[GH-4282](https://github.com/hashicorp/nomad/issues/4282)]
  * command: Add -short option to init command that emits a minimal
diff --git a/api/nodes.go b/api/nodes.go
index cc1f4452b496..59d8a9f728de 100644
--- a/api/nodes.go
+++ b/api/nodes.go
@@ -193,6 +193,7 @@ func (n *Nodes) monitorDrainNode(ctx context.Context, nodeID string, index uint6
     defer close(nodeCh)
 
     var lastStrategy *DrainStrategy
+    var strategyChanged bool
     q := QueryOptions{
         AllowStale: true,
         WaitIndex:  index,
@@ -209,7 +210,12 @@ func (n *Nodes) monitorDrainNode(ctx context.Context, nodeID string, index uint6
         }
 
         if node.DrainStrategy == nil {
-            msg := Messagef(MonitorMsgLevelInfo, "Node %q has marked all allocations for migration", nodeID)
+            var msg *MonitorMessage
+            if strategyChanged {
+                msg = Messagef(MonitorMsgLevelInfo, "Node %q has marked all allocations for migration", nodeID)
+            } else {
+                msg = Messagef(MonitorMsgLevelInfo, "No drain strategy set for node %s", nodeID)
+            }
             select {
             case nodeCh <- msg:
             case <-ctx.Done():
@@ -236,6 +242,7 @@ func (n *Nodes) monitorDrainNode(ctx context.Context, nodeID string, index uint6
         }
 
         lastStrategy = node.DrainStrategy
+        strategyChanged = true
 
         // Drain still ongoing, update index and block for updates
         q.WaitIndex = meta.LastIndex
diff --git a/command/node_drain.go b/command/node_drain.go
index 198feb233ca2..040b16ee19fa 100644
--- a/command/node_drain.go
+++ b/command/node_drain.go
@@ -49,6 +49,9 @@ Node Drain Options:
   -detach
     Return immediately instead of entering monitor mode.
 
+  -monitor
+    Enter monitor mode directly without modifying the drain status.
+
   -force
     Force remove allocations off the node immediately.
 
@@ -113,7 +116,8 @@ func (c *NodeDrainCommand) Name() string { return "node-drain" }
 
 func (c *NodeDrainCommand) Run(args []string) int {
     var enable, disable, detach, force,
-        noDeadline, ignoreSystem, keepIneligible, self, autoYes bool
+        noDeadline, ignoreSystem, keepIneligible,
+        self, autoYes, monitor bool
     var deadline string
 
     flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
@@ -128,14 +132,22 @@ func (c *NodeDrainCommand) Run(args []string) int {
     flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility")
     flags.BoolVar(&self, "self", false, "")
     flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.")
+    flags.BoolVar(&monitor, "monitor", false, "Monitor drain status.")
 
     if err := flags.Parse(args); err != nil {
         return 1
     }
 
+    // Check that enable or disable is not set with monitor
+    if monitor && (enable || disable) {
+        c.Ui.Error("The -monitor flag cannot be used with the '-enable' or '-disable' flags")
+        c.Ui.Error(commandErrorText(c))
+        return 1
+    }
+
     // Check that we got either enable or disable, but not both.
-    if (enable && disable) || (!enable && !disable) {
-        c.Ui.Error("Ethier the '-enable' or '-disable' flag must be set")
+    if (enable && disable) || (!monitor && !enable && !disable) {
+        c.Ui.Error("Either the '-enable' or '-disable' flag must be set, unless using '-monitor'")
         c.Ui.Error(commandErrorText(c))
         return 1
     }
@@ -236,6 +248,13 @@ func (c *NodeDrainCommand) Run(args []string) int {
         return 1
     }
 
+    // If monitoring the drain, start the monitor and return when done
+    if monitor {
+        c.Ui.Info(fmt.Sprintf("%s: Monitoring node %q: Ctrl-C to detach monitoring", formatTime(time.Now()), node.ID))
+        c.monitorDrain(client, context.Background(), node, 0, ignoreSystem)
+        return 0
+    }
+
     // Confirm drain if the node was a prefix match.
     if nodeID != node.ID && !autoYes {
         verb := "enable"
@@ -290,20 +309,23 @@ func (c *NodeDrainCommand) Run(args []string) int {
         now := time.Now()
         c.Ui.Info(fmt.Sprintf("%s: Ctrl-C to stop monitoring: will not cancel the node drain", formatTime(now)))
         c.Ui.Output(fmt.Sprintf("%s: Node %q drain strategy set", formatTime(now), node.ID))
-        outCh := client.Nodes().MonitorDrain(context.Background(), node.ID, meta.LastIndex, ignoreSystem)
-        for msg := range outCh {
-            switch msg.Level {
-            case api.MonitorMsgLevelInfo:
-                c.Ui.Info(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
-            case api.MonitorMsgLevelWarn:
-                c.Ui.Warn(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
-            case api.MonitorMsgLevelError:
-                c.Ui.Error(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
-            default:
-                c.Ui.Output(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
-            }
-        }
+        c.monitorDrain(client, context.Background(), node, meta.LastIndex, ignoreSystem)
     }
-
     return 0
 }
+
+func (c *NodeDrainCommand) monitorDrain(client *api.Client, ctx context.Context, node *api.Node, index uint64, ignoreSystem bool) {
+    outCh := client.Nodes().MonitorDrain(ctx, node.ID, index, ignoreSystem)
+    for msg := range outCh {
+        switch msg.Level {
+        case api.MonitorMsgLevelInfo:
+            c.Ui.Info(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
+        case api.MonitorMsgLevelWarn:
+            c.Ui.Warn(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
+        case api.MonitorMsgLevelError:
+            c.Ui.Error(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
+        default:
+            c.Ui.Output(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
+        }
+    }
+}
diff --git a/command/node_drain_test.go b/command/node_drain_test.go
index 9ddc4482a232..33d714d1dcac 100644
--- a/command/node_drain_test.go
+++ b/command/node_drain_test.go
@@ -237,6 +237,19 @@ func TestNodeDrainCommand_Monitor(t *testing.T) {
     if !strings.HasSuffix(out, expected) {
         t.Fatalf("expected output to end with:\n%s", expected)
     }
+
+    // Test -monitor flag
+    outBuf.Reset()
+    args = []string{"-address=" + url, "-self", "-monitor", "-ignore-system"}
+    t.Logf("Running: %v", args)
+    if code := cmd.Run(args); code != 0 {
+        t.Fatalf("expected exit 0, got: %d\n%s", code, outBuf.String())
+    }
+
+    out = outBuf.String()
+    t.Logf("Output:\n%s", out)
+
+    require.Contains(out, "No drain strategy set")
 }
 
 func TestNodeDrainCommand_Fails(t *testing.T) {
diff --git a/website/source/docs/commands/node/drain.html.md.erb b/website/source/docs/commands/node/drain.html.md.erb
index d03ca3f0d6cf..1abbc4cfdcf9 100644
--- a/website/source/docs/commands/node/drain.html.md.erb
+++ b/website/source/docs/commands/node/drain.html.md.erb
@@ -57,6 +57,7 @@ operation is desired.
   node. Remaining allocations after the deadline are force removed from the
   node. Defaults to 1 hour.
 * `-detach`: Return immediately instead of entering monitor mode.
+* `-monitor`: Enter monitor mode directly without modifying the drain status.
 * `-force`: Force remove allocations off the node immediately.
 * `-no-deadline`: No deadline allows the allocations to drain off the node
   without being force stopped after a certain deadline.
@@ -114,6 +115,14 @@ $ nomad node drain -disable -keep-ineligible 4d2ba53b
 ...
 ```
 
+Enable drain mode and detach from monitoring, then reattach later:
+
+```
+$ nomad node drain -enable -detach -self
+...
+$ nomad node drain -self -monitor
+...
+```
 
 [eligibility]: /docs/commands/node/eligibility.html
 [migrate]: /docs/job-specification/migrate.html
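
Outside the CLI, the monitor channel this change exposes through `api/nodes.go` can be consumed directly from Go. The following is a minimal sketch, not part of the change itself: the client setup and node ID are placeholders, and it assumes a reachable Nomad agent with the patched `api` package; it mirrors what `node drain -monitor` does by passing index 0 so no drain strategy is modified.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// DefaultConfig honors NOMAD_ADDR; assumes an agent is reachable.
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	nodeID := "<node-id>" // placeholder: a full node UUID from `nomad node status`

	// Index 0 attaches without changing the drain strategy, as -monitor does.
	// The channel closes when the drain finishes or no strategy is set.
	outCh := client.Nodes().MonitorDrain(context.Background(), nodeID, 0, false)
	for msg := range outCh {
		switch msg.Level {
		case api.MonitorMsgLevelError:
			fmt.Println("error:", msg)
		default:
			fmt.Println(msg)
		}
	}
}
```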