Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cli: add monitor flag to deployment status #10661

Merged
merged 2 commits into from
Jun 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions command/agent/testagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@ import (
"os"
"path/filepath"
"strings"
"testing"
"time"

testing "github.com/mitchellh/go-testing-interface"

metrics "github.com/armon/go-metrics"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/api"
Expand All @@ -39,7 +38,7 @@ var TempDir = os.TempDir()
// is removed after shutdown.
type TestAgent struct {
// T is the testing object
T testing.T
T testing.TB
tgross marked this conversation as resolved.
Show resolved Hide resolved

// Name is an optional name of the agent.
Name string
Expand Down Expand Up @@ -92,7 +91,7 @@ type TestAgent struct {
// NewTestAgent returns a started agent with the given name and
// configuration. The caller should call Shutdown() to stop the agent and
// remove temporary directories.
func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent {
func NewTestAgent(t testing.TB, name string, configCallback func(*Config)) *TestAgent {
a := &TestAgent{
T: t,
Name: name,
Expand Down
285 changes: 284 additions & 1 deletion command/deployment_status.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
package command

import (
"context"
"errors"
"fmt"
"os"
"runtime"
"sort"
"strings"
"time"

"github.com/docker/docker/pkg/term"
"github.com/gosuri/uilive"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/go-glint"
"github.com/mitchellh/go-glint/components"
"github.com/posener/complete"
)

Expand Down Expand Up @@ -37,6 +46,9 @@ Status Options:
-json
Output the deployment in its JSON format.

-monitor
Enter monitor mode to poll for updates to the deployment status.

-t
Format and display deployment using a Go template.
`
Expand All @@ -52,6 +64,7 @@ func (c *DeploymentStatusCommand) AutocompleteFlags() complete.Flags {
complete.Flags{
"-verbose": complete.PredictNothing,
"-json": complete.PredictNothing,
"-monitor": complete.PredictNothing,
"-t": complete.PredictAnything,
})
}
Expand All @@ -74,19 +87,26 @@ func (c *DeploymentStatusCommand) AutocompleteArgs() complete.Predictor {
// Name returns the name of the command.
func (c *DeploymentStatusCommand) Name() string {
	const name = "deployment status"
	return name
}

func (c *DeploymentStatusCommand) Run(args []string) int {
var json, verbose bool
var json, verbose, monitor bool
var tmpl string

flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.BoolVar(&verbose, "verbose", false, "")
flags.BoolVar(&json, "json", false, "")
flags.BoolVar(&monitor, "monitor", false, "")
flags.StringVar(&tmpl, "t", "", "")

if err := flags.Parse(args); err != nil {
return 1
}

// Check that json or tmpl isn't set with monitor
if monitor && (json || len(tmpl) > 0) {
c.Ui.Error("The monitor flag cannot be used with the '-json' or '-t' flags")
return 1
}

// Check that we got exactly one argument
args = flags.Args()
if l := len(args); l > 1 {
Expand Down Expand Up @@ -144,10 +164,263 @@ func (c *DeploymentStatusCommand) Run(args []string) int {
return 0
}

if monitor {
// Call just to get meta
_, meta, err := client.Deployments().Info(deploy.ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error retrieving deployment: %s", err))
}

c.Ui.Output(fmt.Sprintf("%s: Monitoring deployment %q",
formatTime(time.Now()), limit(deploy.ID, length)))
c.monitor(client, deploy.ID, meta.LastIndex, verbose)

return 0
}
c.Ui.Output(c.Colorize().Color(formatDeployment(client, deploy, length)))
return 0
}

// monitor follows the deployment identified by deployID, choosing how the
// updates are rendered: ttyMonitor when stdout is a terminal on a non-Windows
// OS, defaultMonitor in every other case. index and verbose are passed through
// unchanged to the chosen monitor.
func (c *DeploymentStatusCommand) monitor(client *api.Client, deployID string, index uint64, verbose bool) {
	// TODO if/when glint offers full Windows support take out the runtime check
	if _, tty := term.GetFdInfo(os.Stdout); !tty || runtime.GOOS == "windows" {
		c.defaultMonitor(client, deployID, index, verbose)
		return
	}

	c.ttyMonitor(client, deployID, index, verbose)
}

// Uses glint for printing in place. Same logic as the defaultMonitor function
// but only used for tty and non-Windows machines since glint doesn't work with
// cmd/PowerShell and non-interactive interfaces
// Margins are used to match the text alignment from job run
func (c *DeploymentStatusCommand) ttyMonitor(client *api.Client, deployID string, index uint64, verbose bool) {
var length int
if verbose {
length = fullId
} else {
length = shortId
}

d := glint.New()
spinner := glint.Layout(
components.Spinner(),
glint.Text(fmt.Sprintf(" Deployment %q in progress...", limit(deployID, length))),
).Row().MarginLeft(2)
refreshRate := 100 * time.Millisecond

d.SetRefreshRate(refreshRate)
d.Set(spinner)

ctx, cancel := context.WithCancel(context.Background())

go d.Render(ctx)
defer cancel()

q := api.QueryOptions{
AllowStale: true,
WaitIndex: index,
WaitTime: 2 * time.Second,
}

var statusComponent *glint.LayoutComponent
var endSpinner *glint.LayoutComponent

UPDATE:
for {
deploy, meta, err := client.Deployments().Info(deployID, &q)
if err != nil {
d.Append(glint.Style(
glint.Text(fmt.Sprintf("%s: Error fetching deployment", formatTime(time.Now()))),
glint.Color("red"),
))
d.RenderFrame()
return
}

status := deploy.Status
statusComponent = glint.Layout(
glint.Text(""),
glint.Text(formatTime(time.Now())),
// Use colorize to render bold text in formatDeployment function
glint.Text(c.Colorize().Color(formatDeployment(client, deploy, length))),
)

if verbose {
allocComponent := glint.Layout(glint.Style(
glint.Text("Allocations"),
glint.Bold(),
))

allocs, _, err := client.Deployments().Allocations(deployID, nil)
if err != nil {
allocComponent = glint.Layout(
allocComponent,
glint.Style(
glint.Text("Error fetching allocations"),
glint.Color("red"),
),
)
} else {
allocComponent = glint.Layout(
allocComponent,
glint.Text(formatAllocListStubs(allocs, verbose, length)),
)
}

statusComponent = glint.Layout(
statusComponent,
glint.Text(""),
allocComponent,
)
}

statusComponent = glint.Layout(statusComponent).MarginLeft(4)
d.Set(spinner, statusComponent)

endSpinner = glint.Layout(
components.Spinner(),
glint.Text(fmt.Sprintf(" Deployment %q %s", limit(deployID, length), status)),
).Row().MarginLeft(2)

switch status {
case structs.DeploymentStatusFailed:
if hasAutoRevert(deploy) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 This is a nice usability addition so that folks don't have to go rummaging around for it themselves.

// Separate rollback monitoring from failed deployment
d.Set(
endSpinner,
statusComponent,
glint.Layout(glint.Text("")),
)

// Wait for rollback to launch
time.Sleep(1 * time.Second)
rollback, _, err := client.Jobs().LatestDeployment(deploy.JobID, nil)

if err != nil {
d.Append(glint.Style(
glint.Text(fmt.Sprintf("%s: Error fetching rollback deployment", formatTime(time.Now()))),
glint.Color("red")),
)
d.RenderFrame()
return
}

// Check for noop/no target rollbacks
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We slept before calling LatestDeployment above, which I think will work most of the time.

But if we have a cluster that's maybe in a troubled state with slow scheduling, can that LatestDeployment call potentially get the same deployment that just failed even though there eventually will be a rollback deployment? I think it's probably okay to bail out in that situation just because we probably don't have a good way of determining that, but let's flesh out this comment a bit to make sure that's clear to developers in case we want to revisit that decision later.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I wasn't sure about sleeping since it didn't seem like the most robust way of getting the rollback but I couldn't think of anything better. Great suggestion on the comment though, will do

// TODO We may want to find a more robust way of waiting for rollbacks to launch instead of
// just sleeping for 1 sec. If scheduling is slow, this will break update here instead of
// waiting for the (eventual) rollback
if rollback.ID == deploy.ID {
break UPDATE
}

d.Close()
c.ttyMonitor(client, rollback.ID, index, verbose)
return
} else {
break UPDATE
}
case structs.DeploymentStatusSuccessful, structs.DeploymentStatusCancelled, structs.DeploymentStatusDescriptionBlocked:
break UPDATE
default:
q.WaitIndex = meta.LastIndex
continue
}
}
// Render one final time with completion message
d.Set(endSpinner, statusComponent)
d.RenderFrame()
}

// defaultMonitor follows a deployment until it reaches a terminal status,
// printing the status after every update. It is used for Windows and non-tty
// output: when stdout is a terminal the status is written through a uilive
// writer so it is printed in place, otherwise each update is sent to c.Ui.
//
// index is used as the initial WaitIndex of the deployment query. When verbose
// is set, full-length IDs are shown and the deployment's allocations are
// listed below the status.
//
// If the deployment fails and one of its task groups has auto-revert enabled,
// defaultMonitor goes on to monitor the rollback deployment.
func (c *DeploymentStatusCommand) defaultMonitor(client *api.Client, deployID string, index uint64, verbose bool) {
	writer := uilive.New()
	writer.Start()
	defer writer.Stop()

	var length int
	if verbose {
		length = fullId
	} else {
		length = shortId
	}

	// Whether stdout is a terminal is the same for every update, so look it
	// up once instead of on each loop iteration.
	_, isStdoutTerminal := term.GetFdInfo(os.Stdout)

	q := api.QueryOptions{
		AllowStale: true,
		WaitIndex:  index,
		WaitTime:   2 * time.Second,
	}

	for {
		deploy, meta, err := client.Deployments().Info(deployID, &q)
		if err != nil {
			c.Ui.Error(c.Colorize().Color(
				fmt.Sprintf("%s: Error fetching deployment: %s", formatTime(time.Now()), err),
			))
			return
		}

		status := deploy.Status
		info := formatTime(time.Now())
		info += fmt.Sprintf("\n%s", formatDeployment(client, deploy, length))

		if verbose {
			info += "\n\n[bold]Allocations[reset]\n"
			allocs, _, err := client.Deployments().Allocations(deployID, nil)
			if err != nil {
				info += "Error fetching allocations"
			} else {
				info += formatAllocListStubs(allocs, verbose, length)
			}
		}

		// Add newline before output to avoid prefix indentation when called from job run
		msg := c.Colorize().Color(fmt.Sprintf("\n%s", info))

		// Print in place if tty
		if isStdoutTerminal {
			fmt.Fprint(writer, msg)
		} else {
			c.Ui.Output(msg)
		}

		switch status {
		case structs.DeploymentStatusFailed:
			if !hasAutoRevert(deploy) {
				return
			}

			// Wait for rollback to launch
			time.Sleep(1 * time.Second)
			rollback, _, err := client.Jobs().LatestDeployment(deploy.JobID, nil)

			// Separate rollback monitoring from failed deployment
			// Needs to be after time.Sleep or it messes up the formatting
			c.Ui.Output("")
			if err != nil {
				c.Ui.Error(c.Colorize().Color(
					fmt.Sprintf("%s: Error fetching deployment of previous job version: %s", formatTime(time.Now()), err),
				))
				return
			}

			// Check for noop/no target rollbacks
			// TODO We may want to find a more robust way of waiting for rollbacks to launch instead of
			// just sleeping for 1 sec. If scheduling is slow, this will break update here instead of
			// waiting for the (eventual) rollback
			// The nil check guards against no deployment being returned at all.
			if rollback == nil || rollback.ID == deploy.ID {
				return
			}
			c.defaultMonitor(client, rollback.ID, index, verbose)
			return

		case structs.DeploymentStatusSuccessful, structs.DeploymentStatusCancelled, structs.DeploymentStatusBlocked:
			// deploy.Status holds a status value, so it must be compared with
			// the status constants, not the status description constants.
			return
		default:
			// Still in progress: wait for the next change after this index.
			q.WaitIndex = meta.LastIndex
		}
	}
}

func getDeployment(client *api.Deployments, dID string) (match *api.Deployment, possible []*api.Deployment, err error) {
// First attempt an immediate lookup if we have a proper length
if len(dID) == 36 {
Expand Down Expand Up @@ -358,3 +631,13 @@ func formatDeploymentGroups(d *api.Deployment, uuidLength int) string {

return formatList(rows)
}

// hasAutoRevert reports whether at least one task group in the deployment has
// AutoRevert set.
func hasAutoRevert(d *api.Deployment) bool {
	for group := range d.TaskGroups {
		if d.TaskGroups[group].AutoRevert {
			return true
		}
	}

	return false
}
9 changes: 9 additions & 0 deletions command/deployment_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ func TestDeploymentStatusCommand_Fails(t *testing.T) {
// "deployments" indicates that we attempted to list all deployments
require.Contains(t, out, "Error retrieving deployments")
ui.ErrorWriter.Reset()

// Fails if monitor passed with json or tmpl flags
for _, flag := range []string{"-json", "-t"} {
code = cmd.Run([]string{"-monitor", flag, "12"})
require.Equal(t, 1, code)
out = ui.ErrorWriter.String()
require.Contains(t, out, "The monitor flag cannot be used with the '-json' or '-t' flags")
ui.ErrorWriter.Reset()
}
}

func TestDeploymentStatusCommand_AutocompleteArgs(t *testing.T) {
Expand Down
Loading