Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

alloc-status displays individual task state #424

Merged
merged 5 commits into from
Nov 17, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 119 additions & 6 deletions command/alloc_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ package command

import (
"fmt"
"sort"
"strings"
"time"

"github.com/hashicorp/nomad/api"
)

type AllocStatusCommand struct {
Expand All @@ -13,14 +17,21 @@ func (c *AllocStatusCommand) Help() string {
helpText := `
Usage: nomad alloc-status [options] <allocation>
Display information about existing allocations. This command can
be used to inspect the current status of all allocation,
including its running status, metadata, and verbose failure
messages reported by internal subsystems.
Display information about existing allocations and its tasks. This command can
be used to inspect the current status of all allocation, including its running
status, metadata, and verbose failure messages reported by internal
subsystems.
General Options:
` + generalOptionsUsage()
` + generalOptionsUsage() + `
Alloc Status Options:
-short
Display short output. Shows only the most recent task event.
`

return strings.TrimSpace(helpText)
}

Expand All @@ -29,8 +40,12 @@ func (c *AllocStatusCommand) Synopsis() string {
}

func (c *AllocStatusCommand) Run(args []string) int {
var short bool

flags := c.Meta.FlagSet("alloc-status", FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.BoolVar(&short, "short", false, "")

if err := flags.Parse(args); err != nil {
return 1
}
Expand Down Expand Up @@ -65,7 +80,6 @@ func (c *AllocStatusCommand) Run(args []string) int {
fmt.Sprintf("NodeID|%s", alloc.NodeID),
fmt.Sprintf("JobID|%s", alloc.JobID),
fmt.Sprintf("ClientStatus|%s", alloc.ClientStatus),
fmt.Sprintf("ClientDescription|%s", alloc.ClientDescription),
fmt.Sprintf("NodesEvaluated|%d", alloc.Metrics.NodesEvaluated),
fmt.Sprintf("NodesFiltered|%d", alloc.Metrics.NodesFiltered),
fmt.Sprintf("NodesExhausted|%d", alloc.Metrics.NodesExhausted),
Expand All @@ -74,9 +88,108 @@ func (c *AllocStatusCommand) Run(args []string) int {
}
c.Ui.Output(formatKV(basic))

// Print the state of each task.
if short {
c.shortTaskStatus(alloc)
} else {
c.taskStatus(alloc)
}

// Format the detailed status
c.Ui.Output("\n==> Status")
dumpAllocStatus(c.Ui, alloc)

return 0
}

// shortTaskStatus prints a one-row-per-task summary table for the allocation.
// Each row holds the task name, its current state, and the type and time of
// the last event recorded for it; the event columns are left empty when the
// task has no events. Tasks are listed in sorted name order, and all output
// goes through c.Ui.
func (c *AllocStatusCommand) shortTaskStatus(alloc *api.Allocation) {
	tasks := make([]string, 0, len(alloc.TaskStates)+1)
	tasks = append(tasks, "Name|State|LastEvent|Time")
	for task := range c.sortedTaskStateIterator(alloc.TaskStates) {
		state := alloc.TaskStates[task]
		lastState := state.State
		var lastEvent, lastTime string

		// The final entry in Events is taken as the most recent one.
		if l := len(state.Events); l != 0 {
			last := state.Events[l-1]
			lastEvent = last.Type
			lastTime = c.formatUnixNanoTime(last.Time)
		}

		tasks = append(tasks, fmt.Sprintf("%s|%s|%s|%s",
			task, lastState, lastEvent, lastTime))
	}

	c.Ui.Output("\n==> Tasks")
	c.Ui.Output(formatList(tasks))
}

// taskStatus prints, for every task in the allocation (in sorted name order),
// a header with the task's state followed by a table of its events. The table
// lists the last recorded event first. The description column depends on the
// event type: the driver error for driver failures, the kill error for kills,
// and the exit code plus optional signal and message for terminations.
func (c *AllocStatusCommand) taskStatus(alloc *api.Allocation) {
	for name := range c.sortedTaskStateIterator(alloc.TaskStates) {
		ts := alloc.TaskStates[name]

		rows := make([]string, 0, len(ts.Events)+1)
		rows = append(rows, "Time|Type|Description")

		// Walk the events back to front so the last recorded one is listed first.
		for i := len(ts.Events) - 1; i >= 0; i-- {
			ev := ts.Events[i]

			// Pick the description that matches the event type; other types
			// get an empty description.
			var detail string
			switch ev.Type {
			case api.TaskDriverFailure:
				detail = ev.DriverError
			case api.TaskKilled:
				detail = ev.KillError
			case api.TaskTerminated:
				fields := []string{fmt.Sprintf("Exit Code: %d", ev.ExitCode)}
				if ev.Signal != 0 {
					fields = append(fields, fmt.Sprintf("Signal: %d", ev.Signal))
				}
				if ev.Message != "" {
					fields = append(fields, fmt.Sprintf("Exit Message: %q", ev.Message))
				}
				detail = strings.Join(fields, ", ")
			}

			rows = append(rows, fmt.Sprintf("%s|%s|%s",
				c.formatUnixNanoTime(ev.Time), ev.Type, detail))
		}

		c.Ui.Output(fmt.Sprintf("\n==> Task %q is %q\nRecent Events:", name, ts.State))
		c.Ui.Output(formatList(rows))
	}
}

// formatUnixNanoTime renders a Unix timestamp given in nanoseconds as
// "HH:MM:SS MM/DD/YY" (24-hour clock, two-digit year). The time is expressed
// in the process's local time zone, as produced by time.Unix.
func (c *AllocStatusCommand) formatUnixNanoTime(nano int64) string {
	const layout = "15:04:05 01/02/06"
	return time.Unix(0, nano).Format(layout)
}

// sortedTaskStateIterator collects the keys of the task state map, sorts them
// lexically, and returns a closed, buffered channel pre-loaded with those keys
// so that callers can range over the task names in a stable order.
func (c *AllocStatusCommand) sortedTaskStateIterator(m map[string]*api.TaskState) <-chan string {
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Strings(names)

	// The buffer holds every key, so the sends below never block and the
	// channel can be closed before it is handed back.
	ch := make(chan string, len(names))
	for _, name := range names {
		ch <- name
	}
	close(ch)

	return ch
}
75 changes: 61 additions & 14 deletions website/source/docs/commands/alloc-status.html.md.erb
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ layout: "docs"
page_title: "Commands: alloc-status"
sidebar_current: "docs-commands-alloc-status"
description: >
Display status and metadata about existing allocations
Display status and metadata about existing allocations and their tasks.
---

# Command: alloc-status

The `alloc-status` command displays status information and metadata about
an existing allocation. It can be useful while debugging to reveal the
underlying reasons for scheduling decisions or failures.
The `alloc-status` command displays status information and metadata about an
existing allocation and its tasks. It can be useful while debugging to reveal
the underlying reasons for scheduling decisions or failures, as well as the
current state of its tasks.

## Usage

Expand All @@ -24,25 +25,71 @@ and detailed information for it will be dumped.
## General Options

<%= general_options_usage %>

## Status Options

* `-short`: Display short output. Shows only the most recent task event.

## Examples

Short status of an alloc:

```
nomad alloc-status 9f3276d6-c873-c0a3-81ae-247e8c665cbe
ID = 9f3276d6-c873-c0a3-81ae-247e8c665cbe
EvalID = dc186cc2-a9b2-218e-cc00-eea3d4eaccf4
$ nomad alloc-status --short a7365fe4-8b9f-4284-612d-a101fb41e773
ID = a7365fe4-8b9f-4284-612d-a101fb41e773
EvalID = 44c2d9ed-6377-ca3d-14a8-b2e6327230ce
Name = example.cache[0]
NodeID = <none>
NodeID = e55859b1-4330-f00b-da49-8a292432ead3
JobID = example
ClientStatus = failed
ClientDescription = <none>
ClientStatus = running
NodesEvaluated = 1
NodesFiltered = 1
NodesFiltered = 0
NodesExhausted = 0
AllocationTime = 15.242µs
AllocationTime = 911.026µs
CoalescedFailures = 0
redis
web

==> Tasks
Name State LastEvent Time
redis running Started 02:29:40 11/17/15
web running Started 02:30:41 11/17/15

==> Status
Allocation "9f3276d6-c873-c0a3-81ae-247e8c665cbe" status "failed" (1/1 nodes filtered)
* Constraint "$attr.kernel.name = linux" filtered 1 nodes
Allocation "a7365fe4-8b9f-4284-612d-a101fb41e773" status "running" (0/1 nodes filtered)
* Score "e55859b1-4330-f00b-da49-8a292432ead3.binpack" = 10.334026
```

Full status of an alloc, which shows one of the tasks dying and then being restarted:

```
$ nomad alloc-status a7365fe4-8b9f-4284-612d-a101fb41e773
ID = a7365fe4-8b9f-4284-612d-a101fb41e773
EvalID = 44c2d9ed-6377-ca3d-14a8-b2e6327230ce
Name = example.cache[0]
NodeID = e55859b1-4330-f00b-da49-8a292432ead3
JobID = example
ClientStatus = running
NodesEvaluated = 1
NodesFiltered = 0
NodesExhausted = 0
AllocationTime = 911.026µs
CoalescedFailures = 0

==> Task "redis" is "running"
Recent Events:
Time Type Description
02:29:40 11/17/15 Started <none>

==> Task "web" is "running"
Recent Events:
Time Type Description
02:30:41 11/17/15 Started <none>
02:30:02 11/17/15 Terminated Exit Code: 137, Exit Message: "Docker container exited with non-zero exit code: 137"
02:29:40 11/17/15 Started <none>

==> Status
Allocation "a7365fe4-8b9f-4284-612d-a101fb41e773" status "running" (0/1 nodes filtered)
* Score "e55859b1-4330-f00b-da49-8a292432ead3.binpack" = 10.334026

```