Merge pull request #619 from hashicorp/f-dc-empty
Alloc error message when no nodes are in DC
dadgar committed Jan 5, 2016
2 parents 9acc586 + c51b69a commit 8d110eb
Showing 9 changed files with 54 additions and 16 deletions.
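For orientation before the per-file diffs: a minimal sketch, not part of this commit, of how a consumer of the api package might use the new NodesAvailable field. It assumes Allocation.Metrics is a pointer to AllocationMetric, as suggested by the monitor.go change below; the program around it is illustrative only.

package main

import (
    "fmt"

    "github.com/hashicorp/nomad/api"
)

// reportEmptyDCs flags each datacenter the scheduler evaluated but found
// no ready nodes in, mirroring the check this commit adds to monitor.go.
func reportEmptyDCs(alloc *api.Allocation) {
    for dc, available := range alloc.Metrics.NodesAvailable {
        if available == 0 {
            fmt.Printf("No nodes are available in datacenter %q\n", dc)
        }
    }
}

func main() {
    // Hypothetical metrics, shaped as the scheduler would populate them
    // for a job whose datacenters list includes an empty "dc2".
    alloc := &api.Allocation{
        Metrics: &api.AllocationMetric{
            NodesAvailable: map[string]int{"dc1": 3, "dc2": 0},
        },
    }
    reportEmptyDCs(alloc) // prints: No nodes are available in datacenter "dc2"
}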
1 change: 1 addition & 0 deletions api/allocations.go
@@ -62,6 +62,7 @@ type Allocation struct {
 type AllocationMetric struct {
     NodesEvaluated     int
     NodesFiltered      int
+    NodesAvailable     map[string]int
     ClassFiltered      map[string]int
     ConstraintFiltered map[string]int
     NodesExhausted     int
8 changes: 8 additions & 0 deletions command/monitor.go
@@ -280,6 +280,14 @@ func dumpAllocStatus(ui cli.Ui, alloc *api.Allocation) {
         ui.Output(" * No nodes were eligible for evaluation")
     }
 
+    // Print a helpful message if the user has asked for a DC that has no
+    // available nodes.
+    for dc, available := range alloc.Metrics.NodesAvailable {
+        if available == 0 {
+            ui.Output(fmt.Sprintf(" * No nodes are available in datacenter %q", dc))
+        }
+    }
+
     // Print filter info
     for class, num := range alloc.Metrics.ClassFiltered {
         ui.Output(fmt.Sprintf(" * Class %q filtered %d nodes", class, num))
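With this loop in place, a command that monitors an evaluation (nomad run, for instance) would include a line like " * No nodes are available in datacenter "dc2"" in the failed-allocation dump whenever the job names a datacenter that currently has no ready nodes, alongside the existing eligibility and filter messages.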
3 changes: 3 additions & 0 deletions nomad/structs/structs.go
@@ -1695,6 +1695,9 @@ type AllocMetric struct {
     // NodesFiltered is the number of nodes filtered due to a constraint
     NodesFiltered int
 
+    // NodesAvailable is the number of nodes available for evaluation per DC.
+    NodesAvailable map[string]int
+
     // ClassFiltered is the number of nodes filtered by class
     ClassFiltered map[string]int
 
6 changes: 5 additions & 1 deletion scheduler/generic_sched.go
@@ -245,7 +245,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
 // computePlacements computes placements for allocations
 func (s *GenericScheduler) computePlacements(place []allocTuple) error {
     // Get the base nodes
-    nodes, err := readyNodesInDCs(s.state, s.job.Datacenters)
+    nodes, byDC, err := readyNodesInDCs(s.state, s.job.Datacenters)
     if err != nil {
         return err
     }
@@ -279,6 +279,9 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error {
             Metrics: s.ctx.Metrics(),
         }
 
+        // Store the available nodes by datacenter
+        s.ctx.Metrics().NodesAvailable = byDC
+
         // Set fields based on if we found an allocation option
         if option != nil {
             // Generate the service ids for the tasks which this allocation is going
@@ -300,5 +303,6 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error {
             failedTG[missing.TaskGroup] = alloc
         }
     }
+
     return nil
 }
5 changes: 5 additions & 0 deletions scheduler/generic_sched_test.go
@@ -123,6 +123,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
         t.Fatalf("bad: %#v", out[0].Metrics)
     }
 
+    // Check the available nodes
+    if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 0 {
+        t.Fatalf("bad: %#v", out[0].Metrics)
+    }
+
     h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
 
20 changes: 12 additions & 8 deletions scheduler/system_sched.go
@@ -25,12 +25,13 @@ type SystemScheduler struct {
     state   State
     planner Planner
 
-    eval  *structs.Evaluation
-    job   *structs.Job
-    plan  *structs.Plan
-    ctx   *EvalContext
-    stack *SystemStack
-    nodes []*structs.Node
+    eval      *structs.Evaluation
+    job       *structs.Job
+    plan      *structs.Plan
+    ctx       *EvalContext
+    stack     *SystemStack
+    nodes     []*structs.Node
+    nodesByDC map[string]int
 
     limitReached bool
     nextEval     *structs.Evaluation
@@ -86,7 +87,7 @@ func (s *SystemScheduler) process() (bool, error) {
 
     // Get the ready nodes in the required datacenters
     if s.job != nil {
-        s.nodes, err = readyNodesInDCs(s.state, s.job.Datacenters)
+        s.nodes, s.nodesByDC, err = readyNodesInDCs(s.state, s.job.Datacenters)
         if err != nil {
             return false, fmt.Errorf("failed to get ready nodes: %v", err)
         }
@@ -219,7 +220,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
         return fmt.Errorf("could not find node %q", missing.Alloc.NodeID)
     }
 
-    // Update the set of placement ndoes
+    // Update the set of placement nodes
     nodes[0] = node
     s.stack.SetNodes(nodes)
 
@@ -246,6 +247,9 @@
         Metrics: s.ctx.Metrics(),
     }
 
+    // Store the available nodes by datacenter
+    s.ctx.Metrics().NodesAvailable = s.nodesByDC
+
     // Set fields based on if we found an allocation option
     if option != nil {
         // Generate the service ids for the tasks that this allocation is going
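Design note: unlike the generic scheduler, which computes byDC locally inside computePlacements, the system scheduler fetches the ready nodes once in process() and computes placements afterwards, so the per-DC counts are carried on the struct as nodesByDC.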
5 changes: 5 additions & 0 deletions scheduler/system_sched_test.go
@@ -59,6 +59,11 @@ func TestSystemSched_JobRegister(t *testing.T) {
         t.Fatalf("bad: %#v", out)
     }
 
+    // Check the available nodes
+    if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 10 {
+        t.Fatalf("bad: %#v", out[0].Metrics)
+    }
+
     h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
 
14 changes: 8 additions & 6 deletions scheduler/util.go
@@ -172,19 +172,20 @@ func diffSystemAllocs(job *structs.Job, nodes []*structs.Node, taintedNodes map[
     return result
 }
 
-// readyNodesInDCs returns all the ready nodes in the given datacenters
-func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
+// readyNodesInDCs returns all the ready nodes in the given datacenters and a
+// mapping of each data center to the count of ready nodes.
+func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]int, error) {
     // Index the DCs
-    dcMap := make(map[string]struct{}, len(dcs))
+    dcMap := make(map[string]int, len(dcs))
     for _, dc := range dcs {
-        dcMap[dc] = struct{}{}
+        dcMap[dc] = 0
     }
 
     // Scan the nodes
     var out []*structs.Node
     iter, err := state.Nodes()
     if err != nil {
-        return nil, err
+        return nil, nil, err
     }
     for {
         raw := iter.Next()
@@ -204,8 +205,9 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, error) {
             continue
         }
         out = append(out, node)
+        dcMap[node.Datacenter] += 1
     }
-    return out, nil
+    return out, dcMap, nil
 }
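The subtle point in readyNodesInDCs: switching dcMap from map[string]struct{} to map[string]int and seeding every requested datacenter with 0 means a DC the job asked for but that has no ready nodes is still present in the returned map, and that presence-with-zero is exactly the signal monitor.go reports on. A standalone sketch of the pattern, with illustrative names that are not Nomad's API:

package main

import "fmt"

// countReadyByDC seeds every requested datacenter with a zero count so an
// empty DC still appears in the result, then tallies ready nodes that fall
// in one of the requested DCs.
func countReadyByDC(requested, readyNodeDCs []string) map[string]int {
    counts := make(map[string]int, len(requested))
    for _, dc := range requested {
        counts[dc] = 0 // seed: an empty DC must still be reportable
    }
    for _, dc := range readyNodeDCs {
        if _, ok := counts[dc]; ok {
            counts[dc]++
        }
    }
    return counts
}

func main() {
    counts := countReadyByDC([]string{"dc1", "dc2"}, []string{"dc1", "dc1"})
    fmt.Println(counts) // map[dc1:2 dc2:0]; dc2 is present with count 0, so it can be flagged
}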
8 changes: 7 additions & 1 deletion scheduler/util_test.go
@@ -204,7 +204,7 @@ func TestReadyNodesInDCs(t *testing.T) {
     noErr(t, state.UpsertNode(1002, node3))
     noErr(t, state.UpsertNode(1003, node4))
 
-    nodes, err := readyNodesInDCs(state, []string{"dc1", "dc2"})
+    nodes, dc, err := readyNodesInDCs(state, []string{"dc1", "dc2"})
     if err != nil {
         t.Fatalf("err: %v", err)
     }
@@ -215,6 +215,12 @@
     if nodes[0].ID == node3.ID || nodes[1].ID == node3.ID {
         t.Fatalf("Bad: %#v", nodes)
     }
+    if count, ok := dc["dc1"]; !ok || count != 1 {
+        t.Fatalf("Bad: dc1 count %v", count)
+    }
+    if count, ok := dc["dc2"]; !ok || count != 1 {
+        t.Fatalf("Bad: dc2 count %v", count)
+    }
 }
 
 func TestRetryMax(t *testing.T) {
