From 96ab783b3f957ada041aa966bec4ff558dc5147a Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Wed, 18 May 2016 18:11:40 -0700
Subject: [PATCH 1/6] Scheduler no longer produces failed allocations; failed
 alloc metrics stored in evaluation

---
 api/evaluations.go              |   1 +
 nomad/plan_apply.go             |   3 -
 nomad/plan_apply_test.go        |  17 +----
 nomad/structs/structs.go        |  42 +++++++-----
 scheduler/generic_sched.go      |  74 +++++++++++----------
 scheduler/generic_sched_test.go | 113 +++++++++++++++++++-------------
 scheduler/system_sched.go       |  53 +++++++--------
 scheduler/testing.go            |   1 -
 8 files changed, 161 insertions(+), 143 deletions(-)

diff --git a/api/evaluations.go b/api/evaluations.go
index 304f5ae72add..6d05a4da0e63 100644
--- a/api/evaluations.go
+++ b/api/evaluations.go
@@ -67,6 +67,7 @@ type Evaluation struct {
 	Wait              time.Duration
 	NextEval          string
 	PreviousEval      string
+	FailedTGAllocs    map[string]*AllocationMetric
 	CreateIndex       uint64
 	ModifyIndex       uint64
 }
diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go
index 5dc668f99e12..c094f16e8fdc 100644
--- a/nomad/plan_apply.go
+++ b/nomad/plan_apply.go
@@ -124,7 +124,6 @@ func (s *Server) applyPlan(job *structs.Job, result *structs.PlanResult, snap *s
 	// are multiple updates per node
 	minUpdates := len(result.NodeUpdate)
 	minUpdates += len(result.NodeAllocation)
-	minUpdates += len(result.FailedAllocs)
 
 	// Setup the update request
 	req := structs.AllocUpdateRequest{
@@ -137,7 +136,6 @@ func (s *Server) applyPlan(job *structs.Job, result *structs.PlanResult, snap *s
 	for _, allocList := range result.NodeAllocation {
 		req.Alloc = append(req.Alloc, allocList...)
 	}
-	req.Alloc = append(req.Alloc, result.FailedAllocs...)
 
 	// Set the time the alloc was applied for the first time. This can be used
 	// to approximate the scheduling time.
@@ -200,7 +198,6 @@ func evaluatePlan(pool *EvaluatePool, snap *state.StateSnapshot, plan *structs.P
 	result := &structs.PlanResult{
 		NodeUpdate:     make(map[string][]*structs.Allocation),
 		NodeAllocation: make(map[string][]*structs.Allocation),
-		FailedAllocs:   plan.FailedAllocs,
 	}
 
 	// Collect all the nodeIDs
diff --git a/nomad/plan_apply_test.go b/nomad/plan_apply_test.go
index b33152da31bb..8fde59f9e5ae 100644
--- a/nomad/plan_apply_test.go
+++ b/nomad/plan_apply_test.go
@@ -51,12 +51,10 @@ func TestPlanApply_applyPlan(t *testing.T) {
 
 	// Register alloc
 	alloc := mock.Alloc()
-	allocFail := mock.Alloc()
 	plan := &structs.PlanResult{
 		NodeAllocation: map[string][]*structs.Allocation{
 			node.ID: []*structs.Allocation{alloc},
 		},
-		FailedAllocs: []*structs.Allocation{allocFail},
 	}
 
 	// Snapshot the state
@@ -94,15 +92,6 @@ func TestPlanApply_applyPlan(t *testing.T) {
 		t.Fatalf("missing alloc")
 	}
 
-	// Lookup the allocation
-	out, err = s1.fsm.State().AllocByID(allocFail.ID)
-	if err != nil {
-		t.Fatalf("err: %v", err)
-	}
-	if out == nil {
-		t.Fatalf("missing alloc")
-	}
-
 	// Evict alloc, Register alloc2
 	allocEvict := new(structs.Allocation)
 	*allocEvict = *alloc
@@ -178,12 +167,10 @@ func TestPlanApply_EvalPlan_Simple(t *testing.T) {
 	snap, _ := state.Snapshot()
 
 	alloc := mock.Alloc()
-	allocFail := mock.Alloc()
 	plan := &structs.Plan{
 		NodeAllocation: map[string][]*structs.Allocation{
 			node.ID: []*structs.Allocation{alloc},
 		},
-		FailedAllocs: []*structs.Allocation{allocFail},
 	}
 
 	pool := NewEvaluatePool(workerPoolSize, workerPoolBufferSize)
@@ -196,8 +183,8 @@ func TestPlanApply_EvalPlan_Simple(t *testing.T) {
 	if result == nil {
 		t.Fatalf("missing result")
 	}
-	if !reflect.DeepEqual(result.FailedAllocs, plan.FailedAllocs) {
-		t.Fatalf("missing failed allocs")
+	if !reflect.DeepEqual(result.NodeAllocation, plan.NodeAllocation) {
+		t.Fatalf("incorrect node allocations")
 	}
 }
 
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index dbff03732cae..45e9e64e5d48 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -2617,6 +2617,11 @@ type Evaluation struct {
 	// This is used to support rolling upgrades, where we need a chain of evaluations.
 	PreviousEval string
 
+	// FailedTGAllocs are task groups which have allocations that could not be
+	// made, but the metrics are persisted so that the user can use the feedback
+	// to determine the cause.
+	FailedTGAllocs map[string]*AllocMetric
+
 	// ClassEligibility tracks computed node classes that have been explicitly
 	// marked as eligible or ineligible.
 	ClassEligibility map[string]bool
@@ -2655,6 +2660,25 @@ func (e *Evaluation) Copy() *Evaluation {
 	}
 	ne := new(Evaluation)
 	*ne = *e
+
+	// Copy ClassEligibility
+	if e.ClassEligibility != nil {
+		classes := make(map[string]bool, len(e.ClassEligibility))
+		for class, elig := range e.ClassEligibility {
+			classes[class] = elig
+		}
+		ne.ClassEligibility = classes
+	}
+
+	// Copy FailedTGAllocs
+	if e.FailedTGAllocs != nil {
+		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
+		for tg, metric := range e.FailedTGAllocs {
+			failedTGs[tg] = metric.Copy()
+		}
+		ne.FailedTGAllocs = failedTGs
+	}
+
 	return ne
 }
 
@@ -2769,11 +2793,6 @@ type Plan struct {
 	// The evicts must be considered prior to the allocations.
 	NodeAllocation map[string][]*Allocation
 
-	// FailedAllocs are allocations that could not be made,
-	// but are persisted so that the user can use the feedback
-	// to determine the cause.
-	FailedAllocs []*Allocation
-
 	// Annotations contains annotations by the scheduler to be used by operators
 	// to understand the decisions made by the scheduler.
 	Annotations *PlanAnnotations
@@ -2821,13 +2840,9 @@ func (p *Plan) AppendAlloc(alloc *Allocation) {
 	p.NodeAllocation[node] = append(existing, alloc)
 }
 
-func (p *Plan) AppendFailed(alloc *Allocation) {
-	p.FailedAllocs = append(p.FailedAllocs, alloc)
-}
-
 // IsNoOp checks if this plan would do nothing
 func (p *Plan) IsNoOp() bool {
-	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
+	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
 }
 
 // PlanResult is the result of a plan submitted to the leader.
@@ -2838,11 +2853,6 @@ type PlanResult struct {
 	// NodeAllocation contains all the allocations that were committed.
 	NodeAllocation map[string][]*Allocation
 
-	// FailedAllocs are allocations that could not be made,
-	// but are persisted so that the user can use the feedback
-	// to determine the cause.
-	FailedAllocs []*Allocation
-
 	// RefreshIndex is the index the worker should refresh state up to.
 	// This allows all evictions and allocations to be materialized.
 	// If any allocations were rejected due to stale data (node state,
@@ -2856,7 +2866,7 @@ type PlanResult struct {
 
 // IsNoOp checks if this plan result would do nothing
 func (p *PlanResult) IsNoOp() bool {
-	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
+	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
 }
 
 // FullCommit is used to check if all the allocations in a plan
diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go
index 2cce204a1159..0e7d838e40d0 100644
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -158,6 +158,9 @@ func (s *GenericScheduler) process() (bool, error) {
 	// Create a plan
 	s.plan = s.eval.MakePlan(s.job)
 
+	// Reset the failed allocations
+	s.eval.FailedTGAllocs = nil
+
 	// Create an evaluation context
 	s.ctx = NewEvalContext(s.state, s.plan, s.logger)
 
@@ -173,6 +176,16 @@ func (s *GenericScheduler) process() (bool, error) {
 		return false, err
 	}
 
+	// If there are failed allocations, we need to create a blocked evaluation
+	// to place the failed allocations when resources become available.
+	if len(s.eval.FailedTGAllocs) != 0 && s.blocked == nil {
+		if err := s.createBlockedEval(); err != nil {
+			s.logger.Printf("[ERR] sched: %#v failed to make blocked eval: %v", s.eval, err)
+			return false, err
+		}
+		s.logger.Printf("[DEBUG] sched: %#v: failed to place all allocations, blocked eval '%s' created", s.eval, s.blocked.ID)
+	}
+
 	// If the plan is a no-op, we can bail. If AnnotatePlan is set submit the plan
 	// anyways to get the annotations.
 	if s.plan.IsNoOp() && !s.eval.AnnotatePlan {
@@ -190,16 +203,6 @@ func (s *GenericScheduler) process() (bool, error) {
 		s.logger.Printf("[DEBUG] sched: %#v: rolling update limit reached, next eval '%s' created", s.eval, s.nextEval.ID)
 	}
 
-	// If there are failed allocations, we need to create a blocked evaluation
-	// to place the failed allocations when resources become available.
-	if len(s.plan.FailedAllocs) != 0 && s.blocked == nil {
-		if err := s.createBlockedEval(); err != nil {
-			s.logger.Printf("[ERR] sched: %#v failed to make blocked eval: %v", s.eval, err)
-			return false, err
-		}
-		s.logger.Printf("[DEBUG] sched: %#v: failed to place all allocations, blocked eval '%s' created", s.eval, s.blocked.ID)
-	}
-
 	// Submit the plan and store the results.
 	result, newState, err := s.planner.SubmitPlan(s.plan)
 	s.planResult = result
@@ -365,50 +368,49 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error {
 	// Update the set of placement ndoes
 	s.stack.SetNodes(nodes)
 
-	// Track the failed task groups so that we can coalesce
-	// the failures together to avoid creating many failed allocs.
-	failedTG := make(map[*structs.TaskGroup]*structs.Allocation)
-
 	for _, missing := range place {
 		// Check if this task group has already failed
-		if alloc, ok := failedTG[missing.TaskGroup]; ok {
-			alloc.Metrics.CoalescedFailures += 1
-			continue
+		if s.eval.FailedTGAllocs != nil {
+			if metric, ok := s.eval.FailedTGAllocs[missing.TaskGroup.Name]; ok {
+				metric.CoalescedFailures += 1
+				continue
+			}
 		}
 
 		// Attempt to match the task group
 		option, _ := s.stack.Select(missing.TaskGroup)
 
-		// Create an allocation for this
-		alloc := &structs.Allocation{
-			ID:        structs.GenerateUUID(),
-			EvalID:    s.eval.ID,
-			Name:      missing.Name,
-			JobID:     s.job.ID,
-			TaskGroup: missing.TaskGroup.Name,
-			Metrics:   s.ctx.Metrics(),
-		}
-
 		// Store the available nodes by datacenter
 		s.ctx.Metrics().NodesAvailable = byDC
 
 		// Set fields based on if we found an allocation option
 		if option != nil {
+			// Create an allocation for this
+			alloc := &structs.Allocation{
+				ID:            structs.GenerateUUID(),
+				EvalID:        s.eval.ID,
+				Name:          missing.Name,
+				JobID:         s.job.ID,
+				TaskGroup:     missing.TaskGroup.Name,
+				Metrics:       s.ctx.Metrics(),
+				NodeID:        option.Node.ID,
+				TaskResources: option.TaskResources,
+				DesiredStatus: structs.AllocDesiredStatusRun,
+				ClientStatus:  structs.AllocClientStatusPending,
+			}
+
 			// Generate service IDs tasks in this allocation
 			// COMPAT - This is no longer required and would be removed in v0.4
 			alloc.PopulateServiceIDs(missing.TaskGroup)
 
-			alloc.NodeID = option.Node.ID
-			alloc.TaskResources = option.TaskResources
-			alloc.DesiredStatus = structs.AllocDesiredStatusRun
-			alloc.ClientStatus = structs.AllocClientStatusPending
 			s.plan.AppendAlloc(alloc)
 		} else {
-			alloc.DesiredStatus = structs.AllocDesiredStatusFailed
-			alloc.DesiredDescription = "failed to find a node for placement"
-			alloc.ClientStatus = structs.AllocClientStatusFailed
-			s.plan.AppendFailed(alloc)
-			failedTG[missing.TaskGroup] = alloc
+			// Lazy initialize the failed map
+			if s.eval.FailedTGAllocs == nil {
+				s.eval.FailedTGAllocs = make(map[string]*structs.AllocMetric)
+			}
+
+			s.eval.FailedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
 		}
 	}
 
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index e3d5e9a84cba..09b980155572 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -224,39 +224,39 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
 		t.Fatalf("err: %v", err)
 	}
 
-	// Ensure a single plan
-	if len(h.Plans) != 1 {
+	// Ensure no plan
+	if len(h.Plans) != 0 {
 		t.Fatalf("bad: %#v", h.Plans)
 	}
-	plan := h.Plans[0]
 
-	// Ensure the plan has created a follow up eval.
+	// Ensure there is a follow up eval.
 	if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
 		t.Fatalf("bad: %#v", h.CreateEvals)
 	}
 
-	// Ensure the plan failed to alloc
-	if len(plan.FailedAllocs) != 1 {
-		t.Fatalf("bad: %#v", plan)
+	if len(h.Evals) != 1 {
+		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
 	}
+	outEval := h.Evals[0]
 
-	// Lookup the allocations by JobID
-	out, err := h.State.AllocsByJob(job.ID)
-	noErr(t, err)
+	// Ensure the plan failed to alloc
+	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
+		t.Fatalf("bad: %#v", outEval)
+	}
 
-	// Ensure all allocations placed
-	if len(out) != 1 {
-		t.Fatalf("bad: %#v", out)
+	metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
+	if !ok {
+		t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
 	}
 
 	// Check the coalesced failures
-	if out[0].Metrics.CoalescedFailures != 9 {
-		t.Fatalf("bad: %#v", out[0].Metrics)
+	if metrics.CoalescedFailures != 9 {
+		t.Fatalf("bad: %#v", metrics)
 	}
 
 	// Check the available nodes
-	if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 0 {
-		t.Fatalf("bad: %#v", out[0].Metrics)
+	if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 0 {
+		t.Fatalf("bad: %#v", metrics)
 	}
 
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
@@ -295,11 +295,10 @@ func TestServiceSched_JobRegister_BlockedEval(t *testing.T) {
 		t.Fatalf("err: %v", err)
 	}
 
-	// Ensure a single plan
-	if len(h.Plans) != 1 {
+	// Ensure no plan
+	if len(h.Plans) != 0 {
 		t.Fatalf("bad: %#v", h.Plans)
 	}
-	plan := h.Plans[0]
 
 	// Ensure the plan has created a follow up eval.
 	if len(h.CreateEvals) != 1 {
@@ -320,31 +319,34 @@ func TestServiceSched_JobRegister_BlockedEval(t *testing.T) {
 		t.Fatalf("bad: %#v", created)
 	}
 
-	// Ensure the plan failed to alloc
-	if len(plan.FailedAllocs) != 1 {
-		t.Fatalf("bad: %#v", plan)
+	// Ensure there is a follow up eval.
+	if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
+		t.Fatalf("bad: %#v", h.CreateEvals)
 	}
 
-	// Lookup the allocations by JobID
-	out, err := h.State.AllocsByJob(job.ID)
-	noErr(t, err)
+	if len(h.Evals) != 1 {
+		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
+	}
+	outEval := h.Evals[0]
 
-	// Ensure all allocations placed
-	if len(out) != 1 {
-		for _, a := range out {
-			t.Logf("%#v", a)
-		}
-		t.Fatalf("bad: %#v", out)
+	// Ensure the plan failed to alloc
+	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
+	metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
+	if !ok {
+		t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
 	}
 
 	// Check the coalesced failures
-	if out[0].Metrics.CoalescedFailures != 9 {
-		t.Fatalf("bad: %#v", out[0].Metrics)
+	if metrics.CoalescedFailures != 9 {
+		t.Fatalf("bad: %#v", metrics)
 	}
 
 	// Check the available nodes
-	if count, ok := out[0].Metrics.NodesAvailable["dc1"]; !ok || count != 2 {
-		t.Fatalf("bad: %#v", out[0].Metrics)
+	if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 2 {
+		t.Fatalf("bad: %#v", metrics)
 	}
 
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
@@ -403,19 +405,34 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
 	if len(planned) != 2 {
 		t.Fatalf("bad: %#v", plan)
 	}
-	if len(plan.FailedAllocs) != 1 {
-		t.Fatalf("bad: %#v", plan)
-	}
 
-	// Lookup the allocations by JobID
+	// Ensure two allocations placed
 	out, err := h.State.AllocsByJob(job.ID)
 	noErr(t, err)
-
-	// Ensure all allocations placed
-	if len(out) != 3 {
+	if len(out) != 2 {
 		t.Fatalf("bad: %#v", out)
 	}
 
+	// Ensure the plan failed to alloc one tg
+	if len(h.Evals) != 1 {
+		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
+	}
+	outEval := h.Evals[0]
+
+	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
+	metrics, ok := outEval.FailedTGAllocs[tg2.Name]
+	if !ok {
+		t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
+	}
+
+	// Check the coalesced failures
+	if metrics.CoalescedFailures != tg2.Count-1 {
+		t.Fatalf("bad: %#v", metrics)
+	}
+
 	h.AssertEvalStatus(t, structs.EvalStatusComplete)
 }
 
@@ -586,9 +603,13 @@ func TestServiceSched_JobModify_IncrCount_NodeLimit(t *testing.T) {
 		t.Fatalf("bad: %#v", plan)
 	}
 
-	// Ensure the plan didn't to alloc
-	if len(plan.FailedAllocs) != 0 {
-		t.Fatalf("bad: %#v", plan)
+	// Ensure the plan had no failures
+	if len(h.Evals) != 1 {
+		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
+	}
+	outEval := h.Evals[0]
+	if outEval == nil || len(outEval.FailedTGAllocs) != 0 {
+		t.Fatalf("bad: %#v", outEval)
 	}
 
 	// Lookup the allocations by JobID
diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go
index 3499059fa8d9..e5a559e23dca 100644
--- a/scheduler/system_sched.go
+++ b/scheduler/system_sched.go
@@ -98,6 +98,9 @@ func (s *SystemScheduler) process() (bool, error) {
 	// Create a plan
 	s.plan = s.eval.MakePlan(s.job)
 
+	// Reset the failed allocations
+	s.eval.FailedTGAllocs = nil
+
 	// Create an evaluation context
 	s.ctx = NewEvalContext(s.state, s.plan, s.logger)
 
@@ -220,10 +223,6 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
 		nodeByID[node.ID] = node
 	}
 
-	// Track the failed task groups so that we can coalesce
-	// the failures together to avoid creating many failed allocs.
-	failedTG := make(map[*structs.TaskGroup]*structs.Allocation)
-
 	nodes := make([]*structs.Node, 1)
 	for _, missing := range place {
 		node, ok := nodeByID[missing.Alloc.NodeID]
@@ -238,45 +237,47 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
 		// Attempt to match the task group
 		option, _ := s.stack.Select(missing.TaskGroup)
 
-		if option == nil {
+		if option == nil && s.eval.FailedTGAllocs != nil {
 			// Check if this task group has already failed
-			if alloc, ok := failedTG[missing.TaskGroup]; ok {
-				alloc.Metrics.CoalescedFailures += 1
+			if metric, ok := s.eval.FailedTGAllocs[missing.TaskGroup.Name]; ok {
+				metric.CoalescedFailures += 1
 				continue
 			}
 		}
 
-		// Create an allocation for this
-		alloc := &structs.Allocation{
-			ID:        structs.GenerateUUID(),
-			EvalID:    s.eval.ID,
-			Name:      missing.Name,
-			JobID:     s.job.ID,
-			TaskGroup: missing.TaskGroup.Name,
-			Metrics:   s.ctx.Metrics(),
-		}
-
 		// Store the available nodes by datacenter
 		s.ctx.Metrics().NodesAvailable = s.nodesByDC
 
 		// Set fields based on if we found an allocation option
 		if option != nil {
+			// Create an allocation for this
+			alloc := &structs.Allocation{
+				ID:            structs.GenerateUUID(),
+				EvalID:        s.eval.ID,
+				Name:          missing.Name,
+				JobID:         s.job.ID,
+				TaskGroup:     missing.TaskGroup.Name,
+				Metrics:       s.ctx.Metrics(),
+				NodeID:        option.Node.ID,
+				TaskResources: option.TaskResources,
+				DesiredStatus: structs.AllocDesiredStatusRun,
+				ClientStatus:  structs.AllocClientStatusPending,
+			}
+
 			// Generate service IDs tasks in this allocation
 			// COMPAT - This is no longer required and would be removed in v0.4
 			alloc.PopulateServiceIDs(missing.TaskGroup)
 
-			alloc.NodeID = option.Node.ID
-			alloc.TaskResources = option.TaskResources
-			alloc.DesiredStatus = structs.AllocDesiredStatusRun
-			alloc.ClientStatus = structs.AllocClientStatusPending
 			s.plan.AppendAlloc(alloc)
 		} else {
-			alloc.DesiredStatus = structs.AllocDesiredStatusFailed
-			alloc.DesiredDescription = "failed to find a node for placement"
-			alloc.ClientStatus = structs.AllocClientStatusFailed
-			s.plan.AppendFailed(alloc)
-			failedTG[missing.TaskGroup] = alloc
+			// Lazy initialize the failed map
+			if s.eval.FailedTGAllocs == nil {
+				s.eval.FailedTGAllocs = make(map[string]*structs.AllocMetric)
+			}
+
+			s.eval.FailedTGAllocs[missing.TaskGroup.Name] = s.ctx.Metrics()
 		}
 	}
+
 	return nil
 }
diff --git a/scheduler/testing.go b/scheduler/testing.go
index fbd1aeda702a..b7a4d7da1d1d 100644
--- a/scheduler/testing.go
+++ b/scheduler/testing.go
@@ -91,7 +91,6 @@ func (h *Harness) SubmitPlan(plan *structs.Plan) (*structs.PlanResult, State, er
 	for _, allocList := range plan.NodeAllocation {
 		allocs = append(allocs, allocList...)
 	}
-	allocs = append(allocs, plan.FailedAllocs...)
 
 	// Attach the plan to all the allocations. It is pulled out in the
 	// payload to avoid the redundancy of encoding, but should be denormalized

From 154875c0bc90ddc14fb285c4b174a43095e79392 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Wed, 18 May 2016 22:02:51 -0700
Subject: [PATCH 2/6] Show task group failures in eval-monitor

---
 command/monitor.go | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/command/monitor.go b/command/monitor.go
index 025f8e22ee02..62b6dc83667f 100644
--- a/command/monitor.go
+++ b/command/monitor.go
@@ -288,9 +288,25 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int {
 		m.update(state)
 
 		switch eval.Status {
-		case structs.EvalStatusComplete, structs.EvalStatusFailed:
-			m.ui.Info(fmt.Sprintf("Evaluation %q finished with status %q",
-				limit(eval.ID, m.length), eval.Status))
+		case structs.EvalStatusComplete, structs.EvalStatusFailed, structs.EvalStatusCancelled:
+			if len(eval.FailedTGAllocs) == 0 {
+				m.ui.Info(fmt.Sprintf("Evaluation %q finished with status %q",
+					limit(eval.ID, m.length), eval.Status))
+			} else {
+				// There were failures making the allocations
+				m.ui.Info(fmt.Sprintf("Evaluation %q finished with status %q but failed to place all allocations:",
+					limit(eval.ID, m.length), eval.Status))
+
+				// Print the failures per task group
+				for tg, metrics := range eval.FailedTGAllocs {
+					noun := "allocation"
+					if metrics.CoalescedFailures > 0 {
+						noun += "s"
+					}
+					m.ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
+					dumpAllocMetrics(m.ui, metrics, false)
+				}
+			}
 		default:
 			// Wait for the next update
 			time.Sleep(updateWait)
@@ -332,41 +348,46 @@ func dumpAllocStatus(ui cli.Ui, alloc *api.Allocation, length int) {
 	ui.Output(fmt.Sprintf("Allocation %q status %q (%d/%d nodes filtered)",
 		limit(alloc.ID, length), alloc.ClientStatus,
 		alloc.Metrics.NodesFiltered, alloc.Metrics.NodesEvaluated))
+	dumpAllocMetrics(ui, alloc.Metrics, true)
+}
 
+func dumpAllocMetrics(ui cli.Ui, metrics *api.AllocationMetric, scores bool) {
 	// Print a helpful message if we have an eligibility problem
-	if alloc.Metrics.NodesEvaluated == 0 {
+	if metrics.NodesEvaluated == 0 {
 		ui.Output("  * No nodes were eligible for evaluation")
 	}
 
 	// Print a helpful message if the user has asked for a DC that has no
 	// available nodes.
-	for dc, available := range alloc.Metrics.NodesAvailable {
+	for dc, available := range metrics.NodesAvailable {
 		if available == 0 {
 			ui.Output(fmt.Sprintf("  * No nodes are available in datacenter %q", dc))
 		}
 	}
 
 	// Print filter info
-	for class, num := range alloc.Metrics.ClassFiltered {
+	for class, num := range metrics.ClassFiltered {
 		ui.Output(fmt.Sprintf("  * Class %q filtered %d nodes", class, num))
 	}
-	for cs, num := range alloc.Metrics.ConstraintFiltered {
+	for cs, num := range metrics.ConstraintFiltered {
 		ui.Output(fmt.Sprintf("  * Constraint %q filtered %d nodes", cs, num))
 	}
 
 	// Print exhaustion info
-	if ne := alloc.Metrics.NodesExhausted; ne > 0 {
+	if ne := metrics.NodesExhausted; ne > 0 {
 		ui.Output(fmt.Sprintf("  * Resources exhausted on %d nodes", ne))
 	}
-	for class, num := range alloc.Metrics.ClassExhausted {
+	for class, num := range metrics.ClassExhausted {
 		ui.Output(fmt.Sprintf("  * Class %q exhausted on %d nodes", class, num))
 	}
-	for dim, num := range alloc.Metrics.DimensionExhausted {
+	for dim, num := range metrics.DimensionExhausted {
 		ui.Output(fmt.Sprintf("  * Dimension %q exhausted on %d nodes", dim, num))
 	}
 
 	// Print scores
-	for name, score := range alloc.Metrics.Scores {
-		ui.Output(fmt.Sprintf("  * Score %q = %f", name, score))
+	if scores {
+		for name, score := range metrics.Scores {
+			ui.Output(fmt.Sprintf("  * Score %q = %f", name, score))
+		}
 	}
 }

From 6deadf1ccd2ea4b86c61ce3bb2d67888852c7292 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Thu, 19 May 2016 13:09:52 -0700
Subject: [PATCH 3/6] Evals track blocked evals they create

---
 api/evaluations.go              | 33 +++++++++++++++++----------------
 nomad/structs/structs.go        |  5 +++++
 scheduler/generic_sched.go      |  6 +++---
 scheduler/generic_sched_test.go | 20 +++++++++++++++++++-
 scheduler/system_sched.go       |  6 +++---
 scheduler/util.go               |  5 ++++-
 scheduler/util_test.go          | 21 +++++++++++++++++++--
 7 files changed, 70 insertions(+), 26 deletions(-)

diff --git a/api/evaluations.go b/api/evaluations.go
index 6d05a4da0e63..a10c3cba57a2 100644
--- a/api/evaluations.go
+++ b/api/evaluations.go
@@ -54,22 +54,23 @@ func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*Allocation
 
 // Evaluation is used to serialize an evaluation.
 type Evaluation struct {
-	ID                string
-	Priority          int
-	Type              string
-	TriggeredBy       string
-	JobID             string
-	JobModifyIndex    uint64
-	NodeID            string
-	NodeModifyIndex   uint64
-	Status            string
-	StatusDescription string
-	Wait              time.Duration
-	NextEval          string
-	PreviousEval      string
-	FailedTGAllocs    map[string]*AllocationMetric
-	CreateIndex       uint64
-	ModifyIndex       uint64
+	ID                 string
+	Priority           int
+	Type               string
+	TriggeredBy        string
+	JobID              string
+	JobModifyIndex     uint64
+	NodeID             string
+	NodeModifyIndex    uint64
+	Status             string
+	StatusDescription  string
+	Wait               time.Duration
+	NextEval           string
+	PreviousEval       string
+	SpawnedBlockedEval string
+	FailedTGAllocs     map[string]*AllocationMetric
+	CreateIndex        uint64
+	ModifyIndex        uint64
 }
 
 // EvalIndexSort is a wrapper to sort evaluations by CreateIndex.
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 45e9e64e5d48..577893850bb5 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -2617,6 +2617,11 @@ type Evaluation struct {
 	// This is used to support rolling upgrades, where we need a chain of evaluations.
 	PreviousEval string
 
+	// SpawnedBlockedEval is the evaluation ID for a created blocked eval. A
+	// blocked eval will be created if all allocations could not be placed due
+	// to constraints or lacking resources.
+	SpawnedBlockedEval string
+
 	// FailedTGAllocs are task groups which have allocations that could not be
 	// made, but the metrics are persisted so that the user can use the feedback
 	// to determine the cause.
diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go
index 0e7d838e40d0..2595fbac7c86 100644
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -100,7 +100,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 	default:
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
 			eval.TriggeredBy)
-		return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
+		return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusFailed, desc)
 	}
 
 	// Retry up to the maxScheduleAttempts and reset if progress is made.
@@ -117,7 +117,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 			if err := s.createBlockedEval(); err != nil {
 				mErr.Errors = append(mErr.Errors, err)
 			}
-			if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error()); err != nil {
+			if err := setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, statusErr.EvalStatus, err.Error()); err != nil {
 				mErr.Errors = append(mErr.Errors, err)
 			}
 			return mErr.ErrorOrNil()
@@ -126,7 +126,7 @@ func (s *GenericScheduler) Process(eval *structs.Evaluation) error {
 	}
 
 	// Update the status to complete
-	return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
+	return setStatus(s.logger, s.planner, s.eval, s.nextEval, s.blocked, structs.EvalStatusComplete, "")
 }
 
 // createBlockedEval creates a blocked eval and stores it.
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 09b980155572..879da2a837dc 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -48,6 +48,14 @@ func TestServiceSched_JobRegister(t *testing.T) {
 		t.Fatalf("expected no annotations")
 	}
 
+	// Ensure the eval has no spawned blocked eval
+	if len(h.Evals) != 1 {
+		t.Fatalf("bad: %#v", h.Evals)
+		if h.Evals[0].SpawnedBlockedEval != "" {
+			t.Fatalf("bad: %#v", h.Evals[0])
+		}
+	}
+
 	// Ensure the plan allocated
 	var planned []*structs.Allocation
 	for _, allocList := range plan.NodeAllocation {
@@ -239,6 +247,11 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
 	}
 	outEval := h.Evals[0]
 
+	// Ensure the eval has its spawned blocked eval
+	if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
 	// Ensure the plan failed to alloc
 	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
 		t.Fatalf("bad: %#v", outEval)
@@ -413,12 +426,17 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
 		t.Fatalf("bad: %#v", out)
 	}
 
-	// Ensure the plan failed to alloc one tg
 	if len(h.Evals) != 1 {
 		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
 	}
 	outEval := h.Evals[0]
 
+	// Ensure the eval has its spawned blocked eval
+	if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+		t.Fatalf("bad: %#v", outEval)
+	}
+
+	// Ensure the plan failed to alloc one tg
 	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
 		t.Fatalf("bad: %#v", outEval)
 	}
diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go
index e5a559e23dca..e0bef083ad64 100644
--- a/scheduler/system_sched.go
+++ b/scheduler/system_sched.go
@@ -60,20 +60,20 @@ func (s *SystemScheduler) Process(eval *structs.Evaluation) error {
 	default:
 		desc := fmt.Sprintf("scheduler cannot handle '%s' evaluation reason",
 			eval.TriggeredBy)
-		return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusFailed, desc)
+		return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusFailed, desc)
 	}
 
 	// Retry up to the maxSystemScheduleAttempts and reset if progress is made.
 	progress := func() bool { return progressMade(s.planResult) }
 	if err := retryMax(maxSystemScheduleAttempts, s.process, progress); err != nil {
 		if statusErr, ok := err.(*SetStatusError); ok {
-			return setStatus(s.logger, s.planner, s.eval, s.nextEval, statusErr.EvalStatus, err.Error())
+			return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, statusErr.EvalStatus, err.Error())
 		}
 		return err
 	}
 
 	// Update the status to complete
-	return setStatus(s.logger, s.planner, s.eval, s.nextEval, structs.EvalStatusComplete, "")
+	return setStatus(s.logger, s.planner, s.eval, s.nextEval, nil, structs.EvalStatusComplete, "")
 }
 
 // process is wrapped in retryMax to iteratively run the handler until we have no
diff --git a/scheduler/util.go b/scheduler/util.go
index 4dc411dc9f68..89ad49dc921e 100644
--- a/scheduler/util.go
+++ b/scheduler/util.go
@@ -356,7 +356,7 @@ func networkPortMap(n *structs.NetworkResource) map[string]int {
 }
 
 // setStatus is used to update the status of the evaluation
-func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Evaluation, status, desc string) error {
+func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlocked *structs.Evaluation, status, desc string) error {
 	logger.Printf("[DEBUG] sched: %#v: setting status to %s", eval, status)
 	newEval := eval.Copy()
 	newEval.Status = status
@@ -364,6 +364,9 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval *structs.Eval
 	if nextEval != nil {
 		newEval.NextEval = nextEval.ID
 	}
+	if spawnedBlocked != nil {
+		newEval.SpawnedBlockedEval = spawnedBlocked.ID
+	}
 	return planner.UpdateEval(newEval)
 }
 
diff --git a/scheduler/util_test.go b/scheduler/util_test.go
index 1d689fe2b380..73fc29f8f712 100644
--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@@ -485,7 +485,7 @@ func TestSetStatus(t *testing.T) {
 	eval := mock.Eval()
 	status := "a"
 	desc := "b"
-	if err := setStatus(logger, h, eval, nil, status, desc); err != nil {
+	if err := setStatus(logger, h, eval, nil, nil, status, desc); err != nil {
 		t.Fatalf("setStatus() failed: %v", err)
 	}
 
@@ -498,9 +498,10 @@ func TestSetStatus(t *testing.T) {
 		t.Fatalf("setStatus() submited invalid eval: %v", newEval)
 	}
 
+	// Test next evals
 	h = NewHarness(t)
 	next := mock.Eval()
-	if err := setStatus(logger, h, eval, next, status, desc); err != nil {
+	if err := setStatus(logger, h, eval, next, nil, status, desc); err != nil {
 		t.Fatalf("setStatus() failed: %v", err)
 	}
 
@@ -512,6 +513,22 @@ func TestSetStatus(t *testing.T) {
 	if newEval.NextEval != next.ID {
 		t.Fatalf("setStatus() didn't set nextEval correctly: %v", newEval)
 	}
+
+	// Test blocked evals
+	h = NewHarness(t)
+	blocked := mock.Eval()
+	if err := setStatus(logger, h, eval, nil, blocked, status, desc); err != nil {
+		t.Fatalf("setStatus() failed: %v", err)
+	}
+
+	if len(h.Evals) != 1 {
+		t.Fatalf("setStatus() didn't update plan: %v", h.Evals)
+	}
+
+	newEval = h.Evals[0]
+	if newEval.SpawnedBlockedEval != blocked.ID {
+		t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval)
+	}
 }
 
 func TestInplaceUpdate_ChangedTaskGroup(t *testing.T) {

From 9f5ab6648fc439b83b052d27323402dc5569c000 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Thu, 19 May 2016 13:16:10 -0700
Subject: [PATCH 4/6] print spawned eval in monitor

---
 command/monitor.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/command/monitor.go b/command/monitor.go
index 62b6dc83667f..f5417f26a877 100644
--- a/command/monitor.go
+++ b/command/monitor.go
@@ -306,6 +306,11 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int {
 					m.ui.Output(fmt.Sprintf("Task Group %q (failed to place %d %s):", tg, metrics.CoalescedFailures+1, noun))
 					dumpAllocMetrics(m.ui, metrics, false)
 				}
+
+				if eval.SpawnedBlockedEval != "" {
+					m.ui.Output(fmt.Sprintf("Spawned follow up blocked evaluation %q to place remainder",
+						limit(eval.SpawnedBlockedEval, m.length)))
+				}
 			}
 		default:
 			// Wait for the next update

From 92bddbc3a52444f9e1dfa25872242c3b4511afb6 Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Tue, 24 May 2016 18:12:59 -0700
Subject: [PATCH 5/6] rename SpawnedBlockedEval and simplify map safety check

---
 api/evaluations.go              | 34 ++++++++++++++++-----------------
 command/monitor.go              |  6 +++---
 nomad/structs/structs.go        |  8 ++++----
 scheduler/generic_sched.go      | 10 ++++------
 scheduler/generic_sched_test.go |  6 +++---
 scheduler/system_sched.go       |  2 +-
 scheduler/util.go               |  2 +-
 scheduler/util_test.go          |  4 ++--
 8 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/api/evaluations.go b/api/evaluations.go
index a10c3cba57a2..37774d321ca3 100644
--- a/api/evaluations.go
+++ b/api/evaluations.go
@@ -54,23 +54,23 @@ func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*Allocation
 
 // Evaluation is used to serialize an evaluation.
 type Evaluation struct {
-	ID                 string
-	Priority           int
-	Type               string
-	TriggeredBy        string
-	JobID              string
-	JobModifyIndex     uint64
-	NodeID             string
-	NodeModifyIndex    uint64
-	Status             string
-	StatusDescription  string
-	Wait               time.Duration
-	NextEval           string
-	PreviousEval       string
-	SpawnedBlockedEval string
-	FailedTGAllocs     map[string]*AllocationMetric
-	CreateIndex        uint64
-	ModifyIndex        uint64
+	ID                string
+	Priority          int
+	Type              string
+	TriggeredBy       string
+	JobID             string
+	JobModifyIndex    uint64
+	NodeID            string
+	NodeModifyIndex   uint64
+	Status            string
+	StatusDescription string
+	Wait              time.Duration
+	NextEval          string
+	PreviousEval      string
+	BlockedEval       string
+	FailedTGAllocs    map[string]*AllocationMetric
+	CreateIndex       uint64
+	ModifyIndex       uint64
 }
 
 // EvalIndexSort is a wrapper to sort evaluations by CreateIndex.
diff --git a/command/monitor.go b/command/monitor.go
index f5417f26a877..5cfe6810f3c6 100644
--- a/command/monitor.go
+++ b/command/monitor.go
@@ -307,9 +307,9 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int {
 					dumpAllocMetrics(m.ui, metrics, false)
 				}
 
-				if eval.SpawnedBlockedEval != "" {
-					m.ui.Output(fmt.Sprintf("Spawned follow up blocked evaluation %q to place remainder",
-						limit(eval.SpawnedBlockedEval, m.length)))
+				if eval.BlockedEval != "" {
+					m.ui.Output(fmt.Sprintf("Evaluation %q waiting for additional capacity to place remainder",
+						limit(eval.BlockedEval, m.length)))
 				}
 			}
 		default:
diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go
index 577893850bb5..4a70e96bb87e 100644
--- a/nomad/structs/structs.go
+++ b/nomad/structs/structs.go
@@ -2617,10 +2617,10 @@ type Evaluation struct {
 	// This is used to support rolling upgrades, where we need a chain of evaluations.
 	PreviousEval string
 
-	// SpawnedBlockedEval is the evaluation ID for a created blocked eval. A
+	// BlockedEval is the evaluation ID for a created blocked eval. A
 	// blocked eval will be created if all allocations could not be placed due
 	// to constraints or lacking resources.
-	SpawnedBlockedEval string
+	BlockedEval string
 
 	// FailedTGAllocs are task groups which have allocations that could not be
 	// made, but the metrics are persisted so that the user can use the feedback
@@ -2744,10 +2744,10 @@ func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
 	}
 }
 
-// BlockedEval creates a blocked evaluation to followup this eval to place any
+// CreateBlockedEval creates a blocked evaluation to followup this eval to place any
 // failed allocations. It takes the classes marked explicitly eligible or
 // ineligible and whether the job has escaped computed node classes.
-func (e *Evaluation) BlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation {
+func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation {
 	return &Evaluation{
 		ID:                   GenerateUUID(),
 		Priority:             e.Priority,
diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go
index 2595fbac7c86..056c90d4eda7 100644
--- a/scheduler/generic_sched.go
+++ b/scheduler/generic_sched.go
@@ -140,7 +140,7 @@ func (s *GenericScheduler) createBlockedEval() error {
 		classEligibility = e.GetClasses()
 	}
 
-	s.blocked = s.eval.BlockedEval(classEligibility, escaped)
+	s.blocked = s.eval.CreateBlockedEval(classEligibility, escaped)
 	return s.planner.CreateEval(s.blocked)
 }
 
@@ -370,11 +370,9 @@ func (s *GenericScheduler) computePlacements(place []allocTuple) error {
 
 	for _, missing := range place {
 		// Check if this task group has already failed
-		if s.eval.FailedTGAllocs != nil {
-			if metric, ok := s.eval.FailedTGAllocs[missing.TaskGroup.Name]; ok {
-				metric.CoalescedFailures += 1
-				continue
-			}
+		if metric, ok := s.eval.FailedTGAllocs[missing.TaskGroup.Name]; ok {
+			metric.CoalescedFailures += 1
+			continue
 		}
 
 		// Attempt to match the task group
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index 879da2a837dc..ff6b98301808 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -51,7 +51,7 @@ func TestServiceSched_JobRegister(t *testing.T) {
 	// Ensure the eval has no spawned blocked eval
 	if len(h.Evals) != 1 {
 		t.Fatalf("bad: %#v", h.Evals)
-		if h.Evals[0].SpawnedBlockedEval != "" {
+		if h.Evals[0].BlockedEval != "" {
 			t.Fatalf("bad: %#v", h.Evals[0])
 		}
 	}
@@ -248,7 +248,7 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
 	outEval := h.Evals[0]
 
 	// Ensure the eval has its spawned blocked eval
-	if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+	if outEval.BlockedEval != h.CreateEvals[0].ID {
 		t.Fatalf("bad: %#v", outEval)
 	}
 
@@ -432,7 +432,7 @@ func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
 	outEval := h.Evals[0]
 
 	// Ensure the eval has its spawned blocked eval
-	if outEval.SpawnedBlockedEval != h.CreateEvals[0].ID {
+	if outEval.BlockedEval != h.CreateEvals[0].ID {
 		t.Fatalf("bad: %#v", outEval)
 	}
 
diff --git a/scheduler/system_sched.go b/scheduler/system_sched.go
index e0bef083ad64..9f64e95be25c 100644
--- a/scheduler/system_sched.go
+++ b/scheduler/system_sched.go
@@ -237,7 +237,7 @@ func (s *SystemScheduler) computePlacements(place []allocTuple) error {
 		// Attempt to match the task group
 		option, _ := s.stack.Select(missing.TaskGroup)
 
-		if option == nil && s.eval.FailedTGAllocs != nil {
+		if option == nil {
 			// Check if this task group has already failed
 			if metric, ok := s.eval.FailedTGAllocs[missing.TaskGroup.Name]; ok {
 				metric.CoalescedFailures += 1
diff --git a/scheduler/util.go b/scheduler/util.go
index 89ad49dc921e..9961649c5478 100644
--- a/scheduler/util.go
+++ b/scheduler/util.go
@@ -365,7 +365,7 @@ func setStatus(logger *log.Logger, planner Planner, eval, nextEval, spawnedBlock
 		newEval.NextEval = nextEval.ID
 	}
 	if spawnedBlocked != nil {
-		newEval.SpawnedBlockedEval = spawnedBlocked.ID
+		newEval.BlockedEval = spawnedBlocked.ID
 	}
 	return planner.UpdateEval(newEval)
 }
diff --git a/scheduler/util_test.go b/scheduler/util_test.go
index 73fc29f8f712..e9957a5012ae 100644
--- a/scheduler/util_test.go
+++ b/scheduler/util_test.go
@@ -526,8 +526,8 @@ func TestSetStatus(t *testing.T) {
 	}
 
 	newEval = h.Evals[0]
-	if newEval.SpawnedBlockedEval != blocked.ID {
-		t.Fatalf("setStatus() didn't set SpawnedBlockedEval correctly: %v", newEval)
+	if newEval.BlockedEval != blocked.ID {
+		t.Fatalf("setStatus() didn't set BlockedEval correctly: %v", newEval)
 	}
 }
 

From ed94128d68bb45c578c5d3ddb9a138d4056e099c Mon Sep 17 00:00:00 2001
From: Alex Dadgar <alex.dadgar@gmail.com>
Date: Tue, 24 May 2016 18:42:05 -0700
Subject: [PATCH 6/6] output and test fix

---
 command/monitor.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/command/monitor.go b/command/monitor.go
index 5cfe6810f3c6..51d4fe78f382 100644
--- a/command/monitor.go
+++ b/command/monitor.go
@@ -147,10 +147,14 @@ func (m *monitor) update(update *evalState) {
 		} else {
 			switch {
 			case existing.client != alloc.client:
+				description := ""
+				if alloc.clientDesc != "" {
+					description = fmt.Sprintf(" (%s)", alloc.clientDesc)
+				}
 				// Allocation status has changed
 				m.ui.Output(fmt.Sprintf(
-					"Allocation %q status changed: %q -> %q (%s)",
-					limit(alloc.id, m.length), existing.client, alloc.client, alloc.clientDesc))
+					"Allocation %q status changed: %q -> %q%s",
+					limit(alloc.id, m.length), existing.client, alloc.client, description))
 			}
 		}
 	}
@@ -294,6 +298,7 @@ func (m *monitor) monitor(evalID string, allowPrefix bool) int {
 					limit(eval.ID, m.length), eval.Status))
 			} else {
 				// There were failures making the allocations
+				schedFailure = true
 				m.ui.Info(fmt.Sprintf("Evaluation %q finished with status %q but failed to place all allocations:",
 					limit(eval.ID, m.length), eval.Status))