From 66c59b064da59c965a5b10b7ffec2396b9e67bce Mon Sep 17 00:00:00 2001
From: Alex Dadgar
Date: Thu, 3 Aug 2017 17:42:14 -0700
Subject: [PATCH] Lost allocs replaced even if deployment failed

This PR allows the scheduler to replace lost allocations even if the job
has a failed or paused deployment. Previously, lost allocations were not
replaced while a deployment was paused or failed, which was confusing to
users.

Fixes https://github.com/hashicorp/nomad/issues/2958
---
 scheduler/reconcile.go      | 20 ++++++++++++++++++--
 scheduler/reconcile_test.go | 11 +++++++----
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go
index 4c8dc1282ce0..23313e5cf23b 100644
--- a/scheduler/reconcile.go
+++ b/scheduler/reconcile.go
@@ -365,13 +365,29 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 		dstate.DesiredTotal += len(place)
 	}
 
-	if !a.deploymentPaused && !a.deploymentFailed && !canaryState {
-		// Place all new allocations
+	// deploymentPlaceReady tracks whether the deployment is in a state where
+	// placements can be made without any other consideration.
+	deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !canaryState
+
+	if deploymentPlaceReady {
 		desiredChanges.Place += uint64(len(place))
 		for _, p := range place {
 			a.result.place = append(a.result.place, p)
 		}
+	} else if !deploymentPlaceReady && len(lost) != 0 {
+		// We are in a situation where we shouldn't be placing more than we
+		// need to, but we have lost allocations. It is a very confusing user
+		// experience if a node goes down and Nomad doesn't replace the lost
+		// allocations because the deployment is paused/failed, so we place
+		// only enough to recover the lost allocations.
+		allowed := helper.IntMin(len(lost), len(place))
+		desiredChanges.Place += uint64(allowed)
+		for _, p := range place[:allowed] {
+			a.result.place = append(a.result.place, p)
+		}
+	}
+
+	if deploymentPlaceReady {
 		// Do all destructive updates
 		min := helper.IntMin(len(destructive), limit)
 		limit -= min
diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go
index 85b823fd71de..c9a61164ef81 100644
--- a/scheduler/reconcile_test.go
+++ b/scheduler/reconcile_test.go
@@ -47,7 +47,7 @@ Update stanza Tests:
 √ Don't create a deployment if there are no changes
 √ Deployment created by all inplace updates
 √ Paused or failed deployment doesn't create any more canaries
-√ Paused or failed deployment doesn't do any placements
+√ Paused or failed deployment doesn't do any placements unless replacing lost allocs
 √ Paused or failed deployment doesn't do destructive updates
 √ Paused does do migrations
 √ Failed deployment doesn't do migrations
@@ -2538,8 +2538,9 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
 	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
 }
 
-// Tests the reconciler handles a failed deployment and does no placements
-func TestReconciler_FailedDeployment_NoPlacements(t *testing.T) {
+// Tests the reconciler handles a failed deployment and only replaces lost
+// allocations
+func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) {
 	job := mock.Job()
 	job.TaskGroups[0].Update = noCanaryUpdate
 
@@ -2602,18 +2603,20 @@
 	assertResults(t, r, &resultExpectation{
 		createDeployment:  nil,
 		deploymentUpdates: nil,
-		place:             0,
+		place:             1, // Only replace the lost node
 		inplace:           0,
 		stop:              2,
 		followupEvalWait:  0, // Since the deployment is failed, there should be no followup
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
+				Place:  1,
 				Stop:   2,
 				Ignore: 8,
 			},
 		},
 	})
 
+	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
 	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
 }
 
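
Note for reviewers: the gating rule this patch adds can be read in isolation from the scheduler's internals. Below is a minimal, self-contained Go sketch of the same idea, namely place everything when the deployment is healthy, otherwise clamp placements to the number of lost allocations. The placement struct and selectPlacements helper are hypothetical stand-ins for illustration only, not the reconciler's actual types or API.

package main

import "fmt"

// placement is a hypothetical stand-in for the reconciler's placement type.
type placement struct {
	name string
}

// selectPlacements mirrors the rule added in computeGroup: if the deployment is
// neither paused nor failed and we are not in a canary state, every requested
// placement goes through; otherwise only enough placements to replace lost
// allocations are allowed.
func selectPlacements(place []placement, lostCount int, paused, failed, canary bool) []placement {
	deploymentPlaceReady := !paused && !failed && !canary
	if deploymentPlaceReady {
		return place
	}

	// Deployment is paused/failed (or canarying): clamp to the lost count so a
	// down node is still recovered without otherwise advancing the deployment.
	allowed := lostCount
	if len(place) < allowed {
		allowed = len(place)
	}
	return place[:allowed]
}

func main() {
	place := []placement{{"web[0]"}, {"web[1]"}, {"web[2]"}}

	// Healthy deployment: all three requested placements are made.
	fmt.Println(len(selectPlacements(place, 0, false, false, false))) // 3

	// Failed deployment with one lost alloc: only the lost alloc is replaced.
	fmt.Println(len(selectPlacements(place, 1, false, true, false))) // 1
}

Run as written, the sketch prints 3 for the healthy deployment and 1 for the failed deployment with a single lost allocation, which lines up with the place: 1 expectation in the updated test.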