Merge pull request #2961 from hashicorp/b-deployment-ttl
Lost allocs replaced even if deployment failed
dadgar committed Aug 4, 2017
2 parents 8b46d40 + 66c59b0 commit 4428d8b
Showing 2 changed files with 25 additions and 6 deletions.
20 changes: 18 additions & 2 deletions scheduler/reconcile.go
@@ -365,13 +365,29 @@ func (a *allocReconciler) computeGroup(group string, all allocSet) bool {
 		dstate.DesiredTotal += len(place)
 	}
 
-	if !a.deploymentPaused && !a.deploymentFailed && !canaryState {
-		// Place all new allocations
+	// deploymentPlaceReady tracks whether the deployment is in a state where
+	// placements can be made without any other consideration.
+	deploymentPlaceReady := !a.deploymentPaused && !a.deploymentFailed && !canaryState
+
+	if deploymentPlaceReady {
 		desiredChanges.Place += uint64(len(place))
 		for _, p := range place {
 			a.result.place = append(a.result.place, p)
 		}
+	} else if !deploymentPlaceReady && len(lost) != 0 {
+		// We are in a situation where we shouldn't be placing more than we need
+		// to but we have lost allocations. It is a very weird user experience
+		// if you have a node go down and Nomad doesn't replace the allocations
+		// because the deployment is paused/failed so we only place to recover
+		// the lost allocations.
+		allowed := helper.IntMin(len(lost), len(place))
+		desiredChanges.Place += uint64(allowed)
+		for _, p := range place[:allowed] {
+			a.result.place = append(a.result.place, p)
+		}
+	}
 
+	if deploymentPlaceReady {
 		// Do all destructive updates
 		min := helper.IntMin(len(destructive), limit)
 		limit -= min
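
Aside (not part of this commit): the placement cap introduced above is simply a minimum over two counts. Below is a minimal, self-contained Go sketch of that rule; intMin is a local stand-in for Nomad's helper.IntMin, and placementsAllowed is a purely illustrative name, not a function in the scheduler.

package main

import "fmt"

// intMin is a local stand-in for Nomad's helper.IntMin.
func intMin(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// placementsAllowed mirrors the rule in the diff above: a deployment that is
// ready for placements places everything queued, while a paused or failed
// deployment places only enough to replace lost allocations.
func placementsAllowed(deploymentPlaceReady bool, lost, place int) int {
	if deploymentPlaceReady {
		return place
	}
	return intMin(lost, place)
}

func main() {
	fmt.Println(placementsAllowed(true, 0, 5))  // 5: ready deployment places all queued allocs
	fmt.Println(placementsAllowed(false, 2, 5)) // 2: failed deployment replaces only the lost allocs
	fmt.Println(placementsAllowed(false, 0, 5)) // 0: failed deployment with nothing lost places nothing
}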
11 changes: 7 additions & 4 deletions scheduler/reconcile_test.go
@@ -47,7 +47,7 @@ Update stanza Tests:
 √ Don't create a deployment if there are no changes
 √ Deployment created by all inplace updates
 √ Paused or failed deployment doesn't create any more canaries
-√ Paused or failed deployment doesn't do any placements
+√ Paused or failed deployment doesn't do any placements unless replacing lost allocs
 √ Paused or failed deployment doesn't do destructive updates
 √ Paused does do migrations
 √ Failed deployment doesn't do migrations
@@ -2538,8 +2538,9 @@ func TestReconciler_TaintedNode_RollingUpgrade(t *testing.T) {
 	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
 }
 
-// Tests the reconciler handles a failed deployment and does no placements
-func TestReconciler_FailedDeployment_NoPlacements(t *testing.T) {
+// Tests the reconciler handles a failed deployment and only replaces lost
+// deployments
+func TestReconciler_FailedDeployment_PlacementLost(t *testing.T) {
 	job := mock.Job()
 	job.TaskGroups[0].Update = noCanaryUpdate
 
@@ -2602,18 +2603,20 @@ func TestReconciler_FailedDeployment_NoPlacements(t *testing.T) {
 	assertResults(t, r, &resultExpectation{
 		createDeployment: nil,
 		deploymentUpdates: nil,
-		place: 0,
+		place: 1, // Only replace the lost node
 		inplace: 0,
 		stop: 2,
 		followupEvalWait: 0, // Since the deployment is failed, there should be no followup
 		desiredTGUpdates: map[string]*structs.DesiredUpdates{
 			job.TaskGroups[0].Name: {
+				Place: 1,
 				Stop: 2,
 				Ignore: 8,
 			},
 		},
 	})
 
+	assertNamesHaveIndexes(t, intRange(0, 0), placeResultsToNames(r.place))
 	assertNamesHaveIndexes(t, intRange(0, 1), stopResultsToNames(r.stop))
 }

