Skip to content

Commit

Permalink
core: reschedule evicted batch job when resources become available
Browse files Browse the repository at this point in the history
This PR fixes a bug where an evicted batch job would not be rescheduled
once resources become available.

Closes #9890
  • Loading branch information
shoenig committed Jun 3, 2022
1 parent 4cd07bb commit 367ca2c
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .changelog/13205.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed a bug where an evicted batch job would not be rescheduled
```
7 changes: 5 additions & 2 deletions scheduler/reconcile_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ func (a allocSet) filterByRescheduleable(isBatch bool, now time.Time, evalID str
return
}

// shouldFilter returns whether the alloc should be ignored or considered untainted
// shouldFilter returns whether the alloc should be ignored or considered untainted.
//
// Ignored allocs are filtered out.
// Untainted allocs count against the desired total.
// Filtering logic for batch jobs:
Expand All @@ -309,11 +310,13 @@ func shouldFilter(alloc *structs.Allocation, isBatch bool) (untainted, ignore bo
// complete but not failed, they shouldn't be replaced.
if isBatch {
switch alloc.DesiredStatus {
case structs.AllocDesiredStatusStop, structs.AllocDesiredStatusEvict:
case structs.AllocDesiredStatusStop:
if alloc.RanSuccessfully() {
return true, false
}
return false, true
case structs.AllocDesiredStatusEvict:
return false, true
default:
}

Expand Down
110 changes: 109 additions & 1 deletion scheduler/reconcile_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ package scheduler

import (
"testing"

"time"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/mock"
Expand Down Expand Up @@ -134,3 +135,110 @@ func TestAllocSet_filterByTainted(t *testing.T) {
require.Contains(lost, "lost1")
require.Contains(lost, "lost2")
}

func TestReconcile_shouldFilter(t *testing.T) {
testCases := []struct {
description string
batch bool
failed bool
desiredStatus string
clientStatus string

untainted bool
ignore bool
}{
{
description: "batch running",
batch: true,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusRunning,
untainted: true,
ignore: false,
},
{
description: "batch stopped success",
batch: true,
failed: false,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusRunning,
untainted: true,
ignore: false,
},
{
description: "batch stopped failed",
batch: true,
failed: true,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "batch evicted",
batch: true,
desiredStatus: structs.AllocDesiredStatusEvict,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "batch failed",
batch: true,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusFailed,
untainted: false,
ignore: false,
},
{
description: "service running",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusRunning,
untainted: false,
ignore: false,
},
{
description: "service stopped",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusStop,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "service evicted",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusEvict,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
{
description: "service client complete",
batch: false,
failed: false,
desiredStatus: structs.AllocDesiredStatusRun,
clientStatus: structs.AllocClientStatusComplete,
untainted: false,
ignore: true,
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
alloc := &structs.Allocation{
DesiredStatus: tc.desiredStatus,
TaskStates: map[string]*structs.TaskState{"task": {State: structs.TaskStateDead, Failed: tc.failed}},
ClientStatus: tc.clientStatus,
}

untainted, ignore := shouldFilter(alloc, tc.batch)
require.Equal(t, tc.untainted, untainted)
require.Equal(t, tc.ignore, ignore)
})
}
}

0 comments on commit 367ca2c

Please sign in to comment.