Skip to content

Commit

Permalink
switch to table test for lost node cases
Browse files (browse the repository at this point in the history)
  • Loading branch information
drewbailey committed Jan 7, 2020
1 parent 5ae6c9b commit c3bc3fc
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 84 deletions.
181 changes: 99 additions & 82 deletions scheduler/generic_sched_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2628,103 +2628,120 @@ func TestServiceSched_JobDeregister_Stopped(t *testing.T) {
}

// TestServiceSched_NodeDown checks how the service scheduler treats
// allocations that sit on a node whose status is structs.NodeStatusDown.
//
// NOTE(review): this span is a rendered commit diff (see the hunk header just
// above it). Lines of the removed ten-allocation version and of the added
// table-driven version are interleaved without +/- markers, so it does not
// compile as shown. The `cases` table and the `for i, tc := range cases` loop
// appear to be the added version; the `allocs[...]`, `toBeMigrated`,
// `toBeLost` and `toBeRescheduled` bookkeeping appears to be the version
// being replaced — confirm against the real file before editing.
func TestServiceSched_NodeDown(t *testing.T) {
h := NewHarness(t)

// Register a node
node := mock.Node()
node.Status = structs.NodeStatusDown
require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))

// Generate a fake job with allocations and an update policy.
job := mock.Job()
require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
allocs = append(allocs, alloc)
}

// Cover each terminal case and ensure it doesn't change to lost
allocs[7].DesiredStatus = structs.AllocDesiredStatusRun
allocs[7].ClientStatus = structs.AllocClientStatusLost
allocs[8].DesiredStatus = structs.AllocDesiredStatusRun
allocs[8].ClientStatus = structs.AllocClientStatusFailed
allocs[9].DesiredStatus = structs.AllocDesiredStatusRun
allocs[9].ClientStatus = structs.AllocClientStatusComplete

toBeRescheduled := map[string]bool{allocs[8].ID: true}

// Mark some allocs as running
for i := 0; i < 3; i++ {
out := allocs[i]
out.ClientStatus = structs.AllocClientStatusRunning
// Table of scenarios, one allocation per case:
//   desired / client - written to the alloc's DesiredStatus / ClientStatus
//   migrate          - written to DesiredTransition.Migrate; the updated alloc
//                      must not end up with client status "lost"
//   reschedule       - the updated alloc must keep client status "failed"
//   terminal         - the scheduler must submit no plan at all
//   lost             - the updated alloc must end up with client status "lost"
cases := []struct {
desired string
client string
migrate bool
reschedule bool
terminal bool
lost bool
}{
{
desired: structs.AllocDesiredStatusStop,
client: structs.AllocClientStatusRunning,
lost: true,
},
{
desired: structs.AllocDesiredStatusRun,
client: structs.AllocClientStatusPending,
migrate: true,
},
{
desired: structs.AllocDesiredStatusRun,
client: structs.AllocClientStatusRunning,
migrate: true,
},
{
desired: structs.AllocDesiredStatusRun,
client: structs.AllocClientStatusLost,
terminal: true,
},
{
desired: structs.AllocDesiredStatusRun,
client: structs.AllocClientStatusComplete,
terminal: true,
},
{
desired: structs.AllocDesiredStatusRun,
client: structs.AllocClientStatusFailed,
reschedule: true,
},
{
desired: structs.AllocDesiredStatusEvict,
client: structs.AllocClientStatusRunning,
lost: true,
},
}

// Mark one alloc as in-progress evict
allocs[4].DesiredStatus = structs.AllocDesiredStatusEvict
allocs[4].ClientStatus = structs.AllocClientStatusRunning
for i, tc := range cases {
// NOTE(review): fmt.Sprintf("") produces an empty subtest name, so the
// testing package falls back to generated names (#00, #01, ...) and a
// failure does not say which case broke. Consider building the name from
// tc.desired and tc.client instead.
t.Run(fmt.Sprintf(""), func(t *testing.T) {
h := NewHarness(t)

// Mark appropriate allocs for migration
toBeMigrated := map[string]bool{}
for i := 0; i < 3; i++ {
out := allocs[i]
out.DesiredTransition.Migrate = helper.BoolToPtr(true)
toBeMigrated[out.ID] = true
}
// Register a node
node := mock.Node()
node.Status = structs.NodeStatusDown
require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))

toBeLost := map[string]bool{}
for i := len(toBeMigrated); i < 7; i++ {
toBeLost[allocs[i].ID] = true
}
// Generate a fake job with allocations and an update policy.
job := mock.Job()
require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
alloc := mock.Alloc()
alloc.Job = job
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)

// Create a mock evaluation to deal with drain
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
NodeID: node.ID,
Status: structs.EvalStatusPending,
}
require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
alloc.DesiredStatus = tc.desired
alloc.ClientStatus = tc.client

// Process the evaluation
err := h.Process(NewServiceScheduler, eval)
require.NoError(t, err)
// Mark for migration if necessary
alloc.DesiredTransition.Migrate = helper.BoolToPtr(tc.migrate)

// Ensure a single plan
require.Len(t, h.Plans, 1)
plan := h.Plans[0]
allocs := []*structs.Allocation{alloc}
require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

// Test the scheduler marked all non-terminal allocations as lost
require.Len(t, plan.NodeUpdate[node.ID], len(toBeMigrated)+len(toBeLost)+len(toBeRescheduled))
// Create a mock evaluation to deal with drain
// NOTE(review): "drain" looks carried over from another test; the node
// here is marked structs.NodeStatusDown and the evaluation is triggered
// by structs.EvalTriggerNodeUpdate.
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerNodeUpdate,
JobID: job.ID,
NodeID: node.ID,
Status: structs.EvalStatusPending,
}
require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

for _, out := range plan.NodeUpdate[node.ID] {
t.Run("alloc "+out.ID, func(t *testing.T) {
require.Equal(t, structs.AllocDesiredStatusStop, out.DesiredStatus)
// Process the evaluation
err := h.Process(NewServiceScheduler, eval)
require.NoError(t, err)

if toBeMigrated[out.ID] {
// there is no indicator on job itself that marks it as migrated
require.NotEqual(t, structs.AllocClientStatusLost, out.ClientStatus)
} else if toBeLost[out.ID] {
require.Equal(t, structs.AllocClientStatusLost, out.ClientStatus)
} else if toBeRescheduled[out.ID] {
require.Equal(t, structs.AllocClientStatusFailed, out.ClientStatus)
if tc.terminal {
// No plan for terminal state allocs
require.Len(t, h.Plans, 0)
} else {
require.Fail(t, "unexpected alloc update")
require.Len(t, h.Plans, 1)

// Exactly one update is expected for the down node: the single alloc
// registered for this case.
plan := h.Plans[0]
out := plan.NodeUpdate[node.ID]
require.Len(t, out, 1)

outAlloc := out[0]
if tc.migrate {
require.NotEqual(t, structs.AllocClientStatusLost, outAlloc.ClientStatus)
} else if tc.reschedule {
require.Equal(t, structs.AllocClientStatusFailed, outAlloc.ClientStatus)
} else if tc.lost {
require.Equal(t, structs.AllocClientStatusLost, outAlloc.ClientStatus)
} else {
// A case that sets none of the expectation flags is a mistake in the table.
require.Fail(t, "unexpected alloc update")
}
}

h.AssertEvalStatus(t, structs.EvalStatusComplete)
})
}

h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_NodeUpdate(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions scheduler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -809,8 +809,8 @@ func updateNonTerminalAllocsToLost(plan *structs.Plan, tainted map[string]*struc
continue
}

// If the scheduler has marked it as stop already but the alloc wasn't
// terminal on the client change the status to lost.
// If the scheduler has marked it as stop or evict already but the alloc
// wasn't terminal on the client change the status to lost.
if (alloc.DesiredStatus == structs.AllocDesiredStatusStop ||
alloc.DesiredStatus == structs.AllocDesiredStatusEvict) &&
(alloc.ClientStatus == structs.AllocClientStatusRunning ||
Expand Down

0 comments on commit c3bc3fc

Please sign in to comment.