Skip to content

Commit

Permalink
scheduler: fix panic when preempting and evicting
Browse files Browse the repository at this point in the history
Fixes #6787

In ProposedAllocs the proposed alloc slice was being copied while its
contents were not. Since RemoveAllocs nils elements of the proposed
alloc slice and is called twice, it could panic on the second call when
erroneously accessing a nil'd alloc.

The fix is to not copy the proposed alloc slice and pass the slice
returned by the 1st RemoveAllocs call to the 2nd call, thus maintaining
the trimmed length.
  • Loading branch information
schmichael committed Dec 3, 2019
1 parent 830c0b1 commit 6112ad9
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 7 deletions.
13 changes: 6 additions & 7 deletions scheduler/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ type Context interface {
// Reset is invoked after making a placement
Reset()

// ProposedAllocs returns the proposed allocations for a node
// which is the existing allocations, removing evictions, and
// adding any planned placements.
// ProposedAllocs returns the proposed allocations for a node which are
// the existing allocations, removing evictions, and adding any planned
// placements.
ProposedAllocs(nodeID string) ([]*structs.Allocation, error)

// RegexpCache is a cache of regular expressions
Expand Down Expand Up @@ -120,22 +120,21 @@ func (e *EvalContext) Reset() {
func (e *EvalContext) ProposedAllocs(nodeID string) ([]*structs.Allocation, error) {
// Get the existing allocations that are non-terminal
ws := memdb.NewWatchSet()
existingAlloc, err := e.state.AllocsByNodeTerminal(ws, nodeID, false)
proposed, err := e.state.AllocsByNodeTerminal(ws, nodeID, false)
if err != nil {
return nil, err
}

// Determine the proposed allocation by first removing allocations
// that are planned evictions and adding the new allocations.
proposed := existingAlloc
if update := e.plan.NodeUpdate[nodeID]; len(update) > 0 {
proposed = structs.RemoveAllocs(existingAlloc, update)
proposed = structs.RemoveAllocs(proposed, update)
}

// Remove any allocs that are being preempted
nodePreemptedAllocs := e.plan.NodePreemptions[nodeID]
if len(nodePreemptedAllocs) > 0 {
proposed = structs.RemoveAllocs(existingAlloc, nodePreemptedAllocs)
proposed = structs.RemoveAllocs(proposed, nodePreemptedAllocs)
}

// We create an index of the existing allocations so that if an inplace
Expand Down
110 changes: 110 additions & 0 deletions scheduler/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,116 @@ func TestEvalContext_ProposedAlloc(t *testing.T) {
}
}

// TestEvalContext_ProposedAlloc_EvictPreempt asserts both Evicted and
// Preempted allocs are removed from the allocs propsed for a node.
//
// See https://github.com/hashicorp/nomad/issues/6787
//
func TestEvalContext_ProposedAlloc_EvictPreempt(t *testing.T) {
t.Parallel()
state, ctx := testContext(t)
nodes := []*RankedNode{
{
Node: &structs.Node{
ID: uuid.Generate(),
NodeResources: &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 1024 * 3,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 1024 * 3,
},
},
},
},
}

// Add existing allocations
j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
allocEvict := &structs.Allocation{
ID: uuid.Generate(),
Namespace: structs.DefaultNamespace,
EvalID: uuid.Generate(),
NodeID: nodes[0].Node.ID,
JobID: j1.ID,
Job: j1,
AllocatedResources: &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{
"web": {
Cpu: structs.AllocatedCpuResources{
CpuShares: 1024,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 1024,
},
},
},
},
DesiredStatus: structs.AllocDesiredStatusRun,
ClientStatus: structs.AllocClientStatusPending,
TaskGroup: "web",
}
allocPreempt := &structs.Allocation{
ID: uuid.Generate(),
Namespace: structs.DefaultNamespace,
EvalID: uuid.Generate(),
NodeID: nodes[0].Node.ID,
JobID: j2.ID,
Job: j2,
AllocatedResources: &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{
"web": {
Cpu: structs.AllocatedCpuResources{
CpuShares: 1024,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 1024,
},
},
},
},
DesiredStatus: structs.AllocDesiredStatusRun,
ClientStatus: structs.AllocClientStatusPending,
TaskGroup: "web",
}
allocPropose := &structs.Allocation{
ID: uuid.Generate(),
Namespace: structs.DefaultNamespace,
EvalID: uuid.Generate(),
NodeID: nodes[0].Node.ID,
JobID: j3.ID,
Job: j3,
AllocatedResources: &structs.AllocatedResources{
Tasks: map[string]*structs.AllocatedTaskResources{
"web": {
Cpu: structs.AllocatedCpuResources{
CpuShares: 1024,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 1024,
},
},
},
},
DesiredStatus: structs.AllocDesiredStatusRun,
ClientStatus: structs.AllocClientStatusPending,
TaskGroup: "web",
}
require.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(allocEvict.JobID)))
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(allocPreempt.JobID)))
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(allocPropose.JobID)))
require.NoError(t, state.UpsertAllocs(1000, []*structs.Allocation{allocEvict, allocPreempt, allocPropose}))

// Plan to evict one alloc and preempt another
plan := ctx.Plan()
plan.NodePreemptions[nodes[0].Node.ID] = []*structs.Allocation{allocEvict}
plan.NodeUpdate[nodes[0].Node.ID] = []*structs.Allocation{allocPreempt}

proposed, err := ctx.ProposedAllocs(nodes[0].Node.ID)
require.NoError(t, err)
require.Len(t, proposed, 1)
}

func TestEvalEligibility_JobStatus(t *testing.T) {
e := NewEvalEligibility()
cc := "v1:100"
Expand Down

0 comments on commit 6112ad9

Please sign in to comment.