Skip to content

Commit

Permalink
core: fix panic when AllocatedResources is nil
Browse files Browse the repository at this point in the history
Fix for #6540
  • Loading branch information
schmichael committed Oct 28, 2019
1 parent 5239e69 commit 08a1785
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 4 deletions.
86 changes: 86 additions & 0 deletions scheduler/generic_sched_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2164,6 +2164,92 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
}
}

// TestServiceSched_JobModify_InPlace08 asserts that inplace updates of
// allocations created with Nomad 0.8 do not cause panics.
//
// COMPAT(0.11) - While we do not guarantee that upgrades from 0.8 -> 0.10

This comment has been minimized.

Copy link
@samm-git

samm-git Nov 5, 2019

Hi, is there any documentation around which not recommending to skip one version on upgrade? We got into that issue on our env.

This comment has been minimized.

Copy link
@schmichael

schmichael Nov 6, 2019

Author Member

Great question, we'll get our docs updated. I filed an issue if you want to track our progress: #6633

This comment has been minimized.

Copy link
@samm-git

samm-git Nov 6, 2019

@schmichael great, thank you. We already upgraded our runbook, but this would help others to not get into the same trap :)

// (skipping 0.9) are safe, we do want to avoid panics in the scheduler which
// cause unrecoverable server outages with no chance of recovery.
//
// Safe to remove in 0.11.0 as no one should ever be trying to upgrade from 0.8
// to 0.11!
func TestServiceSched_JobModify_InPlace08(t *testing.T) {
h := NewHarness(t)

// Create node
node := mock.Node()
noErr(t, h.State.UpsertNode(h.NextIndex(), node))

// Generate a fake job with 0.8 allocations
job := mock.Job()
job.TaskGroups[0].Count = 1
noErr(t, h.State.UpsertJob(h.NextIndex(), job))

// Create 0.8 alloc
alloc := mock.Alloc()
alloc.Job = job.Copy()
alloc.JobID = job.ID
alloc.NodeID = node.ID
alloc.AllocatedResources = nil // 0.8 didn't have this
noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

// Update the job inplace
job2 := job.Copy()

job2.TaskGroups[0].Tasks[0].Services[0].Tags[0] = "newtag"
noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

// Create a mock evaluation
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: 50,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

// Process the evaluation
err := h.Process(NewServiceScheduler, eval)
require.NoError(t, err)

// Ensure a single plan
require.Len(t, h.Plans, 1)
plan := h.Plans[0]

// Ensure the plan did not evict any allocs
var update []*structs.Allocation
for _, updateList := range plan.NodeUpdate {
update = append(update, updateList...)
}
require.Zero(t, update)

// Ensure the plan updated the existing alloc
var planned []*structs.Allocation
for _, allocList := range plan.NodeAllocation {
planned = append(planned, allocList...)
}
require.Len(t, planned, 1)
for _, p := range planned {
require.Equal(t, job2, p.Job)
}

// Lookup the allocations by JobID
ws := memdb.NewWatchSet()
out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
noErr(t, err)

// Ensure all allocations placed
require.Len(t, out, 1)
h.AssertEvalStatus(t, structs.EvalStatusComplete)

newAlloc := out[0]

// Verify AllocatedResources was set
require.NotNil(t, newAlloc.AllocatedResources)
}

func TestServiceSched_JobModify_DistinctProperty(t *testing.T) {
h := NewHarness(t)

Expand Down
14 changes: 10 additions & 4 deletions scheduler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -838,13 +838,19 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
Tasks: option.TaskResources,
Shared: structs.AllocatedSharedResources{
DiskMB: int64(newTG.EphemeralDisk.SizeMB),
// Since this is an inplace update, we should copy network
// information from the original alloc. This is similar to
// how we copy network info for task level networks above.
Networks: existing.AllocatedResources.Shared.Networks,
},
}

// Since this is an inplace update, we should copy network
// information from the original alloc. This is similar to how
// we copy network info for task level networks above.
//
// existing.AllocatedResources is nil on Allocations created by
// Nomad v0.8 or earlier.
if existing.AllocatedResources != nil {
newAlloc.AllocatedResources.Shared.Networks = existing.AllocatedResources.Shared.Networks
}

// Use metrics from existing alloc for in place upgrade
// This is because if the inplace upgrade succeeded, any scoring metadata from
// when it first went through the scheduler should still be preserved. Using scoring
Expand Down

0 comments on commit 08a1785

Please sign in to comment.