Skip to content

Commit

Permalink
Merge pull request #7762 from hashicorp/b-in-place-update-deviceids
Browse files Browse the repository at this point in the history
Perserve device ids in in-place alloc updates
  • Loading branch information
Mahmood Ali committed Apr 21, 2020
2 parents dd2e387 + dacb634 commit 15b7474
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ BUG FIXES:
* connect: Fixed a bug where some connect proxy fields would be dropped from 'job inspect' output [[GH-7397](https://github.com/hashicorp/nomad/issues/7397)]
* connect: Fixed a bug where an absent connect sidecar_service stanza would trigger panic [[GH-7683](https://github.com/hashicorp/nomad/pull/7683)]
* connect: Fixed bugs where some connect parameters would be ignored [[GH-7690](https://github.com/hashicorp/nomad/pull/7690)] [[GH-7684](https://github.com/hashicorp/nomad/pull/7684)]
* scheduler: Fixed a bug in managing allocated devices for a job allocation in in-place update scenarios [[GH-7762](https://github.com/hashicorp/nomad/issues/7762)]

## 0.11.0 (April 8, 2020)

Expand Down
17 changes: 15 additions & 2 deletions scheduler/generic_sched_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2072,6 +2072,15 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))
require.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))

taskName := job.TaskGroups[0].Tasks[0].Name

adr := structs.AllocatedDeviceResource{
Type: "gpu",
Vendor: "nvidia",
Name: "1080ti",
DeviceIDs: []string{uuid.Generate()},
}

// Create allocs that are part of the old deployment
var allocs []*structs.Allocation
for i := 0; i < 10; i++ {
Expand All @@ -2082,6 +2091,7 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
alloc.DeploymentID = d.ID
alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
alloc.AllocatedResources.Tasks[taskName].Devices = []*structs.AllocatedDeviceResource{&adr}
allocs = append(allocs, alloc)
}
require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))
Expand Down Expand Up @@ -2155,13 +2165,16 @@ func TestServiceSched_JobModify_InPlace(t *testing.T) {
}
h.AssertEvalStatus(t, structs.EvalStatusComplete)

// Verify the network did not change
// Verify the allocated networks and devices did not change
rp := structs.Port{Label: "admin", Value: 5000}
for _, alloc := range out {
for _, resources := range alloc.TaskResources {
for _, resources := range alloc.AllocatedResources.Tasks {
if resources.Networks[0].ReservedPorts[0] != rp {
t.Fatalf("bad: %#v", alloc)
}
if len(resources.Devices) == 0 || reflect.DeepEqual(resources.Devices[0], adr) {
t.Fatalf("bad devices has changed: %#v", alloc)
}
}
}

Expand Down
12 changes: 9 additions & 3 deletions scheduler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -614,22 +614,25 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
continue
}

// Restore the network offers from the existing allocation.
// Restore the network and device offers from the existing allocation.
// We do not allow network resources (reserved/dynamic ports)
// to be updated. This is guarded in taskUpdated, so we can
// safely restore those here.
for task, resources := range option.TaskResources {
var networks structs.Networks
var devices []*structs.AllocatedDeviceResource
if update.Alloc.AllocatedResources != nil {
if tr, ok := update.Alloc.AllocatedResources.Tasks[task]; ok {
networks = tr.Networks
devices = tr.Devices
}
} else if tr, ok := update.Alloc.TaskResources[task]; ok {
networks = tr.Networks
}

// Add thhe networks back
// Add the networks and devices back
resources.Networks = networks
resources.Devices = devices
}

// Create a shallow copy
Expand Down Expand Up @@ -892,22 +895,25 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
return false, true, nil
}

// Restore the network offers from the existing allocation.
// Restore the network and device offers from the existing allocation.
// We do not allow network resources (reserved/dynamic ports)
// to be updated. This is guarded in taskUpdated, so we can
// safely restore those here.
for task, resources := range option.TaskResources {
var networks structs.Networks
var devices []*structs.AllocatedDeviceResource
if existing.AllocatedResources != nil {
if tr, ok := existing.AllocatedResources.Tasks[task]; ok {
networks = tr.Networks
devices = tr.Devices
}
} else if tr, ok := existing.TaskResources[task]; ok {
networks = tr.Networks
}

// Add the networks back
resources.Networks = networks
resources.Devices = devices
}

// Create a shallow copy
Expand Down

0 comments on commit 15b7474

Please sign in to comment.