Skip to content

Commit

Permalink
scheduler: correctly detect inplace update with wildcard datacenters
Browse files Browse the repository at this point in the history
Wildcard datacenters introduced a bug where a job with any wildcard datacenters
will always be treated as a destructive update when we check whether a
datacenter has been removed from the jobspec.

Includes updating the helper so that callers don't have to loop over the job's
datacenters.
  • Loading branch information
tgross committed Mar 7, 2023
1 parent 605f155 commit 424cfca
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 16 deletions.
3 changes: 3 additions & 0 deletions .changelog/16362.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed a bug where allocs of system jobs with wildcard datacenters would be destructively updated
```
7 changes: 2 additions & 5 deletions nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1620,11 +1620,8 @@ func (n *Node) createNodeEvals(node *structs.Node, nodeIndex uint64) ([]string,
// here, but datacenter is a good optimization to start with as
// datacenter cardinality tends to be low so the check
// shouldn't add much work.
for _, dc := range job.Datacenters {
if node.IsInDC(dc) {
sysJobs = append(sysJobs, job)
break
}
if node.IsInAnyDC(job.Datacenters) {
sysJobs = append(sysJobs, job)
}
}

Expand Down
9 changes: 7 additions & 2 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -2311,8 +2311,13 @@ func (n *Node) ComparableResources() *ComparableResources {
}
}

func (n *Node) IsInDC(dc string) bool {
return glob.Glob(dc, n.Datacenter)
func (n *Node) IsInAnyDC(datacenters []string) bool {
for _, dc := range datacenters {
if glob.Glob(dc, n.Datacenter) {
return true
}
}
return false
}

// Stub returns a summarized version of the node
Expand Down
14 changes: 5 additions & 9 deletions scheduler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/nomad/structs"
"golang.org/x/exp/slices"
)

// allocTuple is a tuple of the allocation name and potential alloc ID
Expand Down Expand Up @@ -66,12 +65,9 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]str
notReady[node.ID] = struct{}{}
continue
}
for _, dc := range dcs {
if node.IsInDC(dc) {
out = append(out, node)
dcMap[node.Datacenter]++
break
}
if node.IsInAnyDC(dcs) {
out = append(out, node)
dcMap[node.Datacenter]++
}
}
return out, notReady, dcMap, nil
Expand Down Expand Up @@ -558,7 +554,7 @@ func inplaceUpdate(ctx Context, eval *structs.Evaluation, job *structs.Job,
}

// The alloc is on a node that's now in an ineligible DC
if !slices.Contains(job.Datacenters, node.Datacenter) {
if !node.IsInAnyDC(job.Datacenters) {
continue
}

Expand Down Expand Up @@ -802,7 +798,7 @@ func genericAllocUpdateFn(ctx Context, stack Stack, evalID string) allocUpdateTy
}

// The alloc is on a node that's now in an ineligible DC
if !slices.Contains(newJob.Datacenters, node.Datacenter) {
if !node.IsInAnyDC(newJob.Datacenters) {
return false, true, nil
}

Expand Down
30 changes: 30 additions & 0 deletions scheduler/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ func TestTaskUpdatedSpread(t *testing.T) {

require.False(t, tasksUpdated(j5, j6, name))
}

func TestTasksUpdated(t *testing.T) {
ci.Parallel(t)

Expand Down Expand Up @@ -972,6 +973,35 @@ func TestInplaceUpdate_Success(t *testing.T) {
}
}

func TestInplaceUpdate_WildcardDatacenters(t *testing.T) {
ci.Parallel(t)

store, ctx := testContext(t)
eval := mock.Eval()
job := mock.Job()
job.Datacenters = []string{"*"}

node := mock.Node()
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 900, node))

// Register an alloc
alloc := mock.AllocForNode(node)
alloc.Job = job
alloc.JobID = job.ID
must.NoError(t, store.UpsertJobSummary(1000, mock.JobSummary(alloc.JobID)))
must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1001, []*structs.Allocation{alloc}))

updates := []allocTuple{{Alloc: alloc, TaskGroup: job.TaskGroups[0]}}
stack := NewGenericStack(false, ctx)
unplaced, inplace := inplaceUpdate(ctx, eval, job, stack, updates)

must.Len(t, 1, inplace,
must.Sprintf("inplaceUpdate should have an inplace update"))
must.Len(t, 0, unplaced)
must.MapNotEmpty(t, ctx.plan.NodeAllocation,
must.Sprintf("inplaceUpdate should have an inplace update"))
}

func TestUtil_connectUpdated(t *testing.T) {
ci.Parallel(t)

Expand Down

0 comments on commit 424cfca

Please sign in to comment.