scheduler: system scheduler should reconcile overlapping live allocs
The system scheduler uses a separate code path for reconciliation. During the
investigation into the "plan for node rejected" bug that was fixed in #16401,
it was discovered that this code path doesn't maintain the invariant that no more
than one allocation per system job task group (or `count` allocations for sysbatch
jobs) should be left running on a given client. While this condition should be
impossible to encounter in practice, the scheduler should reconcile it if it does occur.

Add a new `ensureSingleSystemAlloc` function that enforces the invariant that
there can be only a single desired-running allocation for a given system job on
a given node.
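
The scheduler files that add `ensureSingleSystemAlloc` are not expanded in the diff
below, so the following is only a minimal, self-contained sketch of the invariant the
commit message describes: of the desired-running allocations a node holds for one
system job task group, keep one and mark the rest for stopping. The `allocStub` type,
the `reconcileNodeGroup` helper name, and the "prefer the newest job version" tie-break
are illustrative assumptions, not the actual Nomad implementation.

```go
// Illustrative sketch only -- not the Nomad implementation. It models the
// invariant from the commit message: at most one desired-running allocation
// per system job task group on a node; overlapping (older) allocations stop.
package main

import (
	"fmt"
	"sort"
)

// allocStub is a simplified stand-in for *structs.Allocation.
type allocStub struct {
	ID         string
	TaskGroup  string
	JobVersion uint64
}

// reconcileNodeGroup keeps one allocation per task group and returns the
// overlapping ones that should be stopped. (Hypothetical helper; the real
// work is done by ensureSingleSystemAlloc in the unshown scheduler change.)
func reconcileNodeGroup(running []*allocStub) (keep, stop []*allocStub) {
	byGroup := map[string][]*allocStub{}
	for _, a := range running {
		byGroup[a.TaskGroup] = append(byGroup[a.TaskGroup], a)
	}
	for _, group := range byGroup {
		// Assumed tie-break: keep the allocation from the newest job version.
		sort.Slice(group, func(i, j int) bool {
			return group[i].JobVersion > group[j].JobVersion
		})
		keep = append(keep, group[0])
		stop = append(stop, group[1:]...)
	}
	return keep, stop
}

func main() {
	// Mirrors the new test below: two overlapping allocs for task group "web".
	old := &allocStub{ID: "alloc-old", TaskGroup: "web", JobVersion: 0}
	cur := &allocStub{ID: "alloc-new", TaskGroup: "web", JobVersion: 1}

	keep, stop := reconcileNodeGroup([]*allocStub{old, cur})
	fmt.Printf("keep=%s stop=%s\n", keep[0].ID, stop[0].ID) // keep=alloc-new stop=alloc-old
}
```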
tgross committed Mar 13, 2023
1 parent 5f37b2f commit 8e7a610
Showing 5 changed files with 350 additions and 67 deletions.
3 changes: 3 additions & 0 deletions .changelog/16097.txt
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed a bug where overlapping system allocations would not be stopped
```
7 changes: 4 additions & 3 deletions nomad/mock/job.go
@@ -437,9 +437,10 @@ func SystemJob() *structs.Job {
 		Meta: map[string]string{
 			"owner": "armon",
 		},
-		Status:      structs.JobStatusPending,
-		CreateIndex: 42,
-		ModifyIndex: 99,
+		Status:         structs.JobStatusPending,
+		CreateIndex:    42,
+		ModifyIndex:    99,
+		JobModifyIndex: 99,
 	}
 	job.Canonicalize()
 	return job
69 changes: 67 additions & 2 deletions scheduler/scheduler_system_test.go
@@ -8,14 +8,16 @@ import (
 	"time"

 	memdb "github.com/hashicorp/go-memdb"
+	"github.com/shoenig/test"
+	"github.com/shoenig/test/must"
+	"github.com/stretchr/testify/require"

 	"github.com/hashicorp/nomad/ci"
 	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/helper/pointer"
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
-	"github.com/shoenig/test/must"
-	"github.com/stretchr/testify/require"
 )

func TestSystemSched_JobRegister(t *testing.T) {
@@ -3084,3 +3086,66 @@ func TestSystemSched_CSITopology(t *testing.T) {
must.Eq(t, structs.EvalStatusComplete, h.Evals[0].Status)

}

func TestSystemSched_OverlappingAllocations(t *testing.T) {
ci.Parallel(t)
h := NewHarness(t)

node := mock.Node()
node.Name = "good-node-1"
must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node))

// Create 2 allocs for 2 versions of the same system job on the same node.
// This should not happen but has been observed, so the scheduler must "fix"
// this by stopping the old version.
oldJob := mock.SystemJob()
oldJob.ID = "my-job"
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), oldJob))

job := oldJob.Copy()
job.Version++
must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))

allocOld := mock.Alloc()
allocOld.Job = oldJob
allocOld.JobID = oldJob.ID
allocOld.NodeID = node.ID
allocOld.Name = "my-job.web[0]"

allocCurrent := allocOld.Copy()
allocCurrent.ID = uuid.Generate()
allocCurrent.Job = job

must.NoError(t, h.State.UpsertAllocs(structs.MsgTypeTestSetup, h.NextIndex(),
[]*structs.Allocation{
allocOld, allocCurrent}))

// Create a mock evaluation to trigger scheduling for the job registration
eval := &structs.Evaluation{
Namespace: structs.DefaultNamespace,
ID: uuid.Generate(),
Priority: job.Priority,
TriggeredBy: structs.EvalTriggerJobRegister,
JobID: job.ID,
Status: structs.EvalStatusPending,
}
lastIndex := h.NextIndex()
must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
lastIndex, []*structs.Evaluation{eval}))

// Process the evaluation
must.NoError(t, h.Process(NewSystemScheduler, eval))

// Ensure a single plan
must.Len(t, 1, h.Plans)
plan := h.Plans[0]
must.Nil(t, plan.Annotations, must.Sprint("expected no annotations"))

test.Len(t, 1, plan.NodeUpdate[node.ID], test.Sprint("expected 1 evict/stop"))
if len(plan.NodeUpdate[node.ID]) > 0 {
test.Eq(t, allocOld.ID, plan.NodeUpdate[node.ID][0].ID,
test.Sprintf("expected alloc=%s to be evicted/stopped", allocOld.ID))
}

test.Len(t, 0, plan.NodeAllocation[node.ID], test.Sprint("expected no updates"))
}