From 65b976c04aa2a5fea4e59e00466bd8c84f946a30 Mon Sep 17 00:00:00 2001 From: Jorge Marey Date: Fri, 11 Mar 2022 11:21:28 +0100 Subject: [PATCH 1/2] Fix in-place updates over ineligible nodes --- nomad/plan_apply.go | 11 +++++++++-- nomad/plan_apply_test.go | 33 +++++++++++++++++++++++++++++++++ nomad/structs/funcs.go | 18 ++++++++++++++++++ 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 8721b339a439..555f235cd079 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -662,8 +662,6 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri return false, "node is disconnected and contains invalid updates", nil } else if node.Status != structs.NodeStatusReady { return false, "node is not ready for placements", nil - } else if node.SchedulingEligibility == structs.NodeSchedulingIneligible { - return false, "node is not eligible", nil } // Get the existing allocations that are non-terminal @@ -672,6 +670,15 @@ func evaluateNodePlan(snap *state.StateSnapshot, plan *structs.Plan, nodeID stri return false, "", fmt.Errorf("failed to get existing allocations for '%s': %v", nodeID, err) } + // If nodeAllocations is a subset of the existing allocations we can continue, + // even if the node is not eligible, as only in-place updates or stop/evict are performed + if structs.AllocSubset(existingAlloc, plan.NodeAllocation[nodeID]) { + return true, "", nil + } + if node.SchedulingEligibility == structs.NodeSchedulingIneligible { + return false, "node is not eligible", nil + } + // Determine the proposed allocation by first removing allocations // that are planned evictions and adding the new allocations. var remove []*structs.Allocation diff --git a/nomad/plan_apply_test.go b/nomad/plan_apply_test.go index 984708010d74..82cf4a8d5dcf 100644 --- a/nomad/plan_apply_test.go +++ b/nomad/plan_apply_test.go @@ -887,6 +887,39 @@ func TestPlanApply_EvalNodePlan_UpdateExisting(t *testing.T) { } } +func TestPlanApply_EvalNodePlan_UpdateExisting_Ineligible(t *testing.T) { + t.Parallel() + alloc := mock.Alloc() + state := testStateStore(t) + node := mock.Node() + node.ReservedResources = nil + node.Reserved = nil + node.SchedulingEligibility = structs.NodeSchedulingIneligible + alloc.NodeID = node.ID + alloc.AllocatedResources = structs.NodeResourcesToAllocatedResources(node.NodeResources) + state.UpsertNode(structs.MsgTypeTestSetup, 1000, node) + state.UpsertAllocs(structs.MsgTypeTestSetup, 1001, []*structs.Allocation{alloc}) + snap, _ := state.Snapshot() + + plan := &structs.Plan{ + Job: alloc.Job, + NodeAllocation: map[string][]*structs.Allocation{ + node.ID: {alloc}, + }, + } + + fit, reason, err := evaluateNodePlan(snap, plan, node.ID) + if err != nil { + t.Fatalf("err: %v", err) + } + if !fit { + t.Fatalf("bad") + } + if reason != "" { + t.Fatalf("bad") + } +} + func TestPlanApply_EvalNodePlan_NodeFull_Evict(t *testing.T) { ci.Parallel(t) alloc := mock.Alloc() diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go index 18678d19178f..42983826a676 100644 --- a/nomad/structs/funcs.go +++ b/nomad/structs/funcs.go @@ -64,6 +64,24 @@ func RemoveAllocs(allocs []*Allocation, remove []*Allocation) []*Allocation { return r } +func AllocSubset(allocs []*Allocation, subset []*Allocation) bool { + if len(subset) == 0 { + return true + } + // Convert allocs into a map + allocMap := make(map[string]struct{}) + for _, alloc := range allocs { + allocMap[alloc.ID] = struct{}{} + } + + for _, alloc := range subset { + if _, ok := allocMap[alloc.ID]; !ok { + return false + } + } + return true +} + // FilterTerminalAllocs filters out all allocations in a terminal state and // returns the latest terminal allocations. func FilterTerminalAllocs(allocs []*Allocation) ([]*Allocation, map[string]*Allocation) { From 4be1ce86c09f2f19334196d7ccbfd177eaa2dcd4 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 6 Apr 2022 11:27:19 -0400 Subject: [PATCH 2/2] changelog entry --- .changelog/12264.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/12264.txt diff --git a/.changelog/12264.txt b/.changelog/12264.txt new file mode 100644 index 000000000000..bc9008f269bd --- /dev/null +++ b/.changelog/12264.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler: fixed a bug where in-place updates on ineligible nodes would be ignored +```