Merge pull request #7730 from hashicorp/b-reserved-scoring
core: fix node reservation scoring
schmichael committed Apr 30, 2020
2 parents f592dd9 + e3cba0c commit cbcd3eb
Showing 5 changed files with 166 additions and 31 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ BUG FIXES:

* api: autoscaling policies should not be returned for stopped jobs [[GH-7768](https://github.com/hashicorp/nomad/issues/7768)]
* core: job scale status endpoint was returning incorrect counts [[GH-7789](https://github.com/hashicorp/nomad/issues/7789)]
* core: Fixed a bug where scores for allocations were biased toward nodes with resource reservations [[GH-7730](https://github.com/hashicorp/nomad/issues/7730)]
* jobspec: autoscaling policy block should return a parsing error when multiple `policy` blocks are provided [[GH-7716](https://github.com/hashicorp/nomad/issues/7716)]
* ui: Fixed a bug where exec popup had incorrect URL for jobs where name ≠ id [[GH-7814](https://github.com/hashicorp/nomad/issues/7814)]

13 changes: 6 additions & 7 deletions nomad/structs/funcs.go
@@ -101,12 +101,9 @@ func FilterTerminalAllocs(allocs []*Allocation) ([]*Allocation, map[string]*Allo
// ensured there are no collisions. If checkDevices is set to true, we check if
// there is a device oversubscription.
func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevices bool) (bool, string, *ComparableResources, error) {
// Compute the utilization from zero
// Compute the allocs' utilization from zero
used := new(ComparableResources)

// Add the reserved resources of the node
used.Add(node.ComparableReservedResources())

// For each alloc, add the resources
for _, alloc := range allocs {
// Do not consider the resource impact of terminal allocations
@@ -117,9 +114,11 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
used.Add(alloc.ComparableResources())
}

// Check that the node resources are a super set of those
// that are being allocated
if superset, dimension := node.ComparableResources().Superset(used); !superset {
// Check that the node resources (after subtracting reserved) are a
// super set of those that are being allocated
available := node.ComparableResources()
available.Subtract(node.ComparableReservedResources())
if superset, dimension := available.Superset(used); !superset {
return false, dimension, used, nil
}

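
The corrected check can be summarized with a minimal, self-contained sketch (the `resources` struct and `allocsFit` function below are illustrative stand-ins, not Nomad's actual types): usage is summed from the allocations alone, and the node's reserved resources reduce the capacity that usage is compared against, instead of being counted as usage themselves.

```go
package main

import "fmt"

// resources is a simplified stand-in for Nomad's ComparableResources,
// tracking only the two dimensions exercised by this change.
type resources struct {
	CPUShares int64
	MemoryMB  int64
}

// allocsFit mirrors the corrected check: usage is summed from the
// allocations alone, and the node's reserved resources reduce the
// capacity that usage is compared against.
func allocsFit(capacity, reserved resources, allocs []resources) (bool, string) {
	var used resources
	for _, a := range allocs {
		used.CPUShares += a.CPUShares
		used.MemoryMB += a.MemoryMB
	}

	available := resources{
		CPUShares: capacity.CPUShares - reserved.CPUShares,
		MemoryMB:  capacity.MemoryMB - reserved.MemoryMB,
	}

	switch {
	case used.CPUShares > available.CPUShares:
		return false, "cpu"
	case used.MemoryMB > available.MemoryMB:
		return false, "memory"
	default:
		return true, ""
	}
}

func main() {
	capacity := resources{CPUShares: 2000, MemoryMB: 2000}
	reserved := resources{CPUShares: 800, MemoryMB: 800}
	alloc := resources{CPUShares: 1000, MemoryMB: 1000}

	fit, _ := allocsFit(capacity, reserved, []resources{alloc})
	fmt.Println(fit) // true: 1000 used <= 1200 available

	fit, dim := allocsFit(capacity, reserved, []resources{alloc, alloc})
	fmt.Println(fit, dim) // false cpu: 2000 used > 1200 available
}
```

With the 2000-share node and 800-share reservation used in the new rank test below, one 1000-share allocation fits and a second identical allocation is rejected on CPU.
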
32 changes: 16 additions & 16 deletions nomad/structs/funcs_test.go
@@ -188,17 +188,17 @@ func TestAllocsFit_Old(t *testing.T) {
require.True(fit)

// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)

// Should not fit second allocation
fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
require.NoError(err)
require.False(fit)

// Sanity check the used resources
require.EqualValues(3000, used.Flattened.Cpu.CpuShares)
require.EqualValues(3072, used.Flattened.Memory.MemoryMB)
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
}

// COMPAT(0.11): Remove in 0.11
@@ -255,8 +255,8 @@ func TestAllocsFit_TerminalAlloc_Old(t *testing.T) {
require.True(fit)

// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)

// Should fit second allocation since it is terminal
a2 := a1.Copy()
@@ -266,8 +266,8 @@ func TestAllocsFit_TerminalAlloc_Old(t *testing.T) {
require.True(fit)

// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
}

func TestAllocsFit(t *testing.T) {
@@ -340,17 +340,17 @@ func TestAllocsFit(t *testing.T) {
require.True(fit)

// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)

// Should not fit second allocation
fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
require.NoError(err)
require.False(fit)

// Sanity check the used resources
require.EqualValues(3000, used.Flattened.Cpu.CpuShares)
require.EqualValues(3072, used.Flattened.Memory.MemoryMB)
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
}

func TestAllocsFit_TerminalAlloc(t *testing.T) {
@@ -424,8 +424,8 @@ func TestAllocsFit_TerminalAlloc(t *testing.T) {
require.True(fit)

// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)

// Should fit second allocation since it is terminal
a2 := a1.Copy()
@@ -435,8 +435,8 @@ func TestAllocsFit_TerminalAlloc(t *testing.T) {
require.True(fit, dim)

// Sanity check the used resources
require.EqualValues(2000, used.Flattened.Cpu.CpuShares)
require.EqualValues(2048, used.Flattened.Memory.MemoryMB)
require.EqualValues(1000, used.Flattened.Cpu.CpuShares)
require.EqualValues(1024, used.Flattened.Memory.MemoryMB)
}

// Tests that AllocsFit detects device collisions
122 changes: 120 additions & 2 deletions scheduler/rank_test.go
@@ -1,6 +1,7 @@
package scheduler

import (
"sort"
"testing"

"github.com/hashicorp/nomad/helper/uuid"
@@ -122,11 +123,128 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) {
if out[0].FinalScore != 1.0 {
t.Fatalf("Bad Score: %v", out[0].FinalScore)
}
if out[1].FinalScore < 0.75 || out[1].FinalScore > 0.95 {
if out[1].FinalScore < 0.50 || out[1].FinalScore > 0.60 {
t.Fatalf("Bad Score: %v", out[1].FinalScore)
}
}

// TestBinPackIterator_NoExistingAlloc_MixedReserve asserts that nodes with
// reserved resources are scored as if they simply had a lower amount of
// resources.
func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) {
_, ctx := testContext(t)
nodes := []*RankedNode{
{
// Best fit
Node: &structs.Node{
Name: "no-reserved",
NodeResources: &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 1100,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 1100,
},
},
},
},
{
// Not best fit if reserve is calculated properly
Node: &structs.Node{
Name: "reserved",
NodeResources: &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 2000,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 2000,
},
},
ReservedResources: &structs.NodeReservedResources{
Cpu: structs.NodeReservedCpuResources{
CpuShares: 800,
},
Memory: structs.NodeReservedMemoryResources{
MemoryMB: 800,
},
},
},
},
{
// Even worse fit due to reservations
Node: &structs.Node{
Name: "reserved2",
NodeResources: &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 2000,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 2000,
},
},
ReservedResources: &structs.NodeReservedResources{
Cpu: structs.NodeReservedCpuResources{
CpuShares: 500,
},
Memory: structs.NodeReservedMemoryResources{
MemoryMB: 500,
},
},
},
},
{
Node: &structs.Node{
Name: "overloaded",
NodeResources: &structs.NodeResources{
Cpu: structs.NodeCpuResources{
CpuShares: 900,
},
Memory: structs.NodeMemoryResources{
MemoryMB: 900,
},
},
},
},
}
static := NewStaticRankIterator(ctx, nodes)

taskGroup := &structs.TaskGroup{
EphemeralDisk: &structs.EphemeralDisk{},
Tasks: []*structs.Task{
{
Name: "web",
Resources: &structs.Resources{
CPU: 1000,
MemoryMB: 1000,
},
},
},
}
binp := NewBinPackIterator(ctx, static, false, 0)
binp.SetTaskGroup(taskGroup)

scoreNorm := NewScoreNormalizationIterator(ctx, binp)

out := collectRanked(scoreNorm)

// Sort descending (highest score to lowest) and log for debugging
sort.Slice(out, func(i, j int) bool { return out[i].FinalScore > out[j].FinalScore })
for i := range out {
t.Logf("Node: %-12s Score: %-1.4f", out[i].Node.Name, out[i].FinalScore)
}

// 3 nodes should be feasible
require.Len(t, out, 3)

// Node without reservations is the best fit
require.Equal(t, nodes[0].Node.Name, out[0].Node.Name)

// Node with the smallest remaining resources ("best fit") should get a
// higher score than the node with more remaining resources ("worse fit")
require.Equal(t, nodes[1].Node.Name, out[1].Node.Name)
require.Equal(t, nodes[2].Node.Name, out[2].Node.Name)
}

// Tests bin packing iterator with network resources at task and task group level
func TestBinPackIterator_Network_Success(t *testing.T) {
_, ctx := testContext(t)
@@ -239,7 +357,7 @@ func TestBinPackIterator_Network_Success(t *testing.T) {
// First node should have a perfect score
require.Equal(1.0, out[0].FinalScore)

if out[1].FinalScore < 0.75 || out[1].FinalScore > 0.95 {
if out[1].FinalScore < 0.50 || out[1].FinalScore > 0.60 {
t.Fatalf("Bad Score: %v", out[1].FinalScore)
}

29 changes: 23 additions & 6 deletions website/pages/docs/upgrade/upgrade-specific.mdx
@@ -15,6 +15,22 @@ details provided for their upgrades as a result of new features or changed
behavior. This page is used to document those details separately from the
standard upgrade flow.

## Nomad 0.11.2

### Scheduler Scoring Changes

Prior to Nomad 0.11.2 the scheduler used a [node's reserved
resources][reserved] incorrectly during scoring. As a result, scoring was
biased in favor of nodes with reserved resources over nodes without them.

Placements will be more correct in v0.11.2, but may differ slightly from
earlier versions of Nomad. Operators do *not* need to take any action, as the
bug fix only minimally affects scoring.

Feasibility (whether a node is capable of running a job at all) is *not*
affected.
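
As an illustration (round numbers chosen for clarity, not taken from a real
cluster): consider a node with 2000 MHz of CPU, 800 MHz of which is reserved,
and a task asking for 1000 MHz. Previously the scheduler counted the
reservation as if it were an existing allocation, treating the node as
1800 MHz used out of 2000; the node looked nearly full and was therefore more
attractive to the bin-packing algorithm. With the fix, the reservation reduces
the node's capacity instead, so the node is scored as a 1200 MHz node with
1000 MHz used, the same way an equivalent unreserved node would be.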

## Nomad 0.11.0

### client.template: `vault_grace` deprecation
@@ -466,21 +482,22 @@ draining the node so no tasks are running on it. This can be verified by running
state. Once that is done the client can be killed, the `data_dir` should be
deleted and then Nomad 0.3.0 can be launched.

[dangling-containers]: /docs/drivers/docker#dangling-containers
[drain-api]: /api-docs/nodes#drain-node
[drain-cli]: /docs/commands/node/drain
[dangling-containers]: /docs/drivers/docker#dangling-containers
[gh-6787]: https://github.com/hashicorp/nomad/issues/6787
[hcl2]: https://github.com/hashicorp/hcl2
[limits]: /docs/configuration#limits
[lxc]: /docs/drivers/external/lxc
[migrate]: /docs/job-specification/migrate
[plugins]: /docs/drivers/external
[plugin-stanza]: /docs/configuration/plugin
[preemption]: /docs/internals/scheduling/preemption
[plugins]: /docs/drivers/external
[preemption-api]: /api-docs/operator#update-scheduler-configuration
[preemption]: /docs/internals/scheduling/preemption
[reserved]: /docs/configuration/client#reserved-parameters
[task-config]: /docs/job-specification/task#config
[validate]: /docs/commands/job/validate
[vault_grace]: /docs/job-specification/template
[update]: /docs/job-specification/update
[tls-guide]: https://learn.hashicorp.com/nomad/transport-security/enable-tls
[tls-vault-guide]: https://learn.hashicorp.com/nomad/vault-integration/vault-pki-nomad
[update]: /docs/job-specification/update
[validate]: /docs/commands/job/validate
[vault_grace]: /docs/job-specification/template
