core: fix node reservation scoring
The BinPackIterator accounted for node reservations twice when scoring
nodes, which could bias scores toward nodes with reservations.

Pseudo-code for previous algorithm:
```
	proposed  = reservedResources + sum(allocResources)
	available = nodeResources - reservedResources
	score     = 1 - (proposed / available)
```

The node's reserved resources are added to the total resources used by
allocations, and then the node's reserved resources are also subtracted
from the node's overall resources, so they are counted twice.

The new algorithm is:
```
	proposed  = sum(allocResources)
	available = nodeResources - reservedResources
	score     = 1 - (proposed / available)
```

The node's reserved resources are no longer added to the total resources
used by allocations.
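
To make the double counting concrete, here is a minimal standalone
sketch using the CPU numbers from the "reserved" node in the added test
(plain Go; variable names are illustrative, not the scheduler's):

```
package main

import "fmt"

func main() {
	// The "reserved" node in the added test: 2000 total CPU shares,
	// 800 reserved, and a single alloc asking for 1000.
	var node, reserved, alloc float64 = 2000, 800, 1000

	available := node - reserved // 1200 under both algorithms

	oldProposed := reserved + alloc // 1800: reserved counted here too
	newProposed := alloc            // 1000: only the alloc's usage

	fmt.Printf("old utilization: %.2f\n", oldProposed/available) // 1.50
	fmt.Printf("new utilization: %.2f\n", newProposed/available) // 0.83
}
```

Under the old algorithm the node appears more than 100% utilized before
scoring even begins, which lines up with the clamped >1.0 scores
described below.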

My guess as to how this bug happened is that the resource utilization
variable (`util`) is calculated and returned by the `AllocsFit`
function, which needs to take reserved resources into account as a
basic feasibility check.

To avoid re-calculating alloc resource usage (because there may be a
large number of allocs), we reused `util` in the `ScoreFit` function.
`ScoreFit` properly accounts for reserved resources by subtracting them
from the node's overall resources. However, since `util` _also_ took
reserved resources into account, the score would be incorrect.
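
As a sketch of that call pattern (simplified, hypothetical Go assuming
`structs` is `nomad/structs`; `AllocsFit`'s signature matches the diff
below, while `ScoreFit`'s `(node, util)` shape is an assumption here):

```
// Illustrative scoring path, not Nomad's exact code.
func scoreNode(node *structs.Node, proposedAllocs []*structs.Allocation, netIdx *structs.NetworkIndex) (float64, bool) {
	fit, _, util, err := structs.AllocsFit(node, proposedAllocs, netIdx, false)
	if err != nil || !fit {
		return 0, false // node is infeasible
	}
	// util is reused to avoid re-summing a potentially large alloc list.
	// ScoreFit subtracts reserved resources from the node itself, so util
	// must not already include them -- that double count was this bug.
	return structs.ScoreFit(node, util), true
}
```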

Prior to the fix, the added test output:
```
Node: reserved     Score: 1.0000
Node: reserved2    Score: 1.0000
Node: no-reserved  Score: 0.9741
```

The scores being 1.0 for *both* nodes with reserved resources is a good
hint that something is wrong, as they should receive different scores.
Upon further inspection, the double accounting of reserved resources
caused their scores to be >1.0, so they were clamped.

After the fix, the added test outputs:
```
Node: no-reserved  Score: 0.9741
Node: reserved     Score: 0.9480
Node: reserved2    Score: 0.8717
```
schmichael committed Apr 15, 2020
1 parent 417f50f commit f02c834
Showing 2 changed files with 120 additions and 7 deletions.
13 changes: 6 additions & 7 deletions nomad/structs/funcs.go

@@ -101,12 +101,9 @@ func FilterTerminalAllocs(allocs []*Allocation) ([]*Allocation, map[string]*Allo
 // ensured there are no collisions. If checkDevices is set to true, we check if
 // there is a device oversubscription.
 func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevices bool) (bool, string, *ComparableResources, error) {
-	// Compute the utilization from zero
+	// Compute the allocs' utilization from zero
 	used := new(ComparableResources)
 
-	// Add the reserved resources of the node
-	used.Add(node.ComparableReservedResources())
-
 	// For each alloc, add the resources
 	for _, alloc := range allocs {
 		// Do not consider the resource impact of terminal allocations
@@ -117,9 +114,11 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 		used.Add(alloc.ComparableResources())
 	}
 
-	// Check that the node resources are a super set of those
-	// that are being allocated
-	if superset, dimension := node.ComparableResources().Superset(used); !superset {
+	// Check that the node resources (after subtracting reserved) are a
+	// super set of those that are being allocated
+	available := node.ComparableResources()
+	available.Subtract(node.ComparableReservedResources())
+	if superset, dimension := available.Superset(used); !superset {
 		return false, dimension, used, nil
 	}
114 changes: 114 additions & 0 deletions scheduler/rank_test.go

@@ -1,6 +1,7 @@
 package scheduler
 
 import (
+	"sort"
 	"testing"
 
 	"github.com/hashicorp/nomad/helper/uuid"
@@ -127,6 +128,119 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) {
	}
}

// TestBinPackIterator_NoExistingAlloc_MixedReserve asserts that nodes with
// reserved resources are scored as if they had a lower amount of
// resources.
func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) {
	_, ctx := testContext(t)
	nodes := []*RankedNode{
		{
			// Best fit
			Node: &structs.Node{
				Name: "no-reserved",
				NodeResources: &structs.NodeResources{
					Cpu: structs.NodeCpuResources{
						CpuShares: 1100,
					},
					Memory: structs.NodeMemoryResources{
						MemoryMB: 1100,
					},
				},
			},
		},
		{
			// Not best fit if reserve is calculated properly
			Node: &structs.Node{
				Name: "reserved",
				NodeResources: &structs.NodeResources{
					Cpu: structs.NodeCpuResources{
						CpuShares: 2000,
					},
					Memory: structs.NodeMemoryResources{
						MemoryMB: 2000,
					},
				},
				ReservedResources: &structs.NodeReservedResources{
					Cpu: structs.NodeReservedCpuResources{
						CpuShares: 800,
					},
					Memory: structs.NodeReservedMemoryResources{
						MemoryMB: 800,
					},
				},
			},
		},
		{
			// Even worse fit due to reservations
			Node: &structs.Node{
				Name: "reserved2",
				NodeResources: &structs.NodeResources{
					Cpu: structs.NodeCpuResources{
						CpuShares: 2000,
					},
					Memory: structs.NodeMemoryResources{
						MemoryMB: 2000,
					},
				},
				ReservedResources: &structs.NodeReservedResources{
					Cpu: structs.NodeReservedCpuResources{
						CpuShares: 500,
					},
					Memory: structs.NodeReservedMemoryResources{
						MemoryMB: 500,
					},
				},
			},
		},
		{
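			// Infeasible: 900 CPU / 900 MB is less than the task's 1000/1000 ask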
			Node: &structs.Node{
				Name: "overloaded",
				NodeResources: &structs.NodeResources{
					Cpu: structs.NodeCpuResources{
						CpuShares: 900,
					},
					Memory: structs.NodeMemoryResources{
						MemoryMB: 900,
					},
				},
			},
		},
	}
	static := NewStaticRankIterator(ctx, nodes)

	taskGroup := &structs.TaskGroup{
		EphemeralDisk: &structs.EphemeralDisk{},
		Tasks: []*structs.Task{
			{
				Name: "web",
				Resources: &structs.Resources{
					CPU:      1000,
					MemoryMB: 1000,
				},
			},
		},
	}
	binp := NewBinPackIterator(ctx, static, false, 0)
	binp.SetTaskGroup(taskGroup)

	scoreNorm := NewScoreNormalizationIterator(ctx, binp)

	out := collectRanked(scoreNorm)

	// Sort descending (highest score to lowest) and log for debugging
	sort.Slice(out, func(i, j int) bool { return out[i].FinalScore > out[j].FinalScore })
	for i := range out {
		t.Logf("Node: %-12s Score: %-1.4f", out[i].Node.Name, out[i].FinalScore)
	}

	// 3 nodes should be feasible
	require.Len(t, out, 3)

	// Node without reservations is the best fit
	require.Equal(t, nodes[0].Node.Name, out[0].Node.Name)
	require.Equal(t, nodes[1].Node.Name, out[1].Node.Name)
}

// Tests bin packing iterator with network resources at task and task group level
func TestBinPackIterator_Network_Success(t *testing.T) {
	_, ctx := testContext(t)
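
The score listings quoted in the commit message come from the test's
`t.Logf` calls, which are shown when the test is run verbosely
(standard `go test` flags):

```
go test ./scheduler -run TestBinPackIterator_NoExistingAlloc_MixedReserve -v
```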
