Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler: prevent -Inf in spread scoring #17198

Merged
merged 1 commit into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/17198.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed a bug where scores for spread scheduling could be -Inf
```
15 changes: 14 additions & 1 deletion scheduler/spread.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ type SpreadIterator struct {
// blocks
sumSpreadWeights int32

// lowestSpreadBoost tracks the lowest spread boost across all spread blocks
lowestSpreadBoost float64

// hasSpread is used to early return when the job/task group
// does not have spread configured
hasSpread bool
Expand All @@ -56,6 +59,7 @@ func NewSpreadIterator(ctx Context, source RankIterator) *SpreadIterator {
source: source,
groupPropertySets: make(map[string][]*propertySet),
tgSpreadInfo: make(map[string]spreadAttributeMap),
lowestSpreadBoost: -1.0,
}
return iter
}
Expand Down Expand Up @@ -117,6 +121,7 @@ func (iter *SpreadIterator) hasSpreads() bool {
}

func (iter *SpreadIterator) Next() *RankedNode {

for {
option := iter.source.Next()

Expand Down Expand Up @@ -165,7 +170,7 @@ func (iter *SpreadIterator) Next() *RankedNode {
desiredCount, ok = spreadDetails.desiredCounts[implicitTarget]
if !ok {
// The desired count for this attribute is zero if it gets here
// so use the maximum possible penalty for this node
// so use the default negative penalty for this node
totalSpreadScore -= 1.0
continue
}
Expand All @@ -174,12 +179,20 @@ func (iter *SpreadIterator) Next() *RankedNode {
// Calculate the relative weight of this specific spread attribute
spreadWeight := float64(spreadDetails.weight) / float64(iter.sumSpreadWeights)

if desiredCount == 0 {
totalSpreadScore += iter.lowestSpreadBoost
continue
}

// Score Boost is proportional the difference between current and desired count
// It is negative when the used count is greater than the desired count
// It is multiplied with the spread weight to account for cases where the job has
// more than one spread attribute
scoreBoost := ((desiredCount - float64(usedCount)) / desiredCount) * spreadWeight
totalSpreadScore += scoreBoost
if scoreBoost < iter.lowestSpreadBoost {
iter.lowestSpreadBoost = scoreBoost
}
}
}

Expand Down
100 changes: 100 additions & 0 deletions scheduler/spread_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import (
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -561,6 +563,104 @@ func TestSpreadIterator_MaxPenalty(t *testing.T) {

}

func TestSpreadIterator_NoInfinity(t *testing.T) {
ci.Parallel(t)

store, ctx := testContext(t)
var nodes []*RankedNode

// Add 3 nodes in different DCs to the state store
for i := 1; i < 4; i++ {
node := mock.Node()
node.Datacenter = fmt.Sprintf("dc%d", i)
must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, uint64(100+i), node))
nodes = append(nodes, &RankedNode{Node: node})
}

static := NewStaticRankIterator(ctx, nodes)

job := mock.Job()
tg := job.TaskGroups[0]
job.TaskGroups[0].Count = 8

// Create spread target of 50% in dc1, 50% in dc2, and 0% in the implicit target
spread := &structs.Spread{
Weight: 100,
Attribute: "${node.datacenter}",
SpreadTarget: []*structs.SpreadTarget{
{
Value: "dc1",
Percent: 50,
},
{
Value: "dc2",
Percent: 50,
},
{
Value: "*",
Percent: 0,
},
},
}
tg.Spreads = []*structs.Spread{spread}
spreadIter := NewSpreadIterator(ctx, static)
spreadIter.SetJob(job)
spreadIter.SetTaskGroup(tg)

scoreNorm := NewScoreNormalizationIterator(ctx, spreadIter)

out := collectRanked(scoreNorm)

// Scores should be even between dc1 and dc2 nodes, without an -Inf on dc3
must.Len(t, 3, out)
test.Eq(t, 0.75, out[0].FinalScore)
test.Eq(t, 0.75, out[1].FinalScore)
test.Eq(t, -1, out[2].FinalScore)

// Reset scores
for _, node := range nodes {
node.Scores = nil
node.FinalScore = 0
}

// Create very unbalanced spread target to force large negative scores
spread = &structs.Spread{
Weight: 100,
Attribute: "${node.datacenter}",
SpreadTarget: []*structs.SpreadTarget{
{
Value: "dc1",
Percent: 99,
},
{
Value: "dc2",
Percent: 1,
},
{
Value: "*",
Percent: 0,
},
},
}
tg.Spreads = []*structs.Spread{spread}
static = NewStaticRankIterator(ctx, nodes)
spreadIter = NewSpreadIterator(ctx, static)
spreadIter.SetJob(job)
spreadIter.SetTaskGroup(tg)

scoreNorm = NewScoreNormalizationIterator(ctx, spreadIter)

out = collectRanked(scoreNorm)

// Scores should heavily favor dc1, with an -Inf on dc3
must.Len(t, 3, out)
desired := 8 * 0.99 // 8 allocs * 99%
test.Eq(t, (desired-1)/desired, out[0].FinalScore)
test.Eq(t, -11.5, out[1].FinalScore)
test.LessEq(t, out[1].FinalScore, out[2].FinalScore,
test.Sprintf("expected implicit dc3 to be <= dc2"))
}

func Test_evenSpreadScoreBoost(t *testing.T) {
ci.Parallel(t)

Expand Down