Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spread scheduling algorithm #7810

Merged
merged 4 commits into from
May 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
FEATURES:
* **Task dependencies UI**: task lifecycle charts and details

IMPROVEMENTS:

* core: Allow spreading allocations as an alternative to binpacking [[GH-7810](https://github.com/hashicorp/nomad/issues/7810)]

BUG FIXES:

* api: autoscaling policies should not be returned for stopped jobs [[GH-7768](https://github.com/hashicorp/nomad/issues/7768)]
Expand Down
14 changes: 14 additions & 0 deletions api/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,11 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
return nil
}

// SchedulerConfiguration is the config for controlling scheduler behavior
type SchedulerConfiguration struct {
// SchedulerAlgorithm lets you select between available scheduling algorithms.
SchedulerAlgorithm SchedulerAlgorithm

// PreemptionConfig specifies whether to enable eviction of lower
// priority jobs to place higher priority jobs.
PreemptionConfig PreemptionConfig
Expand Down Expand Up @@ -140,6 +144,16 @@ type SchedulerSetConfigurationResponse struct {
WriteMeta
}

// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
// scheduler to be user-selectable.
type SchedulerAlgorithm string

const (
SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
)

// PreemptionConfig specifies whether preemption is enabled based on scheduler type
type PreemptionConfig struct {
SystemSchedulerEnabled bool
Expand Down
1 change: 1 addition & 0 deletions command/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) {
"default case",
nil,
structs.SchedulerConfiguration{
SchedulerAlgorithm: "binpack",
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: true,
},
Expand Down
5 changes: 5 additions & 0 deletions command/agent/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,11 @@ func (c *Command) isValidConfig(config, cmdConfig *Config) bool {
c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
}

if err := config.Server.DefaultSchedulerConfig.Validate(); err != nil {
c.Ui.Error(err.Error())
return false
}

return true
}

Expand Down
1 change: 1 addition & 0 deletions command/agent/config_parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ var basicConfig = &Config{
RetryMaxAttempts: 3,
},
DefaultSchedulerConfig: &structs.SchedulerConfiguration{
SchedulerAlgorithm: "spread",
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: true,
BatchSchedulerEnabled: true,
Expand Down
7 changes: 7 additions & 0 deletions command/agent/operator_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"github.com/hashicorp/raft"
)

// OperatorRequest is used route operator/raft API requests to the implementing
// functions.
func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
path := strings.TrimPrefix(req.URL.Path, "/v1/operator/raft/")
switch {
Expand Down Expand Up @@ -252,12 +254,17 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R
}

args.Config = structs.SchedulerConfiguration{
SchedulerAlgorithm: structs.SchedulerAlgorithm(conf.SchedulerAlgorithm),
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: conf.PreemptionConfig.SystemSchedulerEnabled,
BatchSchedulerEnabled: conf.PreemptionConfig.BatchSchedulerEnabled,
ServiceSchedulerEnabled: conf.PreemptionConfig.ServiceSchedulerEnabled},
}

if err := args.Config.Validate(); err != nil {
return nil, CodedError(http.StatusBadRequest, err.Error())
}

// Check for cas value
params := req.URL.Query()
if _, ok := params["cas"]; ok {
Expand Down
2 changes: 2 additions & 0 deletions command/agent/testdata/basic.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ server {
}

default_scheduler_config {
scheduler_algorithm = "spread"

preemption_config {
batch_scheduler_enabled = true
system_scheduler_enabled = true
Expand Down
1 change: 1 addition & 0 deletions command/agent/testdata/basic.json
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@
"2.2.2.2"
],
"default_scheduler_config": [{
"scheduler_algorithm": "spread",
"preemption_config": [{
"batch_scheduler_enabled": true,
"system_scheduler_enabled": true,
Expand Down
1 change: 1 addition & 0 deletions nomad/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ func DefaultConfig() *Config {
ServerHealthInterval: 2 * time.Second,
AutopilotInterval: 10 * time.Second,
DefaultSchedulerConfig: structs.SchedulerConfiguration{
SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: true,
BatchSchedulerEnabled: false,
Expand Down
37 changes: 31 additions & 6 deletions nomad/structs/funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
return true, "", used, nil
}

// ScoreFit is used to score the fit based on the Google work published here:
// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
// This is equivalent to their BestFit v3
func ScoreFit(node *Node, util *ComparableResources) float64 {
func computeFreePercentage(node *Node, util *ComparableResources) (freePctCpu, freePctRam float64) {
// COMPAT(0.11): Remove in 0.11
reserved := node.ComparableReservedResources()
res := node.ComparableResources()
Expand All @@ -165,8 +162,18 @@ func ScoreFit(node *Node, util *ComparableResources) float64 {
}

// Compute the free percentage
freePctCpu := 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
freePctRam := 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
freePctCpu = 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
freePctRam = 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
return freePctCpu, freePctRam
}

// ScoreFitBinPack computes a fit score to achieve pinbacking behavior.
// Score is in [0, 18]
//
// It's the BestFit v3 on the Google work published here:
// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
func ScoreFitBinPack(node *Node, util *ComparableResources) float64 {
freePctCpu, freePctRam := computeFreePercentage(node, util)

// Total will be "maximized" the smaller the value is.
// At 100% utilization, the total is 2, while at 0% util it is 20.
Expand All @@ -187,6 +194,24 @@ func ScoreFit(node *Node, util *ComparableResources) float64 {
return score
}

// ScoreFitBinSpread computes a fit score to achieve spread behavior.
// Score is in [0, 18]
//
// This is equivalent to Worst Fit of
// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
func ScoreFitSpread(node *Node, util *ComparableResources) float64 {
freePctCpu, freePctRam := computeFreePercentage(node, util)
total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
score := total - 2

if score > 18.0 {
score = 18.0
} else if score < 0 {
score = 0
}
return score
}

func CopySliceConstraints(s []*Constraint) []*Constraint {
l := len(s)
if l == 0 {
Expand Down
89 changes: 45 additions & 44 deletions nomad/structs/funcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ func TestAllocsFit_Devices(t *testing.T) {
}

// COMPAT(0.11): Remove in 0.11
func TestScoreFit_Old(t *testing.T) {
func TestScoreFitBinPack_Old(t *testing.T) {
node := &Node{}
node.Resources = &Resources{
CPU: 4096,
Expand All @@ -528,7 +528,7 @@ func TestScoreFit_Old(t *testing.T) {
},
},
}
score := ScoreFit(node, util)
score := ScoreFitBinPack(node, util)
if score != 18.0 {
t.Fatalf("bad: %v", score)
}
Expand All @@ -544,7 +544,7 @@ func TestScoreFit_Old(t *testing.T) {
},
},
}
score = ScoreFit(node, util)
score = ScoreFitBinPack(node, util)
if score != 0.0 {
t.Fatalf("bad: %v", score)
}
Expand All @@ -560,13 +560,13 @@ func TestScoreFit_Old(t *testing.T) {
},
},
}
score = ScoreFit(node, util)
score = ScoreFitBinPack(node, util)
if score < 10.0 || score > 16.0 {
t.Fatalf("bad: %v", score)
}
}

func TestScoreFit(t *testing.T) {
func TestScoreFitBinPack(t *testing.T) {
node := &Node{}
node.NodeResources = &NodeResources{
Cpu: NodeCpuResources{
Expand All @@ -585,52 +585,53 @@ func TestScoreFit(t *testing.T) {
},
}

// Test a perfect fit
util := &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 2048,
},
Memory: AllocatedMemoryResources{
MemoryMB: 4096,
cases := []struct {
name string
flattened AllocatedTaskResources
binPackScore float64
spreadScore float64
}{
{
name: "almost filled node, but with just enough hole",
flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{CpuShares: 2048},
Memory: AllocatedMemoryResources{MemoryMB: 4096},
},
binPackScore: 18,
spreadScore: 0,
},
}
score := ScoreFit(node, util)
if score != 18.0 {
t.Fatalf("bad: %v", score)
}

// Test the worst fit
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 0,
},
Memory: AllocatedMemoryResources{
MemoryMB: 0,
{
name: "unutilized node",
flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{CpuShares: 0},
Memory: AllocatedMemoryResources{MemoryMB: 0},
},
binPackScore: 0,
spreadScore: 18,
},
}
score = ScoreFit(node, util)
if score != 0.0 {
t.Fatalf("bad: %v", score)
}

// Test a mid-case scenario
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 1024,
},
Memory: AllocatedMemoryResources{
MemoryMB: 2048,
{
name: "mid-case scnario",
flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{CpuShares: 1024},
Memory: AllocatedMemoryResources{MemoryMB: 2048},
},
binPackScore: 13.675,
spreadScore: 4.325,
},
}
score = ScoreFit(node, util)
if score < 10.0 || score > 16.0 {
t.Fatalf("bad: %v", score)

for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
util := &ComparableResources{Flattened: c.flattened}

binPackScore := ScoreFitBinPack(node, util)
require.InDelta(t, c.binPackScore, binPackScore, 0.001, "binpack score")

spreadScore := ScoreFitSpread(node, util)
require.InDelta(t, c.spreadScore, spreadScore, 0.001, "spread score")

require.InDelta(t, 18, binPackScore+spreadScore, 0.001, "score sum")
})
}
}

Expand Down
41 changes: 41 additions & 0 deletions nomad/structs/operator.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package structs

import (
"fmt"
"time"

"github.com/hashicorp/raft"
Expand Down Expand Up @@ -124,8 +125,26 @@ type AutopilotConfig struct {
ModifyIndex uint64
}

// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
// scheduler to be user-selectable.
type SchedulerAlgorithm string

const (
// SchedulerAlgorithmBinpack indicates that the scheduler should spread
// allocations as evenly as possible over the available hardware.
SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"

// SchedulerAlgorithmSpread indicates that the scheduler should spread
// allocations as evenly as possible over the available hardware.
SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
)

// SchedulerConfiguration is the config for controlling scheduler behavior
type SchedulerConfiguration struct {
// SchedulerAlgorithm lets you select between available scheduling algorithms.
SchedulerAlgorithm SchedulerAlgorithm `hcl:"scheduler_algorithm"`

// PreemptionConfig specifies whether to enable eviction of lower
// priority jobs to place higher priority jobs.
PreemptionConfig PreemptionConfig `hcl:"preemption_config"`
Expand All @@ -135,6 +154,28 @@ type SchedulerConfiguration struct {
ModifyIndex uint64
}

func (s *SchedulerConfiguration) EffectiveSchedulerAlgorithm() SchedulerAlgorithm {
if s == nil || s.SchedulerAlgorithm == "" {
return SchedulerAlgorithmBinpack
}

return s.SchedulerAlgorithm
}

func (s *SchedulerConfiguration) Validate() error {
if s == nil {
return nil
}

switch s.SchedulerAlgorithm {
case "", SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
default:
return fmt.Errorf("invalid scheduler algorithm: %v", s.SchedulerAlgorithm)
}

return nil
}

// SchedulerConfigurationResponse is the response object that wraps SchedulerConfiguration
type SchedulerConfigurationResponse struct {
// SchedulerConfig contains scheduler config options
Expand Down
2 changes: 1 addition & 1 deletion scheduler/preemption_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ func TestPreemption(t *testing.T) {
ctx.plan.NodePreemptions[node.ID] = tc.currentPreemptions
}
static := NewStaticRankIterator(ctx, nodes)
binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority)
binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority, structs.SchedulerAlgorithmBinpack)
job := mock.Job()
job.Priority = tc.jobPriority
binPackIter.SetJob(job)
Expand Down
Loading