Skip to content

Commit

Permalink
Merge pull request #7810 from hashicorp/spread-configuration
Browse files Browse the repository at this point in the history
spread scheduling algorithm
  • Loading branch information
Mahmood Ali committed May 1, 2020
1 parent 7d62bce commit 8883843
Show file tree
Hide file tree
Showing 16 changed files with 173 additions and 60 deletions.
14 changes: 14 additions & 0 deletions api/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,11 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
return nil
}

// SchedulerConfiguration is the config for controlling scheduler behavior
type SchedulerConfiguration struct {
// SchedulerAlgorithm lets you select between available scheduling algorithms.
SchedulerAlgorithm SchedulerAlgorithm

// PreemptionConfig specifies whether to enable eviction of lower
// priority jobs to place higher priority jobs.
PreemptionConfig PreemptionConfig
Expand Down Expand Up @@ -137,6 +141,16 @@ type SchedulerSetConfigurationResponse struct {
WriteMeta
}

// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
// scheduler to be user-selectable.
type SchedulerAlgorithm string

const (
SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
)

// PreemptionConfig specifies whether preemption is enabled based on scheduler type
type PreemptionConfig struct {
SystemSchedulerEnabled bool
Expand Down
1 change: 1 addition & 0 deletions command/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) {
"default case",
nil,
structs.SchedulerConfiguration{
SchedulerAlgorithm: "binpack",
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: true,
},
Expand Down
5 changes: 5 additions & 0 deletions command/agent/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,11 @@ func (c *Command) isValidConfig(config *Config) bool {
c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
}

if err := config.Server.DefaultSchedulerConfig.Validate(); err != nil {
c.Ui.Error(err.Error())
return false
}

return true
}

Expand Down
1 change: 1 addition & 0 deletions command/agent/config_parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ var basicConfig = &Config{
RetryMaxAttempts: 3,
},
DefaultSchedulerConfig: &structs.SchedulerConfiguration{
SchedulerAlgorithm: "spread",
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: true,
BatchSchedulerEnabled: true,
Expand Down
7 changes: 7 additions & 0 deletions command/agent/operator_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"github.com/hashicorp/raft"
)

// OperatorRequest is used route operator/raft API requests to the implementing
// functions.
func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
path := strings.TrimPrefix(req.URL.Path, "/v1/operator/raft/")
switch {
Expand Down Expand Up @@ -250,12 +252,17 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R
}

args.Config = structs.SchedulerConfiguration{
SchedulerAlgorithm: structs.SchedulerAlgorithm(conf.SchedulerAlgorithm),
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: conf.PreemptionConfig.SystemSchedulerEnabled,
BatchSchedulerEnabled: conf.PreemptionConfig.BatchSchedulerEnabled,
ServiceSchedulerEnabled: conf.PreemptionConfig.ServiceSchedulerEnabled},
}

if err := args.Config.Validate(); err != nil {
return nil, CodedError(http.StatusBadRequest, err.Error())
}

// Check for cas value
params := req.URL.Query()
if _, ok := params["cas"]; ok {
Expand Down
2 changes: 2 additions & 0 deletions command/agent/testdata/basic.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ server {
}

default_scheduler_config {
scheduler_algorithm = "spread"

preemption_config {
batch_scheduler_enabled = true
system_scheduler_enabled = true
Expand Down
1 change: 1 addition & 0 deletions command/agent/testdata/basic.json
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@
"2.2.2.2"
],
"default_scheduler_config": [{
"scheduler_algorithm": "spread",
"preemption_config": [{
"batch_scheduler_enabled": true,
"system_scheduler_enabled": true,
Expand Down
1 change: 1 addition & 0 deletions nomad/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ func DefaultConfig() *Config {
ServerHealthInterval: 2 * time.Second,
AutopilotInterval: 10 * time.Second,
DefaultSchedulerConfig: structs.SchedulerConfiguration{
SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
PreemptionConfig: structs.PreemptionConfig{
SystemSchedulerEnabled: true,
BatchSchedulerEnabled: false,
Expand Down
37 changes: 31 additions & 6 deletions nomad/structs/funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
return true, "", used, nil
}

// ScoreFit is used to score the fit based on the Google work published here:
// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
// This is equivalent to their BestFit v3
func ScoreFit(node *Node, util *ComparableResources) float64 {
func computeFreePercentage(node *Node, util *ComparableResources) (freePctCpu, freePctRam float64) {
// COMPAT(0.11): Remove in 0.11
reserved := node.ComparableReservedResources()
res := node.ComparableResources()
Expand All @@ -165,8 +162,18 @@ func ScoreFit(node *Node, util *ComparableResources) float64 {
}

// Compute the free percentage
freePctCpu := 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
freePctRam := 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
freePctCpu = 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
freePctRam = 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
return freePctCpu, freePctRam
}

// ScoreFitBinPack computes a fit score to achieve pinbacking behavior.
// Score is in [0, 18]
//
// It's the BestFit v3 on the Google work published here:
// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
func ScoreFitBinPack(node *Node, util *ComparableResources) float64 {
freePctCpu, freePctRam := computeFreePercentage(node, util)

// Total will be "maximized" the smaller the value is.
// At 100% utilization, the total is 2, while at 0% util it is 20.
Expand All @@ -187,6 +194,24 @@ func ScoreFit(node *Node, util *ComparableResources) float64 {
return score
}

// ScoreFitBinSpread computes a fit score to achieve spread behavior.
// Score is in [0, 18]
//
// This is equivalent to Worst Fit of
// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
func ScoreFitSpread(node *Node, util *ComparableResources) float64 {
freePctCpu, freePctRam := computeFreePercentage(node, util)
total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
score := total - 2

if score > 18.0 {
score = 18.0
} else if score < 0 {
score = 0
}
return score
}

func CopySliceConstraints(s []*Constraint) []*Constraint {
l := len(s)
if l == 0 {
Expand Down
89 changes: 45 additions & 44 deletions nomad/structs/funcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ func TestAllocsFit_Devices(t *testing.T) {
}

// COMPAT(0.11): Remove in 0.11
func TestScoreFit_Old(t *testing.T) {
func TestScoreFitBinPack_Old(t *testing.T) {
node := &Node{}
node.Resources = &Resources{
CPU: 4096,
Expand All @@ -528,7 +528,7 @@ func TestScoreFit_Old(t *testing.T) {
},
},
}
score := ScoreFit(node, util)
score := ScoreFitBinPack(node, util)
if score != 18.0 {
t.Fatalf("bad: %v", score)
}
Expand All @@ -544,7 +544,7 @@ func TestScoreFit_Old(t *testing.T) {
},
},
}
score = ScoreFit(node, util)
score = ScoreFitBinPack(node, util)
if score != 0.0 {
t.Fatalf("bad: %v", score)
}
Expand All @@ -560,13 +560,13 @@ func TestScoreFit_Old(t *testing.T) {
},
},
}
score = ScoreFit(node, util)
score = ScoreFitBinPack(node, util)
if score < 10.0 || score > 16.0 {
t.Fatalf("bad: %v", score)
}
}

func TestScoreFit(t *testing.T) {
func TestScoreFitBinPack(t *testing.T) {
node := &Node{}
node.NodeResources = &NodeResources{
Cpu: NodeCpuResources{
Expand All @@ -585,52 +585,53 @@ func TestScoreFit(t *testing.T) {
},
}

// Test a perfect fit
util := &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 2048,
},
Memory: AllocatedMemoryResources{
MemoryMB: 4096,
cases := []struct {
name string
flattened AllocatedTaskResources
binPackScore float64
spreadScore float64
}{
{
name: "almost filled node, but with just enough hole",
flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{CpuShares: 2048},
Memory: AllocatedMemoryResources{MemoryMB: 4096},
},
binPackScore: 18,
spreadScore: 0,
},
}
score := ScoreFit(node, util)
if score != 18.0 {
t.Fatalf("bad: %v", score)
}

// Test the worst fit
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 0,
},
Memory: AllocatedMemoryResources{
MemoryMB: 0,
{
name: "unutilized node",
flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{CpuShares: 0},
Memory: AllocatedMemoryResources{MemoryMB: 0},
},
binPackScore: 0,
spreadScore: 18,
},
}
score = ScoreFit(node, util)
if score != 0.0 {
t.Fatalf("bad: %v", score)
}

// Test a mid-case scenario
util = &ComparableResources{
Flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{
CpuShares: 1024,
},
Memory: AllocatedMemoryResources{
MemoryMB: 2048,
{
name: "mid-case scnario",
flattened: AllocatedTaskResources{
Cpu: AllocatedCpuResources{CpuShares: 1024},
Memory: AllocatedMemoryResources{MemoryMB: 2048},
},
binPackScore: 13.675,
spreadScore: 4.325,
},
}
score = ScoreFit(node, util)
if score < 10.0 || score > 16.0 {
t.Fatalf("bad: %v", score)

for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
util := &ComparableResources{Flattened: c.flattened}

binPackScore := ScoreFitBinPack(node, util)
require.InDelta(t, c.binPackScore, binPackScore, 0.001, "binpack score")

spreadScore := ScoreFitSpread(node, util)
require.InDelta(t, c.spreadScore, spreadScore, 0.001, "spread score")

require.InDelta(t, 18, binPackScore+spreadScore, 0.001, "score sum")
})
}
}

Expand Down
41 changes: 41 additions & 0 deletions nomad/structs/operator.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package structs

import (
"fmt"
"time"

"github.com/hashicorp/raft"
Expand Down Expand Up @@ -120,8 +121,26 @@ type AutopilotConfig struct {
ModifyIndex uint64
}

// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
// scheduler to be user-selectable.
type SchedulerAlgorithm string

const (
// SchedulerAlgorithmBinpack indicates that the scheduler should spread
// allocations as evenly as possible over the available hardware.
SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"

// SchedulerAlgorithmSpread indicates that the scheduler should spread
// allocations as evenly as possible over the available hardware.
SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
)

// SchedulerConfiguration is the config for controlling scheduler behavior
type SchedulerConfiguration struct {
// SchedulerAlgorithm lets you select between available scheduling algorithms.
SchedulerAlgorithm SchedulerAlgorithm `hcl:"scheduler_algorithm"`

// PreemptionConfig specifies whether to enable eviction of lower
// priority jobs to place higher priority jobs.
PreemptionConfig PreemptionConfig `hcl:"preemption_config"`
Expand All @@ -131,6 +150,28 @@ type SchedulerConfiguration struct {
ModifyIndex uint64
}

func (s *SchedulerConfiguration) EffectiveSchedulerAlgorithm() SchedulerAlgorithm {
if s == nil || s.SchedulerAlgorithm == "" {
return SchedulerAlgorithmBinpack
}

return s.SchedulerAlgorithm
}

func (s *SchedulerConfiguration) Validate() error {
if s == nil {
return nil
}

switch s.SchedulerAlgorithm {
case "", SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
default:
return fmt.Errorf("invalid scheduler algorithm: %v", s.SchedulerAlgorithm)
}

return nil
}

// SchedulerConfigurationResponse is the response object that wraps SchedulerConfiguration
type SchedulerConfigurationResponse struct {
// SchedulerConfig contains scheduler config options
Expand Down
2 changes: 1 addition & 1 deletion scheduler/preemption_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1345,7 +1345,7 @@ func TestPreemption(t *testing.T) {
ctx.plan.NodePreemptions[node.ID] = tc.currentPreemptions
}
static := NewStaticRankIterator(ctx, nodes)
binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority)
binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority, structs.SchedulerAlgorithmBinpack)
job := mock.Job()
job.Priority = tc.jobPriority
binPackIter.SetJob(job)
Expand Down
Loading

0 comments on commit 8883843

Please sign in to comment.