From 6571ccefbc13e6dbed7bd53fcbe1fe9666f2d312 Mon Sep 17 00:00:00 2001
From: Charlie Voiselle <464492+angrycub@users.noreply.github.com>
Date: Fri, 3 Apr 2020 19:03:14 -0400
Subject: [PATCH 1/4] Add SchedulerAlgorithm to SchedulerConfig

---
 api/operator.go                    |  9 +++++++++
 command/agent/operator_endpoint.go |  7 +++++++
 nomad/config.go                    |  1 +
 nomad/structs/operator.go          | 29 +++++++++++++++++++++++++++++
 4 files changed, 46 insertions(+)

diff --git a/api/operator.go b/api/operator.go
index d215326f4407..f46216f31049 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -112,7 +112,11 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
 	return nil
 }
 
+// SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
+	// SchedulerAlgorithm lets you select between available scheduling algorithms.
+	SchedulerAlgorithm string
+
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
 	PreemptionConfig PreemptionConfig
@@ -140,6 +144,11 @@ type SchedulerSetConfigurationResponse struct {
 	WriteMeta
 }
 
+// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
+// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
+// scheduler to be user-selectable.
+type SchedulerAlgorithm string
+
 // PreemptionConfig specifies whether preemption is enabled based on scheduler type
 type PreemptionConfig struct {
 	SystemSchedulerEnabled bool
diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go
index 7ac48c0fdf6c..8abe7cdf57ff 100644
--- a/command/agent/operator_endpoint.go
+++ b/command/agent/operator_endpoint.go
@@ -14,6 +14,8 @@ import (
 	"github.com/hashicorp/raft"
 )
 
+// OperatorRequest is used to route operator/raft API requests to the implementing
+// functions.
 func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
 	path := strings.TrimPrefix(req.URL.Path, "/v1/operator/raft/")
 	switch {
@@ -251,7 +253,12 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R
 		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing scheduler config: %v", err))
 	}
 
+	if !structs.SchedulerAlgorithmIsValid(conf.SchedulerAlgorithm) {
+		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Invalid scheduler algorithm selected."))
+	}
+
 	args.Config = structs.SchedulerConfiguration{
+		SchedulerAlgorithm: conf.SchedulerAlgorithm,
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled: conf.PreemptionConfig.SystemSchedulerEnabled,
 			BatchSchedulerEnabled:  conf.PreemptionConfig.BatchSchedulerEnabled,
diff --git a/nomad/config.go b/nomad/config.go
index 8a0fb7a181b7..8b4fc1c2568a 100644
--- a/nomad/config.go
+++ b/nomad/config.go
@@ -403,6 +403,7 @@ func DefaultConfig() *Config {
 		ServerHealthInterval: 2 * time.Second,
 		AutopilotInterval:    10 * time.Second,
 		DefaultSchedulerConfig: structs.SchedulerConfiguration{
+			SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
 			PreemptionConfig: structs.PreemptionConfig{
 				SystemSchedulerEnabled: true,
 				BatchSchedulerEnabled:  false,
diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go
index 03d8caedb352..101ce63514f3 100644
--- a/nomad/structs/operator.go
+++ b/nomad/structs/operator.go
@@ -124,8 +124,37 @@ type AutopilotConfig struct {
 	ModifyIndex uint64
 }
 
+// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
+// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
+// scheduler to be user-selectable.
+type SchedulerAlgorithm string
+
+const (
+	// SchedulerAlgorithmBinpack indicates that the scheduler should pack
+	// allocations onto as few nodes as possible to maximize utilization.
+	SchedulerAlgorithmBinpack string = "binpack"
+
+	// SchedulerAlgorithmSpread indicates that the scheduler should spread
+	// allocations as evenly as possible over the available hardware.
+	SchedulerAlgorithmSpread string = "spread"
+)
+
+// SchedulerAlgorithmIsValid validates the given SchedulerAlgorithm string and
+// returns true only when a correct algorithm is specified.
+func SchedulerAlgorithmIsValid(alg string) bool {
+	switch alg {
+	case SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
+		return true
+	default:
+		return false
+	}
+}
+
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
+	// SchedulerAlgorithm lets you select between available scheduling algorithms.
+	SchedulerAlgorithm string `hcl:"scheduler_algorithm"`
+
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
 	PreemptionConfig PreemptionConfig `hcl:"preemption_config"`

From 1af6a2adf15a663991df194313ffe3f87b75dd4a Mon Sep 17 00:00:00 2001
From: Charlie Voiselle <464492+angrycub@users.noreply.github.com>
Date: Wed, 8 Apr 2020 15:59:16 -0400
Subject: [PATCH 2/4] Wiring algorithm to scheduler calls

---
 nomad/structs/funcs.go |  5 ++++-
 scheduler/rank.go      | 15 +++++++++------
 scheduler/stack.go     |  5 ++++-
 scheduler/stack_oss.go |  8 +++++++-
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go
index cee0dd4067bc..bb5bb65e86cc 100644
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -151,7 +151,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 // ScoreFit is used to score the fit based on the Google work published here:
 // http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
 // This is equivalent to their BestFit v3
-func ScoreFit(node *Node, util *ComparableResources) float64 {
+func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
 	// COMPAT(0.11): Remove in 0.11
 	reserved := node.ComparableReservedResources()
 	res := node.ComparableResources()
@@ -176,6 +176,9 @@ func ScoreFit(node *Node, util *ComparableResources) float64 {
 	// score. Because the floor is 20, we simply use that as an anchor.
 	// This means at a perfect fit, we return 18 as the score.
 	score := 20.0 - total
+	if algorithm == "spread" {
+		score = total - 2
+	}
 
 	// Bound the score, just in case
 	// If the score is over 18, that means we've overfit the node.
diff --git a/scheduler/rank.go b/scheduler/rank.go
index e8633340f7c0..6556ab7af72c 100644
--- a/scheduler/rank.go
+++ b/scheduler/rank.go
@@ -151,19 +151,22 @@ type BinPackIterator struct {
 	source    RankIterator
 	evict     bool
 	priority  int
+	algorithm string
 	jobId     *structs.NamespacedID
 	taskGroup *structs.TaskGroup
 }
 
 // NewBinPackIterator returns a BinPackIterator which tries to fit tasks
 // potentially evicting other tasks based on a given priority.
-func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int) *BinPackIterator {
+func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm string) *BinPackIterator {
 	iter := &BinPackIterator{
-		ctx:      ctx,
-		source:   source,
-		evict:    evict,
-		priority: priority,
+		ctx:       ctx,
+		source:    source,
+		evict:     evict,
+		priority:  priority,
+		algorithm: algorithm,
 	}
+	iter.ctx.Logger().Named("binpack").Trace("NewBinPackIterator created", "algorithm", algorithm)
 	return iter
 }
 
@@ -436,7 +439,7 @@ OUTER:
 		}
 
 		// Score the fit normally otherwise
-		fitness := structs.ScoreFit(option.Node, util)
+		fitness := structs.ScoreFit(option.Node, util, iter.algorithm)
 		normalizedFit := fitness / binPackingMaxFitScore
 		option.Scores = append(option.Scores, normalizedFit)
 		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit)
diff --git a/scheduler/stack.go b/scheduler/stack.go
index 6fd6df12200f..d1a484fd947c 100644
--- a/scheduler/stack.go
+++ b/scheduler/stack.go
@@ -238,10 +238,13 @@ func NewSystemStack(ctx Context) *SystemStack {
 	// priority.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
 	enablePreemption := true
+	schedulerAlgorithm := "binpack"
 	if schedConfig != nil {
 		enablePreemption = schedConfig.PreemptionConfig.SystemSchedulerEnabled
+		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
 	}
-	s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0)
+
+	s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0, schedulerAlgorithm)
 
 	// Apply score normalization
 	s.scoreNorm = NewScoreNormalizationIterator(ctx, s.binPack)
diff --git a/scheduler/stack_oss.go b/scheduler/stack_oss.go
index 50ff5a523258..8aaa98031043 100644
--- a/scheduler/stack_oss.go
+++ b/scheduler/stack_oss.go
@@ -60,7 +60,13 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
 
 	// Apply the bin packing, this depends on the resources needed
 	// by a particular task group.
-	s.binPack = NewBinPackIterator(ctx, rankSource, false, 0)
+	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
+	schedulerAlgorithm := "binpack"
+	if schedConfig != nil {
+		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
+	}
+
+	s.binPack = NewBinPackIterator(ctx, rankSource, false, 0, schedulerAlgorithm)
 
 	// Apply the job anti-affinity iterator. This is to avoid placing
 	// multiple allocations on the same node for this job.

From 5078e0cfed88d1fc9f7704931ca4014b01a892f9 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Fri, 24 Apr 2020 10:47:43 -0400
Subject: [PATCH 3/4] tests and some clean up

---
 api/operator.go                    |  7 ++-
 command/agent/agent_test.go        |  1 +
 command/agent/command.go           |  5 ++
 command/agent/config_parse_test.go |  1 +
 command/agent/operator_endpoint.go | 10 ++--
 command/agent/testdata/basic.hcl   |  2 +
 command/agent/testdata/basic.json  |  1 +
 nomad/structs/funcs.go             | 40 +++++++++++---
 nomad/structs/funcs_test.go        | 89 +++++++++++++++---------------
 nomad/structs/operator.go          | 40 +++++++++-----
 scheduler/preemption_test.go       |  2 +-
 scheduler/rank.go                  | 22 +++++---
 scheduler/rank_test.go             | 14 ++---
 scheduler/stack.go                 |  3 +-
 scheduler/stack_oss.go             |  5 +-
 15 files changed, 147 insertions(+), 95 deletions(-)

diff --git a/api/operator.go b/api/operator.go
index f46216f31049..5d4254442198 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -115,7 +115,7 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
 	// SchedulerAlgorithm lets you select between available scheduling algorithms.
-	SchedulerAlgorithm string
+	SchedulerAlgorithm SchedulerAlgorithm
 
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
@@ -149,6 +149,11 @@ type SchedulerSetConfigurationResponse struct {
 // SchedulerAlgorithm is an enum string that encapsulates the valid options for a
 // SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
 // scheduler to be user-selectable.
 type SchedulerAlgorithm string
 
+const (
+	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
+	SchedulerAlgorithmSpread  SchedulerAlgorithm = "spread"
+)
+
 // PreemptionConfig specifies whether preemption is enabled based on scheduler type
 type PreemptionConfig struct {
 	SystemSchedulerEnabled bool
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index c5d5e232f81e..da278533b5e9 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -184,6 +184,7 @@ func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) {
 			"default case",
 			nil,
 			structs.SchedulerConfiguration{
+				SchedulerAlgorithm: "binpack",
 				PreemptionConfig: structs.PreemptionConfig{
 					SystemSchedulerEnabled: true,
 				},
diff --git a/command/agent/command.go b/command/agent/command.go
index bb1a2cda2a08..7546738e0a94 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -377,6 +377,11 @@ func (c *Command) isValidConfig(config, cmdConfig *Config) bool {
 		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
 	}
 
+	if err := config.Server.DefaultSchedulerConfig.Validate(); err != nil {
+		c.Ui.Error(err.Error())
+		return false
+	}
+
 	return true
 }
 
diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go
index cd1284774d7f..157413ec0335 100644
--- a/command/agent/config_parse_test.go
+++ b/command/agent/config_parse_test.go
@@ -126,6 +126,7 @@ var basicConfig = &Config{
 		RetryMaxAttempts: 3,
 	},
 	DefaultSchedulerConfig: &structs.SchedulerConfiguration{
+		SchedulerAlgorithm: "spread",
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled: true,
 			BatchSchedulerEnabled:  true,
diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go
index 8abe7cdf57ff..64d0ea25874b 100644
--- a/command/agent/operator_endpoint.go
+++ b/command/agent/operator_endpoint.go
@@ -253,18 +253,18 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R
 		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing scheduler config: %v", err))
 	}
 
-	if !structs.SchedulerAlgorithmIsValid(conf.SchedulerAlgorithm) {
-		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Invalid scheduler algorithm selected."))
-	}
-
 	args.Config = structs.SchedulerConfiguration{
-		SchedulerAlgorithm: conf.SchedulerAlgorithm,
+		SchedulerAlgorithm: structs.SchedulerAlgorithm(conf.SchedulerAlgorithm),
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled:  conf.PreemptionConfig.SystemSchedulerEnabled,
 			BatchSchedulerEnabled:   conf.PreemptionConfig.BatchSchedulerEnabled,
 			ServiceSchedulerEnabled: conf.PreemptionConfig.ServiceSchedulerEnabled},
 	}
 
+	if err := args.Config.Validate(); err != nil {
+		return nil, CodedError(http.StatusBadRequest, err.Error())
+	}
+
 	// Check for cas value
 	params := req.URL.Query()
 	if _, ok := params["cas"]; ok {
diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl
index a4edcfef6559..78d8d63eff8a 100644
--- a/command/agent/testdata/basic.hcl
+++ b/command/agent/testdata/basic.hcl
@@ -135,6 +135,8 @@ server {
   }
 
   default_scheduler_config {
+    scheduler_algorithm = "spread"
+
     preemption_config {
       batch_scheduler_enabled  = true
       system_scheduler_enabled = true
diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json
index 0052b68622d2..b02e0db8868a 100644
--- a/command/agent/testdata/basic.json
+++ b/command/agent/testdata/basic.json
@@ -297,6 +297,7 @@
     "2.2.2.2"
   ],
   "default_scheduler_config": [{
+    "scheduler_algorithm": "spread",
     "preemption_config": [{
       "batch_scheduler_enabled": true,
       "system_scheduler_enabled": true,
diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go
index bb5bb65e86cc..855ab7ff0e10 100644
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -148,10 +148,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 	return true, "", used, nil
 }
 
-// ScoreFit is used to score the fit based on the Google work published here:
-// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
-// This is equivalent to their BestFit v3
-func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
+func computeFreePercentage(node *Node, util *ComparableResources) (freePctCpu, freePctRam float64) {
 	// COMPAT(0.11): Remove in 0.11
 	reserved := node.ComparableReservedResources()
 	res := node.ComparableResources()
@@ -165,8 +162,18 @@ func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
 	}
 
 	// Compute the free percentage
-	freePctCpu := 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
-	freePctRam := 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
+	freePctCpu = 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
+	freePctRam = 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
+	return freePctCpu, freePctRam
+}
+
+// ScoreFitBinPack computes a fit score to achieve binpacking behavior.
+// Score is in [0, 18]
+//
+// It implements the BestFit v3 algorithm from the Google work published here:
+// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
+func ScoreFitBinPack(node *Node, util *ComparableResources) float64 {
+	freePctCpu, freePctRam := computeFreePercentage(node, util)
 
 	// Total will be "maximized" the smaller the value is.
 	// At 100% utilization, the total is 2, while at 0% util it is 20.
@@ -176,9 +183,6 @@
 	// score. Because the floor is 20, we simply use that as an anchor.
 	// This means at a perfect fit, we return 18 as the score.
 	score := 20.0 - total
-	if algorithm == "spread" {
-		score = total - 2
-	}
 
 	// Bound the score, just in case
 	// If the score is over 18, that means we've overfit the node.
@@ -190,6 +194,24 @@
 	return score
 }
 
+// ScoreFitSpread computes a fit score to achieve spread behavior.
+// Score is in [0, 18]
+//
+// This is equivalent to the Worst Fit of
+// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
+func ScoreFitSpread(node *Node, util *ComparableResources) float64 {
+	freePctCpu, freePctRam := computeFreePercentage(node, util)
+	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
+	score := total - 2
+
+	if score > 18.0 {
+		score = 18.0
+	} else if score < 0 {
+		score = 0
+	}
+	return score
+}
+
 func CopySliceConstraints(s []*Constraint) []*Constraint {
 	l := len(s)
 	if l == 0 {
diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go
index 4302164a61fc..62236f0cd0df 100644
--- a/nomad/structs/funcs_test.go
+++ b/nomad/structs/funcs_test.go
@@ -506,7 +506,7 @@ func TestAllocsFit_Devices(t *testing.T) {
 }
 
 // COMPAT(0.11): Remove in 0.11
-func TestScoreFit_Old(t *testing.T) {
+func TestScoreFitBinPack_Old(t *testing.T) {
 	node := &Node{}
 	node.Resources = &Resources{
 		CPU:      4096,
@@ -528,7 +528,7 @@
 			},
 		},
 	}
-	score := ScoreFit(node, util)
+	score := ScoreFitBinPack(node, util)
 	if score != 18.0 {
 		t.Fatalf("bad: %v", score)
 	}
@@ -544,7 +544,7 @@
 			},
 		},
 	}
-	score = ScoreFit(node, util)
+	score = ScoreFitBinPack(node, util)
 	if score != 0.0 {
 		t.Fatalf("bad: %v", score)
 	}
@@ -560,13 +560,13 @@
 			},
 		},
 	}
-	score = ScoreFit(node, util)
+	score = ScoreFitBinPack(node, util)
 	if score < 10.0 || score > 16.0 {
 		t.Fatalf("bad: %v", score)
 	}
 }
 
-func TestScoreFit(t *testing.T) {
+func TestScoreFitBinPack(t *testing.T) {
 	node := &Node{}
 	node.NodeResources = &NodeResources{
 		Cpu: NodeCpuResources{
@@ -585,52 +585,53 @@ func TestScoreFit(t *testing.T) {
 		},
 	}
 
-	// Test a perfect fit
-	util := &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 2048,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 4096,
+	cases := []struct {
+		name         string
+		flattened    AllocatedTaskResources
+		binPackScore float64
+		spreadScore  float64
+	}{
+		{
+			name: "almost filled node, but with just enough whole",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 2048},
+				Memory: AllocatedMemoryResources{MemoryMB: 4096},
 			},
+			binPackScore: 18,
+			spreadScore:  0,
 		},
-	}
-	score := ScoreFit(node, util)
-	if score != 18.0 {
-		t.Fatalf("bad: %v", score)
-	}
-
-	// Test the worst fit
-	util = &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 0,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 0,
+		{
+			name: "unutilized node",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 0},
+				Memory: AllocatedMemoryResources{MemoryMB: 0},
 			},
+			binPackScore: 0,
+			spreadScore:  18,
 		},
-	}
-	score = ScoreFit(node, util)
-	if score != 0.0 {
-		t.Fatalf("bad: %v", score)
-	}
-
-	// Test a mid-case scenario
-	util = &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 1024,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 2048,
+		{
+			name: "mid-case scenario",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 1024},
+				Memory: AllocatedMemoryResources{MemoryMB: 2048},
 			},
+			binPackScore: 13.675,
+			spreadScore:  4.325,
 		},
 	}
-	score = ScoreFit(node, util)
-	if score < 10.0 || score > 16.0 {
-		t.Fatalf("bad: %v", score)
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			util := &ComparableResources{Flattened: c.flattened}
+
+			binPackScore := ScoreFitBinPack(node, util)
+			require.InDelta(t, c.binPackScore, binPackScore, 0.001, "binpack score")
+
+			spreadScore := ScoreFitSpread(node, util)
+			require.InDelta(t, c.spreadScore, spreadScore, 0.001, "spread score")
+
+			require.InDelta(t, 18, binPackScore+spreadScore, 0.001, "score sum")
+		})
 	}
 }
 
diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go
index 101ce63514f3..38d181176000 100644
--- a/nomad/structs/operator.go
+++ b/nomad/structs/operator.go
@@ -1,6 +1,7 @@
 package structs
 
 import (
+	"fmt"
 	"time"
 
 	"github.com/hashicorp/raft"
@@ -132,28 +133,17 @@ type SchedulerAlgorithm string
 
 const (
 	// SchedulerAlgorithmBinpack indicates that the scheduler should pack
 	// allocations onto as few nodes as possible to maximize utilization.
-	SchedulerAlgorithmBinpack string = "binpack"
+	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
 
 	// SchedulerAlgorithmSpread indicates that the scheduler should spread
 	// allocations as evenly as possible over the available hardware.
-	SchedulerAlgorithmSpread string = "spread"
+	SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
 )
 
-// SchedulerAlgorithmIsValid validates the given SchedulerAlgorithm string and
-// returns true only when a correct algorithm is specified.
-func SchedulerAlgorithmIsValid(alg string) bool {
-	switch alg {
-	case SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
-		return true
-	default:
-		return false
-	}
-}
-
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
 	// SchedulerAlgorithm lets you select between available scheduling algorithms.
-	SchedulerAlgorithm string `hcl:"scheduler_algorithm"`
+	SchedulerAlgorithm SchedulerAlgorithm `hcl:"scheduler_algorithm"`
 
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
@@ -164,6 +154,28 @@ type SchedulerConfiguration struct {
 	ModifyIndex uint64
 }
 
+func (s *SchedulerConfiguration) EffectiveSchedulerAlgorithm() SchedulerAlgorithm {
+	if s == nil || s.SchedulerAlgorithm == "" {
+		return SchedulerAlgorithmBinpack
+	}
+
+	return s.SchedulerAlgorithm
+}
+
+func (s *SchedulerConfiguration) Validate() error {
+	if s == nil {
+		return nil
+	}
+
+	switch s.SchedulerAlgorithm {
+	case "", SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
+	default:
+		return fmt.Errorf("invalid scheduler algorithm: %v", s.SchedulerAlgorithm)
+	}
+
+	return nil
+}
+
 // SchedulerConfigurationResponse is the response object that wraps SchedulerConfiguration
 type SchedulerConfigurationResponse struct {
 	// SchedulerConfig contains scheduler config options
diff --git a/scheduler/preemption_test.go b/scheduler/preemption_test.go
index 798c3985cc94..f8a43d7d90f2 100644
--- a/scheduler/preemption_test.go
+++ b/scheduler/preemption_test.go
@@ -1349,7 +1349,7 @@ func TestPreemption(t *testing.T) {
 				ctx.plan.NodePreemptions[node.ID] = tc.currentPreemptions
 			}
 			static := NewStaticRankIterator(ctx, nodes)
-			binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority)
+			binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority, structs.SchedulerAlgorithmBinpack)
 			job := mock.Job()
 			job.Priority = tc.jobPriority
 			binPackIter.SetJob(job)
diff --git a/scheduler/rank.go b/scheduler/rank.go
index 6556ab7af72c..06ef9c452f98 100644
--- a/scheduler/rank.go
+++ b/scheduler/rank.go
@@ -151,20 +151,26 @@ type BinPackIterator struct {
 	source    RankIterator
 	evict     bool
 	priority  int
-	algorithm string
 	jobId     *structs.NamespacedID
 	taskGroup *structs.TaskGroup
+	scoreFit  func(*structs.Node, *structs.ComparableResources) float64
 }
 
 // NewBinPackIterator returns a BinPackIterator which tries to fit tasks
 // potentially evicting other tasks based on a given priority.
-func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm string) *BinPackIterator {
+func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm structs.SchedulerAlgorithm) *BinPackIterator {
+
+	scoreFn := structs.ScoreFitBinPack
+	if algorithm == structs.SchedulerAlgorithmSpread {
+		scoreFn = structs.ScoreFitSpread
+	}
+
 	iter := &BinPackIterator{
-		ctx:       ctx,
-		source:    source,
-		evict:     evict,
-		priority:  priority,
-		algorithm: algorithm,
+		ctx:      ctx,
+		source:   source,
+		evict:    evict,
+		priority: priority,
+		scoreFit: scoreFn,
 	}
 	iter.ctx.Logger().Named("binpack").Trace("NewBinPackIterator created", "algorithm", algorithm)
 	return iter
@@ -439,7 +445,7 @@ OUTER:
 		}
 
 		// Score the fit normally otherwise
-		fitness := structs.ScoreFit(option.Node, util, iter.algorithm)
+		fitness := iter.scoreFit(option.Node, util)
 		normalizedFit := fitness / binPackingMaxFitScore
 		option.Scores = append(option.Scores, normalizedFit)
 		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit)
diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go
index aa1ee01a1f02..7454d075a217 100644
--- a/scheduler/rank_test.go
+++ b/scheduler/rank_test.go
@@ -107,7 +107,7 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -220,7 +220,7 @@ func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -471,7 +471,7 @@ func TestBinPackIterator_Network_Failure(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -569,7 +569,7 @@ func TestBinPackIterator_PlannedAlloc(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -685,7 +685,7 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -805,7 +805,7 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -1110,7 +1110,7 @@ func TestBinPackIterator_Devices(t *testing.T) {
 			}
 
 			static := NewStaticRankIterator(ctx, []*RankedNode{{Node: c.Node}})
-			binp := NewBinPackIterator(ctx, static, false, 0)
+			binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 			binp.SetTaskGroup(c.TaskGroup)
 
 			out := binp.Next()
diff --git a/scheduler/stack.go b/scheduler/stack.go
index d1a484fd947c..b9ceb19bb31e 100644
--- a/scheduler/stack.go
+++ b/scheduler/stack.go
@@ -237,11 +237,10 @@ func NewSystemStack(ctx Context) *SystemStack {
 	// by a particular task group. Enable eviction as system jobs are high
 	// priority.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
+	schedulerAlgorithm := schedConfig.EffectiveSchedulerAlgorithm()
 	enablePreemption := true
-	schedulerAlgorithm := "binpack"
 	if schedConfig != nil {
 		enablePreemption = schedConfig.PreemptionConfig.SystemSchedulerEnabled
-		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
 	}
 
 	s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0, schedulerAlgorithm)
diff --git a/scheduler/stack_oss.go b/scheduler/stack_oss.go
index 8aaa98031043..a705b8c7395b 100644
--- a/scheduler/stack_oss.go
+++ b/scheduler/stack_oss.go
@@ -61,10 +61,7 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
 	// Apply the bin packing, this depends on the resources needed
 	// by a particular task group.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
-	schedulerAlgorithm := "binpack"
-	if schedConfig != nil {
-		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
-	}
+	schedulerAlgorithm := schedConfig.EffectiveSchedulerAlgorithm()
 
 	s.binPack = NewBinPackIterator(ctx, rankSource, false, 0, schedulerAlgorithm)

From 9962b9fe7ac2969731477c888a79a1e7b4a3369f Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Fri, 1 May 2020 13:12:52 -0400
Subject: [PATCH 4/4] changelog and fix typo

---
 CHANGELOG.md                | 4 ++++
 nomad/structs/funcs_test.go | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e19855fb26bf..218d053bcca2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,10 @@ FEATURES:
 
  * **Task dependencies UI**: task lifecycle charts and details
 
+IMPROVEMENTS:
+
+ * core: Allow spreading allocations as an alternative to binpacking [[GH-7810](https://github.com/hashicorp/nomad/issues/7810)]
+
 BUG FIXES:
 
  * api: autoscaling policies should not be returned for stopped jobs [[GH-7768](https://github.com/hashicorp/nomad/issues/7768)]
diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go
index 62236f0cd0df..725909abeccf 100644
--- a/nomad/structs/funcs_test.go
+++ b/nomad/structs/funcs_test.go
@@ -592,7 +592,7 @@ func TestScoreFitBinPack(t *testing.T) {
 		spreadScore  float64
 	}{
 		{
-			name: "almost filled node, but with just enough whole",
+			name: "almost filled node, but with just enough hole",
 			flattened: AllocatedTaskResources{
 				Cpu:    AllocatedCpuResources{CpuShares: 2048},
 				Memory: AllocatedMemoryResources{MemoryMB: 4096},
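A note on the scoring arithmetic introduced in PATCH 3/4's nomad/structs/funcs.go: ScoreFitBinPack and ScoreFitSpread are exact mirrors of each other around the shared term total = 10^freePctCpu + 10^freePctRam, which ranges from 2 at full utilization to 20 at idle. That symmetry is why TestScoreFitBinPack asserts the two scores always sum to 18. The standalone sketch below reproduces just that arithmetic for the three test cases; it deliberately omits the reserved-resource handling and bounds clamping of the real functions:

```go
package main

import (
	"fmt"
	"math"
)

// scoreBinPack mirrors ScoreFitBinPack: BestFit v3 from the Google
// datacenter-scheduling slides. Higher scores favor already-utilized nodes.
func scoreBinPack(freePctCpu, freePctRam float64) float64 {
	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
	return 20.0 - total // total is in [2, 20], so the score is in [0, 18]
}

// scoreSpread mirrors ScoreFitSpread (Worst Fit): higher scores favor
// emptier nodes.
func scoreSpread(freePctCpu, freePctRam float64) float64 {
	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
	return total - 2.0
}

func main() {
	// The three free percentages correspond to the test cases: a full
	// node (0), a half-utilized node (0.5), and an empty node (1).
	for _, free := range []float64{0, 0.5, 1} {
		b, s := scoreBinPack(free, free), scoreSpread(free, free)
		fmt.Printf("free=%.1f binpack=%.3f spread=%.3f sum=%.0f\n", free, b, s, b+s)
	}
}
```

Running it prints binpack scores of 18, 13.675, and 0 against spread scores of 0, 4.325, and 18, matching the binPackScore/spreadScore expectations in the table-driven test, with every pair summing to 18.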
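A design note on PATCH 3/4's rank.go change: rather than storing the algorithm and comparing strings on every candidate node, NewBinPackIterator resolves the selection to a function value once at construction, so the per-node scoring path is a single indirect call. Reduced to its essentials (all names in this sketch are illustrative, not Nomad's):

```go
package main

import "fmt"

type node struct{ free float64 }

// Two interchangeable scoring policies with the same signature.
func scoreBinPack(n node) float64 { return 1 - n.free }
func scoreSpread(n node) float64  { return n.free }

type iterator struct {
	// The policy is bound once at construction, so the hot path needs
	// no branching or string comparison per node.
	scoreFit func(node) float64
}

func newIterator(algorithm string) *iterator {
	scoreFn := scoreBinPack
	if algorithm == "spread" {
		scoreFn = scoreSpread
	}
	return &iterator{scoreFit: scoreFn}
}

func main() {
	it := newIterator("spread")
	fmt.Println(it.scoreFit(node{free: 0.75})) // 0.75
}
```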
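The nil-receiver defaulting added to nomad/structs/operator.go is what lets NewSystemStack and NewGenericStack delete their schedConfig != nil branches in the same patch. A minimal illustration built on the series' own definitions (it assumes a tree with these patches applied):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// A nil *SchedulerConfiguration is legal input: the method checks
	// its own receiver, so callers fall back to binpack without
	// guarding the pointer themselves.
	var nilConf *structs.SchedulerConfiguration
	fmt.Println(nilConf.EffectiveSchedulerAlgorithm()) // binpack

	// An explicit selection is passed through unchanged.
	conf := &structs.SchedulerConfiguration{
		SchedulerAlgorithm: structs.SchedulerAlgorithmSpread,
	}
	fmt.Println(conf.EffectiveSchedulerAlgorithm()) // spread

	// Validate rejects anything other than "", "binpack", or "spread";
	// both the agent (isValidConfig) and the HTTP endpoint rely on it.
	bad := &structs.SchedulerConfiguration{SchedulerAlgorithm: "best-fit"}
	fmt.Println(bad.Validate()) // invalid scheduler algorithm: best-fit
}
```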
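For operators, the end-to-end flow looks roughly like the sketch below. It assumes the api package's existing Operator client methods SchedulerGetConfiguration and SchedulerSetConfiguration, which wrap the scheduler-configuration endpoint served by schedulerUpdateConfig above; treat the exact response field names as an assumption rather than a guarantee:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Switch the cluster to the spread algorithm. An invalid value is
	// rejected server-side with a 400 via SchedulerConfiguration.Validate.
	conf := &api.SchedulerConfiguration{
		SchedulerAlgorithm: api.SchedulerAlgorithmSpread,
		PreemptionConfig:   api.PreemptionConfig{SystemSchedulerEnabled: true},
	}
	if _, _, err := client.Operator().SchedulerSetConfiguration(conf, nil); err != nil {
		log.Fatal(err)
	}

	// Read the setting back to confirm it took effect.
	resp, _, err := client.Operator().SchedulerGetConfiguration(nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("algorithm:", resp.SchedulerConfig.SchedulerAlgorithm)
}
```

The same setting can be fixed at agent startup through the server stanza's default_scheduler_config block, exactly as the basic.hcl testdata in PATCH 3/4 shows with scheduler_algorithm = "spread".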