From 6571ccefbc13e6dbed7bd53fcbe1fe9666f2d312 Mon Sep 17 00:00:00 2001
From: Charlie Voiselle <464492+angrycub@users.noreply.github.com>
Date: Fri, 3 Apr 2020 19:03:14 -0400
Subject: [PATCH 1/4] Add SchedulerAlgorithm to SchedulerConfig

---
 api/operator.go                    |  9 +++++++++
 command/agent/operator_endpoint.go |  7 +++++++
 nomad/config.go                    |  1 +
 nomad/structs/operator.go          | 29 +++++++++++++++++++++++++++++
 4 files changed, 46 insertions(+)

diff --git a/api/operator.go b/api/operator.go
index d215326f4407..f46216f31049 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -112,7 +112,11 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
 	return nil
 }
 
+// SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
+	// SchedulerAlgorithm lets you select between available scheduling algorithms.
+	SchedulerAlgorithm string
+
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
 	PreemptionConfig PreemptionConfig
@@ -140,6 +144,11 @@ type SchedulerSetConfigurationResponse struct {
 	WriteMeta
 }
 
+// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
+// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
+// scheduler to be user-selectable.
+type SchedulerAlgorithm string
+
 // PreemptionConfig specifies whether preemption is enabled based on scheduler type
 type PreemptionConfig struct {
 	SystemSchedulerEnabled bool
diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go
index 7ac48c0fdf6c..8abe7cdf57ff 100644
--- a/command/agent/operator_endpoint.go
+++ b/command/agent/operator_endpoint.go
@@ -14,6 +14,8 @@ import (
 	"github.com/hashicorp/raft"
 )
 
+// OperatorRequest is used to route operator/raft API requests to the implementing
+// functions.
 func (s *HTTPServer) OperatorRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
 	path := strings.TrimPrefix(req.URL.Path, "/v1/operator/raft/")
 	switch {
@@ -251,7 +253,12 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R
 		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing scheduler config: %v", err))
 	}
 
+	if !structs.SchedulerAlgorithmIsValid(conf.SchedulerAlgorithm) {
+		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Invalid scheduler algorithm selected."))
+	}
+
 	args.Config = structs.SchedulerConfiguration{
+		SchedulerAlgorithm: conf.SchedulerAlgorithm,
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled: conf.PreemptionConfig.SystemSchedulerEnabled,
 			BatchSchedulerEnabled:  conf.PreemptionConfig.BatchSchedulerEnabled,
diff --git a/nomad/config.go b/nomad/config.go
index 8a0fb7a181b7..8b4fc1c2568a 100644
--- a/nomad/config.go
+++ b/nomad/config.go
@@ -403,6 +403,7 @@ func DefaultConfig() *Config {
 		ServerHealthInterval: 2 * time.Second,
 		AutopilotInterval:    10 * time.Second,
 		DefaultSchedulerConfig: structs.SchedulerConfiguration{
+			SchedulerAlgorithm: structs.SchedulerAlgorithmBinpack,
 			PreemptionConfig: structs.PreemptionConfig{
 				SystemSchedulerEnabled: true,
 				BatchSchedulerEnabled:  false,
diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go
index 03d8caedb352..101ce63514f3 100644
--- a/nomad/structs/operator.go
+++ b/nomad/structs/operator.go
@@ -124,8 +124,37 @@ type AutopilotConfig struct {
 	ModifyIndex uint64
 }
 
+// SchedulerAlgorithm is an enum string that encapsulates the valid options for a
+// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
+// scheduler to be user-selectable.
+type SchedulerAlgorithm string
+
+const (
+	// SchedulerAlgorithmBinpack indicates that the scheduler should pack
+	// allocations onto as few nodes as possible to maximize utilization.
+	SchedulerAlgorithmBinpack string = "binpack"
+
+	// SchedulerAlgorithmSpread indicates that the scheduler should spread
+	// allocations as evenly as possible over the available hardware.
+	SchedulerAlgorithmSpread string = "spread"
+)
+
+// SchedulerAlgorithmIsValid validates the given SchedulerAlgorithm string and
+// returns true only when a correct algorithm is specified.
+func SchedulerAlgorithmIsValid(alg string) bool {
+	switch alg {
+	case SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
+		return true
+	default:
+		return false
+	}
+}
+
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
+	// SchedulerAlgorithm lets you select between available scheduling algorithms.
+	SchedulerAlgorithm string `hcl:"scheduler_algorithm"`
+
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
 	PreemptionConfig PreemptionConfig `hcl:"preemption_config"`

From 1af6a2adf15a663991df194313ffe3f87b75dd4a Mon Sep 17 00:00:00 2001
From: Charlie Voiselle <464492+angrycub@users.noreply.github.com>
Date: Wed, 8 Apr 2020 15:59:16 -0400
Subject: [PATCH 2/4] Wiring algorithm to scheduler calls

---
 nomad/structs/funcs.go |  5 ++++-
 scheduler/rank.go      | 15 +++++++++------
 scheduler/stack.go     |  5 ++++-
 scheduler/stack_oss.go |  8 +++++++-
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go
index cee0dd4067bc..bb5bb65e86cc 100644
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -151,7 +151,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 // ScoreFit is used to score the fit based on the Google work published here:
 // http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
 // This is equivalent to their BestFit v3
-func ScoreFit(node *Node, util *ComparableResources) float64 {
+func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
 	// COMPAT(0.11): Remove in 0.11
 	reserved := node.ComparableReservedResources()
 	res := node.ComparableResources()
@@ -176,6 +176,9 @@ func ScoreFit(node *Node, util *ComparableResources) float64 {
 	// score. Because the floor is 20, we simply use that as an anchor.
 	// This means at a perfect fit, we return 18 as the score.
 	score := 20.0 - total
+	if algorithm == "spread" {
+		score = total - 2
+	}
 
 	// Bound the score, just in case
 	// If the score is over 18, that means we've overfit the node.
diff --git a/scheduler/rank.go b/scheduler/rank.go
index e8633340f7c0..6556ab7af72c 100644
--- a/scheduler/rank.go
+++ b/scheduler/rank.go
@@ -151,19 +151,22 @@ type BinPackIterator struct {
 	source    RankIterator
 	evict     bool
 	priority  int
+	algorithm string
 	jobId     *structs.NamespacedID
 	taskGroup *structs.TaskGroup
 }
 
 // NewBinPackIterator returns a BinPackIterator which tries to fit tasks
 // potentially evicting other tasks based on a given priority.
-func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int) *BinPackIterator {
+func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm string) *BinPackIterator {
 	iter := &BinPackIterator{
-		ctx:      ctx,
-		source:   source,
-		evict:    evict,
-		priority: priority,
+		ctx:       ctx,
+		source:    source,
+		evict:     evict,
+		priority:  priority,
+		algorithm: algorithm,
 	}
+	iter.ctx.Logger().Named("binpack").Trace("NewBinPackIterator created", "algorithm", algorithm)
 	return iter
 }
 
@@ -436,7 +439,7 @@ OUTER:
 		}
 
 		// Score the fit normally otherwise
-		fitness := structs.ScoreFit(option.Node, util)
+		fitness := structs.ScoreFit(option.Node, util, iter.algorithm)
 		normalizedFit := fitness / binPackingMaxFitScore
 		option.Scores = append(option.Scores, normalizedFit)
 		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit)
diff --git a/scheduler/stack.go b/scheduler/stack.go
index 6fd6df12200f..d1a484fd947c 100644
--- a/scheduler/stack.go
+++ b/scheduler/stack.go
@@ -238,10 +238,13 @@ func NewSystemStack(ctx Context) *SystemStack {
 	// priority.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
 	enablePreemption := true
+	schedulerAlgorithm := "binpack"
 	if schedConfig != nil {
 		enablePreemption = schedConfig.PreemptionConfig.SystemSchedulerEnabled
+		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
 	}
-	s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0)
+
+	s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0, schedulerAlgorithm)
 
 	// Apply score normalization
 	s.scoreNorm = NewScoreNormalizationIterator(ctx, s.binPack)
diff --git a/scheduler/stack_oss.go b/scheduler/stack_oss.go
index 50ff5a523258..8aaa98031043 100644
--- a/scheduler/stack_oss.go
+++ b/scheduler/stack_oss.go
@@ -60,7 +60,13 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
 
 	// Apply the bin packing, this depends on the resources needed
 	// by a particular task group.
-	s.binPack = NewBinPackIterator(ctx, rankSource, false, 0)
+	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
+	schedulerAlgorithm := "binpack"
+	if schedConfig != nil {
+		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
+	}
+
+	s.binPack = NewBinPackIterator(ctx, rankSource, false, 0, schedulerAlgorithm)
 
 	// Apply the job anti-affinity iterator. This is to avoid placing
 	// multiple allocations on the same node for this job.

From 5078e0cfed88d1fc9f7704931ca4014b01a892f9 Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Fri, 24 Apr 2020 10:47:43 -0400
Subject: [PATCH 3/4] tests and some clean up

---
 api/operator.go                    |  7 ++-
 command/agent/agent_test.go        |  1 +
 command/agent/command.go           |  5 ++
 command/agent/config_parse_test.go |  1 +
 command/agent/operator_endpoint.go | 10 ++--
 command/agent/testdata/basic.hcl   |  2 +
 command/agent/testdata/basic.json  |  1 +
 nomad/structs/funcs.go             | 40 +++++++++++---
 nomad/structs/funcs_test.go        | 89 +++++++++++++++---------------
 nomad/structs/operator.go          | 40 +++++++++-----
 scheduler/preemption_test.go       |  2 +-
 scheduler/rank.go                  | 22 +++++---
 scheduler/rank_test.go             | 14 ++---
 scheduler/stack.go                 |  3 +-
 scheduler/stack_oss.go             |  5 +-
 15 files changed, 147 insertions(+), 95 deletions(-)

diff --git a/api/operator.go b/api/operator.go
index f46216f31049..5d4254442198 100644
--- a/api/operator.go
+++ b/api/operator.go
@@ -115,7 +115,7 @@ func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
 	// SchedulerAlgorithm lets you select between available scheduling algorithms.
-	SchedulerAlgorithm string
+	SchedulerAlgorithm SchedulerAlgorithm
 
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
@@ -149,6 +149,11 @@ type SchedulerSetConfigurationResponse struct {
 // SchedulerAlgorithm is an enum string that encapsulates the valid options for a
 // SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the
 // scheduler to be user-selectable.
 type SchedulerAlgorithm string
 
+const (
+	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
+	SchedulerAlgorithmSpread  SchedulerAlgorithm = "spread"
+)
+
 // PreemptionConfig specifies whether preemption is enabled based on scheduler type
 type PreemptionConfig struct {
 	SystemSchedulerEnabled bool
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index c5d5e232f81e..da278533b5e9 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -184,6 +184,7 @@ func TestAgent_ServerConfig_SchedulerFlags(t *testing.T) {
 			"default case",
 			nil,
 			structs.SchedulerConfiguration{
+				SchedulerAlgorithm: "binpack",
 				PreemptionConfig: structs.PreemptionConfig{
 					SystemSchedulerEnabled: true,
 				},
diff --git a/command/agent/command.go b/command/agent/command.go
index bb1a2cda2a08..7546738e0a94 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -377,6 +377,11 @@ func (c *Command) isValidConfig(config, cmdConfig *Config) bool {
 		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
 	}
 
+	if err := config.Server.DefaultSchedulerConfig.Validate(); err != nil {
+		c.Ui.Error(err.Error())
+		return false
+	}
+
 	return true
 }
 
diff --git a/command/agent/config_parse_test.go b/command/agent/config_parse_test.go
index cd1284774d7f..157413ec0335 100644
--- a/command/agent/config_parse_test.go
+++ b/command/agent/config_parse_test.go
@@ -126,6 +126,7 @@ var basicConfig = &Config{
 		RetryMaxAttempts: 3,
 	},
 	DefaultSchedulerConfig: &structs.SchedulerConfiguration{
+		SchedulerAlgorithm: "spread",
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled: true,
 			BatchSchedulerEnabled:  true,
diff --git a/command/agent/operator_endpoint.go b/command/agent/operator_endpoint.go
index 8abe7cdf57ff..64d0ea25874b 100644
--- a/command/agent/operator_endpoint.go
+++ b/command/agent/operator_endpoint.go
@@ -253,18 +253,18 @@ func (s *HTTPServer) schedulerUpdateConfig(resp http.ResponseWriter, req *http.R
 		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Error parsing scheduler config: %v", err))
 	}
 
-	if !structs.SchedulerAlgorithmIsValid(conf.SchedulerAlgorithm) {
-		return nil, CodedError(http.StatusBadRequest, fmt.Sprintf("Invalid scheduler algorithm selected."))
-	}
-
 	args.Config = structs.SchedulerConfiguration{
-		SchedulerAlgorithm: conf.SchedulerAlgorithm,
+		SchedulerAlgorithm: structs.SchedulerAlgorithm(conf.SchedulerAlgorithm),
 		PreemptionConfig: structs.PreemptionConfig{
 			SystemSchedulerEnabled:  conf.PreemptionConfig.SystemSchedulerEnabled,
 			BatchSchedulerEnabled:   conf.PreemptionConfig.BatchSchedulerEnabled,
 			ServiceSchedulerEnabled: conf.PreemptionConfig.ServiceSchedulerEnabled},
 	}
 
+	if err := args.Config.Validate(); err != nil {
+		return nil, CodedError(http.StatusBadRequest, err.Error())
+	}
+
 	// Check for cas value
 	params := req.URL.Query()
 	if _, ok := params["cas"]; ok {
diff --git a/command/agent/testdata/basic.hcl b/command/agent/testdata/basic.hcl
index a4edcfef6559..78d8d63eff8a 100644
--- a/command/agent/testdata/basic.hcl
+++ b/command/agent/testdata/basic.hcl
@@ -135,6 +135,8 @@ server {
   }
 
   default_scheduler_config {
+    scheduler_algorithm = "spread"
+
     preemption_config {
       batch_scheduler_enabled  = true
       system_scheduler_enabled = true
diff --git a/command/agent/testdata/basic.json b/command/agent/testdata/basic.json
index 0052b68622d2..b02e0db8868a 100644
--- a/command/agent/testdata/basic.json
+++ b/command/agent/testdata/basic.json
@@ -297,6 +297,7 @@
     "2.2.2.2"
   ],
   "default_scheduler_config": [{
+    "scheduler_algorithm": "spread",
     "preemption_config": [{
       "batch_scheduler_enabled": true,
       "system_scheduler_enabled": true,
diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go
index bb5bb65e86cc..855ab7ff0e10 100644
--- a/nomad/structs/funcs.go
+++ b/nomad/structs/funcs.go
@@ -148,10 +148,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
 	return true, "", used, nil
 }
 
-// ScoreFit is used to score the fit based on the Google work published here:
-// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
-// This is equivalent to their BestFit v3
-func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
+func computeFreePercentage(node *Node, util *ComparableResources) (freePctCpu, freePctRam float64) {
 	// COMPAT(0.11): Remove in 0.11
 	reserved := node.ComparableReservedResources()
 	res := node.ComparableResources()
@@ -165,8 +162,18 @@ func ScoreFit(node *Node, util *ComparableResources, algorithm string) float64 {
 	}
 
 	// Compute the free percentage
-	freePctCpu := 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
-	freePctRam := 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
+	freePctCpu = 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
+	freePctRam = 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
+	return freePctCpu, freePctRam
+}
+
+// ScoreFitBinPack computes a fit score to achieve binpacking behavior.
+// Score is in [0, 18]
+//
+// It implements the BestFit v3 algorithm from the Google work published here:
+// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
+func ScoreFitBinPack(node *Node, util *ComparableResources) float64 {
+	freePctCpu, freePctRam := computeFreePercentage(node, util)
 
 	// Total will be "maximized" the smaller the value is.
 	// At 100% utilization, the total is 2, while at 0% util it is 20.
@@ -176,9 +183,6 @@
 	// score. Because the floor is 20, we simply use that as an anchor.
 	// This means at a perfect fit, we return 18 as the score.
 	score := 20.0 - total
-	if algorithm == "spread" {
-		score = total - 2
-	}
 
 	// Bound the score, just in case
 	// If the score is over 18, that means we've overfit the node.
@@ -190,6 +194,24 @@
 	return score
 }
 
+// ScoreFitSpread computes a fit score to achieve spread behavior.
+// Score is in [0, 18]
+//
+// This is equivalent to the Worst Fit of
+// http://www.columbia.edu/~cs2035/courses/ieor4405.S13/datacenter_scheduling.ppt
+func ScoreFitSpread(node *Node, util *ComparableResources) float64 {
+	freePctCpu, freePctRam := computeFreePercentage(node, util)
+	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
+	score := total - 2
+
+	if score > 18.0 {
+		score = 18.0
+	} else if score < 0 {
+		score = 0
+	}
+	return score
+}
+
 func CopySliceConstraints(s []*Constraint) []*Constraint {
 	l := len(s)
 	if l == 0 {
diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go
index 4302164a61fc..62236f0cd0df 100644
--- a/nomad/structs/funcs_test.go
+++ b/nomad/structs/funcs_test.go
@@ -506,7 +506,7 @@ func TestAllocsFit_Devices(t *testing.T) {
 }
 
 // COMPAT(0.11): Remove in 0.11
-func TestScoreFit_Old(t *testing.T) {
+func TestScoreFitBinPack_Old(t *testing.T) {
 	node := &Node{}
 	node.Resources = &Resources{
 		CPU:      4096,
@@ -528,7 +528,7 @@
 			},
 		},
 	}
-	score := ScoreFit(node, util)
+	score := ScoreFitBinPack(node, util)
 	if score != 18.0 {
 		t.Fatalf("bad: %v", score)
 	}
@@ -544,7 +544,7 @@
 			},
 		},
 	}
-	score = ScoreFit(node, util)
+	score = ScoreFitBinPack(node, util)
 	if score != 0.0 {
 		t.Fatalf("bad: %v", score)
 	}
@@ -560,13 +560,13 @@
 			},
 		},
 	}
-	score = ScoreFit(node, util)
+	score = ScoreFitBinPack(node, util)
 	if score < 10.0 || score > 16.0 {
 		t.Fatalf("bad: %v", score)
 	}
 }
 
-func TestScoreFit(t *testing.T) {
+func TestScoreFitBinPack(t *testing.T) {
 	node := &Node{}
 	node.NodeResources = &NodeResources{
 		Cpu: NodeCpuResources{
@@ -585,52 +585,53 @@ func TestScoreFit(t *testing.T) {
 		},
 	}
 
-	// Test a perfect fit
-	util := &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 2048,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 4096,
+	cases := []struct {
+		name         string
+		flattened    AllocatedTaskResources
+		binPackScore float64
+		spreadScore  float64
+	}{
+		{
+			name: "almost filled node, but with just enough whole",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 2048},
+				Memory: AllocatedMemoryResources{MemoryMB: 4096},
 			},
+			binPackScore: 18,
+			spreadScore:  0,
 		},
-	}
-	score := ScoreFit(node, util)
-	if score != 18.0 {
-		t.Fatalf("bad: %v", score)
-	}
-
-	// Test the worst fit
-	util = &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 0,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 0,
+		{
+			name: "unutilized node",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 0},
+				Memory: AllocatedMemoryResources{MemoryMB: 0},
 			},
+			binPackScore: 0,
+			spreadScore:  18,
 		},
-	}
-	score = ScoreFit(node, util)
-	if score != 0.0 {
-		t.Fatalf("bad: %v", score)
-	}
-
-	// Test a mid-case scenario
-	util = &ComparableResources{
-		Flattened: AllocatedTaskResources{
-			Cpu: AllocatedCpuResources{
-				CpuShares: 1024,
-			},
-			Memory: AllocatedMemoryResources{
-				MemoryMB: 2048,
+		{
+			name: "mid-case scenario",
+			flattened: AllocatedTaskResources{
+				Cpu:    AllocatedCpuResources{CpuShares: 1024},
+				Memory: AllocatedMemoryResources{MemoryMB: 2048},
 			},
+			binPackScore: 13.675,
+			spreadScore:  4.325,
 		},
 	}
-	score = ScoreFit(node, util)
-	if score < 10.0 || score > 16.0 {
-		t.Fatalf("bad: %v", score)
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			util := &ComparableResources{Flattened: c.flattened}
+
+			binPackScore := ScoreFitBinPack(node, util)
+			require.InDelta(t, c.binPackScore, binPackScore, 0.001, "binpack score")
+
+			spreadScore := ScoreFitSpread(node, util)
+			require.InDelta(t, c.spreadScore, spreadScore, 0.001, "spread score")
+
+			require.InDelta(t, 18, binPackScore+spreadScore, 0.001, "score sum")
+		})
 	}
 }
 
diff --git a/nomad/structs/operator.go b/nomad/structs/operator.go
index 101ce63514f3..38d181176000 100644
--- a/nomad/structs/operator.go
+++ b/nomad/structs/operator.go
@@ -1,6 +1,7 @@
 package structs
 
 import (
+	"fmt"
 	"time"
 
 	"github.com/hashicorp/raft"
@@ -132,28 +133,17 @@ type SchedulerAlgorithm string
 
 const (
 	// SchedulerAlgorithmBinpack indicates that the scheduler should pack
 	// allocations onto as few nodes as possible to maximize utilization.
-	SchedulerAlgorithmBinpack string = "binpack"
+	SchedulerAlgorithmBinpack SchedulerAlgorithm = "binpack"
 
 	// SchedulerAlgorithmSpread indicates that the scheduler should spread
 	// allocations as evenly as possible over the available hardware.
-	SchedulerAlgorithmSpread string = "spread"
+	SchedulerAlgorithmSpread SchedulerAlgorithm = "spread"
 )
 
-// SchedulerAlgorithmIsValid validates the given SchedulerAlgorithm string and
-// returns true only when a correct algorithm is specified.
-func SchedulerAlgorithmIsValid(alg string) bool {
-	switch alg {
-	case SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
-		return true
-	default:
-		return false
-	}
-}
-
 // SchedulerConfiguration is the config for controlling scheduler behavior
 type SchedulerConfiguration struct {
 	// SchedulerAlgorithm lets you select between available scheduling algorithms.
-	SchedulerAlgorithm string `hcl:"scheduler_algorithm"`
+	SchedulerAlgorithm SchedulerAlgorithm `hcl:"scheduler_algorithm"`
 
 	// PreemptionConfig specifies whether to enable eviction of lower
 	// priority jobs to place higher priority jobs.
@@ -164,6 +154,28 @@ type SchedulerConfiguration struct {
 	ModifyIndex uint64
 }
 
+func (s *SchedulerConfiguration) EffectiveSchedulerAlgorithm() SchedulerAlgorithm {
+	if s == nil || s.SchedulerAlgorithm == "" {
+		return SchedulerAlgorithmBinpack
+	}
+
+	return s.SchedulerAlgorithm
+}
+
+func (s *SchedulerConfiguration) Validate() error {
+	if s == nil {
+		return nil
+	}
+
+	switch s.SchedulerAlgorithm {
+	case "", SchedulerAlgorithmBinpack, SchedulerAlgorithmSpread:
+	default:
+		return fmt.Errorf("invalid scheduler algorithm: %v", s.SchedulerAlgorithm)
+	}
+
+	return nil
+}
+
 // SchedulerConfigurationResponse is the response object that wraps SchedulerConfiguration
 type SchedulerConfigurationResponse struct {
 	// SchedulerConfig contains scheduler config options
diff --git a/scheduler/preemption_test.go b/scheduler/preemption_test.go
index 798c3985cc94..f8a43d7d90f2 100644
--- a/scheduler/preemption_test.go
+++ b/scheduler/preemption_test.go
@@ -1349,7 +1349,7 @@ func TestPreemption(t *testing.T) {
 				ctx.plan.NodePreemptions[node.ID] = tc.currentPreemptions
 			}
 			static := NewStaticRankIterator(ctx, nodes)
-			binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority)
+			binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority, structs.SchedulerAlgorithmBinpack)
 			job := mock.Job()
 			job.Priority = tc.jobPriority
 			binPackIter.SetJob(job)
diff --git a/scheduler/rank.go b/scheduler/rank.go
index 6556ab7af72c..06ef9c452f98 100644
--- a/scheduler/rank.go
+++ b/scheduler/rank.go
@@ -151,20 +151,26 @@ type BinPackIterator struct {
 	source    RankIterator
 	evict     bool
 	priority  int
-	algorithm string
 	jobId     *structs.NamespacedID
 	taskGroup *structs.TaskGroup
+	scoreFit  func(*structs.Node, *structs.ComparableResources) float64
 }
 
 // NewBinPackIterator returns a BinPackIterator which tries to fit tasks
 // potentially evicting other tasks based on a given priority.
-func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm string) *BinPackIterator {
+func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm structs.SchedulerAlgorithm) *BinPackIterator {
+
+	scoreFn := structs.ScoreFitBinPack
+	if algorithm == structs.SchedulerAlgorithmSpread {
+		scoreFn = structs.ScoreFitSpread
+	}
+
 	iter := &BinPackIterator{
-		ctx:       ctx,
-		source:    source,
-		evict:     evict,
-		priority:  priority,
-		algorithm: algorithm,
+		ctx:      ctx,
+		source:   source,
+		evict:    evict,
+		priority: priority,
+		scoreFit: scoreFn,
 	}
 	iter.ctx.Logger().Named("binpack").Trace("NewBinPackIterator created", "algorithm", algorithm)
 	return iter
@@ -439,7 +445,7 @@ OUTER:
 		}
 
 		// Score the fit normally otherwise
-		fitness := structs.ScoreFit(option.Node, util, iter.algorithm)
+		fitness := iter.scoreFit(option.Node, util)
 		normalizedFit := fitness / binPackingMaxFitScore
 		option.Scores = append(option.Scores, normalizedFit)
 		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit)
diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go
index aa1ee01a1f02..7454d075a217 100644
--- a/scheduler/rank_test.go
+++ b/scheduler/rank_test.go
@@ -107,7 +107,7 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -220,7 +220,7 @@ func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -471,7 +471,7 @@ func TestBinPackIterator_Network_Failure(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
 
@@ -569,7 +569,7 @@ func TestBinPackIterator_PlannedAlloc(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -685,7 +685,7 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) {
 			},
 		},
 	}
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -805,7 +805,7 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) {
 		},
 	}
 
-	binp := NewBinPackIterator(ctx, static, false, 0)
+	binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 	binp.SetTaskGroup(taskGroup)
 
 	scoreNorm := NewScoreNormalizationIterator(ctx, binp)
@@ -1110,7 +1110,7 @@ func TestBinPackIterator_Devices(t *testing.T) {
 			}
 
 			static := NewStaticRankIterator(ctx, []*RankedNode{{Node: c.Node}})
-			binp := NewBinPackIterator(ctx, static, false, 0)
+			binp := NewBinPackIterator(ctx, static, false, 0, structs.SchedulerAlgorithmBinpack)
 			binp.SetTaskGroup(c.TaskGroup)
 
 			out := binp.Next()
diff --git a/scheduler/stack.go b/scheduler/stack.go
index d1a484fd947c..b9ceb19bb31e 100644
--- a/scheduler/stack.go
+++ b/scheduler/stack.go
@@ -237,11 +237,10 @@ func NewSystemStack(ctx Context) *SystemStack {
 	// by a particular task group. Enable eviction as system jobs are high
 	// priority.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
+	schedulerAlgorithm := schedConfig.EffectiveSchedulerAlgorithm()
 	enablePreemption := true
-	schedulerAlgorithm := "binpack"
 	if schedConfig != nil {
 		enablePreemption = schedConfig.PreemptionConfig.SystemSchedulerEnabled
-		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
 	}
 
 	s.binPack = NewBinPackIterator(ctx, rankSource, enablePreemption, 0, schedulerAlgorithm)
diff --git a/scheduler/stack_oss.go b/scheduler/stack_oss.go
index 8aaa98031043..a705b8c7395b 100644
--- a/scheduler/stack_oss.go
+++ b/scheduler/stack_oss.go
@@ -61,10 +61,7 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack {
 	// Apply the bin packing, this depends on the resources needed
 	// by a particular task group.
 	_, schedConfig, _ := s.ctx.State().SchedulerConfig()
-	schedulerAlgorithm := "binpack"
-	if schedConfig != nil {
-		schedulerAlgorithm = schedConfig.SchedulerAlgorithm
-	}
+	schedulerAlgorithm := schedConfig.EffectiveSchedulerAlgorithm()
 
 	s.binPack = NewBinPackIterator(ctx, rankSource, false, 0, schedulerAlgorithm)

From 9962b9fe7ac2969731477c888a79a1e7b4a3369f Mon Sep 17 00:00:00 2001
From: Mahmood Ali
Date: Fri, 1 May 2020 13:12:52 -0400
Subject: [PATCH 4/4] changelog and fix typo

---
 CHANGELOG.md                | 4 ++++
 nomad/structs/funcs_test.go | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e19855fb26bf..218d053bcca2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,10 @@ FEATURES:
 
  * **Task dependencies UI**: task lifecycle charts and details
 
+IMPROVEMENTS:
+
+ * core: Allow spreading allocations as an alternative to binpacking [[GH-7810](https://github.com/hashicorp/nomad/issues/7810)]
+
 BUG FIXES:
 
  * api: autoscaling policies should not be returned for stopped jobs [[GH-7768](https://github.com/hashicorp/nomad/issues/7768)]
diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go
index 62236f0cd0df..725909abeccf 100644
--- a/nomad/structs/funcs_test.go
+++ b/nomad/structs/funcs_test.go
@@ -592,7 +592,7 @@ func TestScoreFitBinPack(t *testing.T) {
 		spreadScore  float64
 	}{
 		{
-			name: "almost filled node, but with just enough whole",
+			name: "almost filled node, but with just enough hole",
 			flattened: AllocatedTaskResources{
 				Cpu:    AllocatedCpuResources{CpuShares: 2048},
 				Memory: AllocatedMemoryResources{MemoryMB: 4096},
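A note on the scoring arithmetic introduced in PATCH 3/4's nomad/structs/funcs.go: ScoreFitBinPack and ScoreFitSpread are exact mirrors of each other around the shared term total = 10^freePctCpu + 10^freePctRam, which ranges from 2 at full utilization to 20 at idle. That symmetry is why TestScoreFitBinPack asserts the two scores always sum to 18. The standalone sketch below reproduces just that arithmetic for the three test cases; it deliberately omits the reserved-resource handling and bounds clamping of the real functions:

```go
package main

import (
	"fmt"
	"math"
)

// scoreBinPack mirrors ScoreFitBinPack: BestFit v3 from the Google
// datacenter-scheduling slides. Higher scores favor already-utilized nodes.
func scoreBinPack(freePctCpu, freePctRam float64) float64 {
	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
	return 20.0 - total // total is in [2, 20], so the score is in [0, 18]
}

// scoreSpread mirrors ScoreFitSpread (Worst Fit): higher scores favor
// emptier nodes.
func scoreSpread(freePctCpu, freePctRam float64) float64 {
	total := math.Pow(10, freePctCpu) + math.Pow(10, freePctRam)
	return total - 2.0
}

func main() {
	// The three free percentages correspond to the test cases: a full
	// node (0), a half-utilized node (0.5), and an empty node (1).
	for _, free := range []float64{0, 0.5, 1} {
		b, s := scoreBinPack(free, free), scoreSpread(free, free)
		fmt.Printf("free=%.1f binpack=%.3f spread=%.3f sum=%.0f\n", free, b, s, b+s)
	}
}
```

Running it prints binpack scores of 18, 13.675, and 0 against spread scores of 0, 4.325, and 18, matching the binPackScore/spreadScore expectations in the table-driven test, with every pair summing to 18.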
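A design note on PATCH 3/4's rank.go change: rather than storing the algorithm and comparing strings on every candidate node, NewBinPackIterator resolves the selection to a function value once at construction, so the per-node scoring path is a single indirect call. Reduced to its essentials (all names in this sketch are illustrative, not Nomad's):

```go
package main

import "fmt"

type node struct{ free float64 }

// Two interchangeable scoring policies with the same signature.
func scoreBinPack(n node) float64 { return 1 - n.free }
func scoreSpread(n node) float64  { return n.free }

type iterator struct {
	// The policy is bound once at construction, so the hot path needs
	// no branching or string comparison per node.
	scoreFit func(node) float64
}

func newIterator(algorithm string) *iterator {
	scoreFn := scoreBinPack
	if algorithm == "spread" {
		scoreFn = scoreSpread
	}
	return &iterator{scoreFit: scoreFn}
}

func main() {
	it := newIterator("spread")
	fmt.Println(it.scoreFit(node{free: 0.75})) // 0.75
}
```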
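The nil-receiver defaulting added to nomad/structs/operator.go is what lets NewSystemStack and NewGenericStack delete their schedConfig != nil branches in the same patch. A minimal illustration built on the series' own definitions (it assumes a tree with these patches applied):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// A nil *SchedulerConfiguration is legal input: the method checks
	// its own receiver, so callers fall back to binpack without
	// guarding the pointer themselves.
	var nilConf *structs.SchedulerConfiguration
	fmt.Println(nilConf.EffectiveSchedulerAlgorithm()) // binpack

	// An explicit selection is passed through unchanged.
	conf := &structs.SchedulerConfiguration{
		SchedulerAlgorithm: structs.SchedulerAlgorithmSpread,
	}
	fmt.Println(conf.EffectiveSchedulerAlgorithm()) // spread

	// Validate rejects anything other than "", "binpack", or "spread";
	// both the agent (isValidConfig) and the HTTP endpoint rely on it.
	bad := &structs.SchedulerConfiguration{SchedulerAlgorithm: "best-fit"}
	fmt.Println(bad.Validate()) // invalid scheduler algorithm: best-fit
}
```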
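For operators, the end-to-end flow looks roughly like the sketch below. It assumes the api package's existing Operator client methods SchedulerGetConfiguration and SchedulerSetConfiguration, which wrap the scheduler-configuration endpoint served by schedulerUpdateConfig above; treat the exact response field names as an assumption rather than a guarantee:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	// Switch the cluster to the spread algorithm. An invalid value is
	// rejected server-side with a 400 via SchedulerConfiguration.Validate.
	conf := &api.SchedulerConfiguration{
		SchedulerAlgorithm: api.SchedulerAlgorithmSpread,
		PreemptionConfig:   api.PreemptionConfig{SystemSchedulerEnabled: true},
	}
	if _, _, err := client.Operator().SchedulerSetConfiguration(conf, nil); err != nil {
		log.Fatal(err)
	}

	// Read the setting back to confirm it took effect.
	resp, _, err := client.Operator().SchedulerGetConfiguration(nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("algorithm:", resp.SchedulerConfig.SchedulerAlgorithm)
}
```

The same setting can be fixed at agent startup through the server stanza's default_scheduler_config block, exactly as the basic.hcl testdata in PATCH 3/4 shows with scheduler_algorithm = "spread".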