
Commit 597344e

This PR sets a preferredDuringSchedulingIgnoredDuringExecution PodAffinity
with a HostName topology.

This does a pretty decent job of grouping together GameServer Pods. It does
tend to distribute more widely when large groups of GameServer Pods get created,
but it's worth experimenting with this first, before going down the riskier
route of a custom scheduler (in which we've already found some issues).

We may also find that as GameServers shut down at the end of sessions, they start
to group together when they reschedule, since at lower load the scheduler tends
to do a better job of packing.

Working towards #368
markmandel committed Oct 25, 2018
1 parent 8bbcecb commit 597344e
Showing 8 changed files with 175 additions and 59 deletions.
21 changes: 20 additions & 1 deletion docs/scheduling_autoscaling.md
@@ -4,7 +4,6 @@

> Autoscaling is currently ongoing work within Agones. The work you see here is just the beginning.

Table of Contents
=================

@@ -37,6 +36,12 @@ To facilitate autoscaling, we need to combine several pieces of concepts and func
Allocation scheduling refers to the order in which `GameServers`, and specifically their backing `Pods`, are chosen
from across the Kubernetes cluster within a given `Fleet` when [allocation](./create_fleet.md#4-allocate-a-game-server-from-the-fleet) occurs.

### Pod Scheduling

Each `GameServer` is backed by a Kubernetes [`Pod`](https://kubernetes.io/docs/concepts/workloads/pods/pod/). Pod scheduling
refers to the strategy that determines which node in the Kubernetes cluster a `Pod` is assigned to
when it is created.

## Fleet Scheduling

There are two scheduling strategies for Fleets - each designed for different types of Kubernetes Environments.
@@ -77,6 +82,15 @@ also affect `GameServer` `Pod` scheduling, and `Fleet` scale down scheduling as
Under the "Packed" strategy, allocation will prioritise allocating `GameServers` to nodes that are running on
Nodes that already have allocated `GameServers` running on them.

#### Pod Scheduling Strategy

Under the "Packed" strategy, Pods will be scheduled using the [`PodAffinity`](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#inter-pod-affinity-and-anti-affinity-beta-feature)
with a `preferredDuringSchedulingIgnoredDuringExecution` affinity with [hostname](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels)
topology. This attempts to group together `GameServer` Pods within as few nodes in the cluster as it can.

> The default Kubernetes scheduler doesn't do a perfect job of packing, but it's a good enough job for what we need -
at least at this stage.
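
For reference, the affinity this produces on each `GameServer` `Pod` looks roughly like the following sketch - assuming the `RoleLabel`/`GameServerLabelRole` constants resolve to the `stable.agones.dev/role: gameserver` label (see `podScheduling` in the diff below):

```yaml
affinity:
  podAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          topologyKey: kubernetes.io/hostname
          labelSelector:
            matchLabels:
              # assumed values for RoleLabel: GameServerLabelRole
              stable.agones.dev/role: gameserver
```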

### Distributed

```yaml
@@ -111,3 +125,8 @@ also affect `GameServer` `Pod` scheduling, and `Fleet` scaledown scheduling as w

Under the "Distributed" strategy, allocation will prioritise allocating `GameSerers` to nodes that have the least
number of allocated `GameServers` on them.

#### Pod Scheduling Strategy

Under the "Distributed" strategy, `Pod` scheduling is provided by the default Kubernetes scheduler, which will attempt
to distribute the `GameServer` `Pods` across as many nodes as possible.
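
As a usage sketch, the strategy is chosen via the new `scheduling` field on the `Fleet` (field name taken from the `json:"scheduling"` tags added in this commit; the API group is assumed from this package's path, and the metadata, port, and image values are purely illustrative):

```yaml
apiVersion: stable.agones.dev/v1alpha1   # assumed group for pkg/apis/stable/v1alpha1
kind: Fleet
metadata:
  name: fleet-example                    # hypothetical name
spec:
  replicas: 2
  scheduling: Distributed                # when omitted, defaults to "Packed"
  template:
    spec:
      ports:
        - containerPort: 7654            # hypothetical port
      template:
        spec:
          containers:
            - name: example-server                      # hypothetical container
              image: gcr.io/agones-images/example:0.1   # hypothetical image
```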
17 changes: 2 additions & 15 deletions pkg/apis/stable/v1alpha1/fleet.go
@@ -22,20 +22,6 @@ import (
)

const (
// Packed scheduling strategy will prioritise allocating GameServers
// on Nodes with the most Allocated, and then Ready GameServers
// to bin pack as many Allocated GameServers on a single node.
// This is most useful for dynamic Kubernetes clusters - such as on Cloud Providers.
// In future versions, this will also impact Fleet scale down, and Pod Scheduling.
Packed SchedulingStrategy = "Packed"

// Distributed scheduling strategy will prioritise allocating GameServers
// on Nodes with the least Allocated, and then Ready GameServers
// to distribute Allocated GameServers across many nodes.
// This is most useful for statically sized Kubernetes clusters - such as on physical hardware.
// In future versions, this will also impact Fleet scale down, and Pod Scheduling.
Distributed SchedulingStrategy = "Distributed"

// FleetGameServerSetLabel is the label that the name of the Fleet
// is set to on the GameServerSet the Fleet controls
FleetGameServerSetLabel = stable.GroupName + "/fleet"
@@ -93,7 +79,8 @@ func (f *Fleet) GameServerSet() *GameServerSet {
gsSet := &GameServerSet{
ObjectMeta: *f.Spec.Template.ObjectMeta.DeepCopy(),
Spec: GameServerSetSpec{
Template: f.Spec.Template,
Template: f.Spec.Template,
Scheduling: f.Spec.Scheduling,
},
}

4 changes: 3 additions & 1 deletion pkg/apis/stable/v1alpha1/fleet_test.go
@@ -31,7 +31,8 @@ func TestFleetGameServerSetGameServer(t *testing.T) {
UID: "1234",
},
Spec: FleetSpec{
Replicas: 10,
Replicas: 10,
Scheduling: Packed,
Template: GameServerTemplateSpec{
Spec: GameServerSpec{
Ports: []GameServerPort{{ContainerPort: 1234}},
@@ -51,6 +52,7 @@
assert.Equal(t, f.ObjectMeta.Name+"-", gsSet.ObjectMeta.GenerateName)
assert.Equal(t, f.ObjectMeta.Name, gsSet.ObjectMeta.Labels[FleetGameServerSetLabel])
assert.Equal(t, int32(0), gsSet.Spec.Replicas)
assert.Equal(t, f.Spec.Scheduling, gsSet.Spec.Scheduling)
assert.Equal(t, f.Spec.Template, gsSet.Spec.Template)
assert.True(t, v1.IsControlledBy(gsSet, &f))
}
86 changes: 65 additions & 21 deletions pkg/apis/stable/v1alpha1/gameserver.go
@@ -117,6 +117,8 @@ type GameServerSpec struct {
Ports []GameServerPort `json:"ports"`
// Health configures health checking
Health Health `json:"health,omitempty"`
// Scheduling strategy. Defaults to "Packed".
Scheduling SchedulingStrategy `json:"scheduling"`
// Template describes the Pod that will be created for the GameServer
Template corev1.PodTemplateSpec `json:"template"`
}
@@ -182,6 +184,7 @@ func (gs *GameServer) ApplyDefaults() {
gs.applyPortDefaults()
gs.applyStateDefaults()
gs.applyHealthDefaults()
gs.applySchedulingDefaults()
}

// applyContainerDefaults applies the container defaults
@@ -230,6 +233,12 @@ func (gs *GameServer) applyPortDefaults() {
}
}

func (gs *GameServer) applySchedulingDefaults() {
if gs.Spec.Scheduling == "" {
gs.Spec.Scheduling = Packed
}
}

// Validate validates the GameServer configuration.
// If a GameServer is invalid there will be > 0 values in
// the returned array
@@ -289,18 +298,47 @@ func (gs *GameServer) Pod(sidecars ...corev1.Container) (*corev1.Pod, error) {
ObjectMeta: *gs.Spec.Template.ObjectMeta.DeepCopy(),
Spec: *gs.Spec.Template.Spec.DeepCopy(),
}

gs.podObjectMeta(pod)

if pod.Spec.ServiceAccountName == "" {
pod.Spec.ServiceAccountName = SidecarServiceAccountName
}

i, gsContainer, err := gs.FindGameServerContainer()
// this shouldn't happen, but if it does.
if err != nil {
return pod, err
}

for _, p := range gs.Spec.Ports {
cp := corev1.ContainerPort{
ContainerPort: p.ContainerPort,
HostPort: p.HostPort,
Protocol: p.Protocol,
}
gsContainer.Ports = append(gsContainer.Ports, cp)
}
pod.Spec.Containers[i] = gsContainer

pod.Spec.Containers = append(pod.Spec.Containers, sidecars...)

gs.podScheduling(pod)

return pod, nil
}

// podObjectMeta configures the pod ObjectMeta details
func (gs *GameServer) podObjectMeta(pod *corev1.Pod) {
// Switch to GenerateName, so that we always get a Unique name for the Pod, and there
// can be no collisions
pod.ObjectMeta.GenerateName = gs.ObjectMeta.Name + "-"
pod.ObjectMeta.Name = ""
// Pods for GameServers need to stay in the same namespace
pod.ObjectMeta.Namespace = gs.ObjectMeta.Namespace
// Make sure these are blank, just in case
pod.ResourceVersion = ""
if pod.Spec.ServiceAccountName == "" {
pod.Spec.ServiceAccountName = SidecarServiceAccountName
}
pod.UID = ""
pod.ObjectMeta.ResourceVersion = ""
pod.ObjectMeta.UID = ""
if pod.ObjectMeta.Labels == nil {
pod.ObjectMeta.Labels = make(map[string]string, 2)
}
@@ -312,28 +350,34 @@ func (gs *GameServer) Pod(sidecars ...corev1.Container) (*corev1.Pod, error) {
pod.ObjectMeta.Labels[GameServerPodLabel] = gs.ObjectMeta.Name
// store the GameServer container as an annotation, to make lookup at a Pod level easier
pod.ObjectMeta.Annotations[GameServerContainerAnnotation] = gs.Spec.Container

ref := metav1.NewControllerRef(gs, SchemeGroupVersion.WithKind("GameServer"))
pod.ObjectMeta.OwnerReferences = append(pod.ObjectMeta.OwnerReferences, *ref)
}

i, gsContainer, err := gs.FindGameServerContainer()
// this shouldn't happen, but if it does.
if err != nil {
return pod, err
}
// podScheduling applies the Fleet scheduling strategy to the passed in Pod.
// Under the Packed strategy, this sets a PreferredDuringSchedulingIgnoredDuringExecution
// PodAffinity with a host topology for GameServer Pods - basically doing a half decent
// job of packing GameServer Pods together.
// TODO: update the scheduling doc
func (gs *GameServer) podScheduling(pod *corev1.Pod) {
if gs.Spec.Scheduling == Packed {
if pod.Spec.Affinity == nil {
pod.Spec.Affinity = &corev1.Affinity{}
}
if pod.Spec.Affinity.PodAffinity == nil {
pod.Spec.Affinity.PodAffinity = &corev1.PodAffinity{}
}

for _, p := range gs.Spec.Ports {
cp := corev1.ContainerPort{
ContainerPort: p.ContainerPort,
HostPort: p.HostPort,
Protocol: p.Protocol,
wpat := corev1.WeightedPodAffinityTerm{
Weight: 100,
PodAffinityTerm: corev1.PodAffinityTerm{
TopologyKey: "kubernetes.io/hostname",
LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{RoleLabel: GameServerLabelRole}},
},
}
gsContainer.Ports = append(gsContainer.Ports, cp)
}
pod.Spec.Containers[i] = gsContainer

pod.Spec.Containers = append(pod.Spec.Containers, sidecars...)
return pod, nil
pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution, wpat)
}
}

// HasPortPolicy checks if there is a port with a given
70 changes: 50 additions & 20 deletions pkg/apis/stable/v1alpha1/gameserver_test.go
@@ -53,10 +53,11 @@ func TestGameServerApplyDefaults(t *testing.T) {
t.Parallel()

type expected struct {
protocol corev1.Protocol
state State
policy PortPolicy
health Health
protocol corev1.Protocol
state State
policy PortPolicy
health Health
scheduling SchedulingStrategy
}
data := map[string]struct {
gameServer GameServer
@@ -74,9 +75,10 @@
},
container: "testing",
expected: expected{
protocol: "UDP",
state: PortAllocation,
policy: Dynamic,
protocol: "UDP",
state: PortAllocation,
policy: Dynamic,
scheduling: Packed,
health: Health{
Disabled: false,
FailureThreshold: 3,
@@ -109,9 +111,10 @@ func TestGameServerApplyDefaults(t *testing.T) {
Status: GameServerStatus{State: "TestState"}},
container: "testing2",
expected: expected{
protocol: "TCP",
state: "TestState",
policy: Static,
protocol: "TCP",
state: "TestState",
policy: Static,
scheduling: Packed,
health: Health{
Disabled: false,
FailureThreshold: 10,
@@ -129,9 +132,10 @@
},
container: "testing",
expected: expected{
protocol: "UDP",
state: Creating,
policy: Static,
protocol: "UDP",
state: Creating,
policy: Static,
scheduling: Packed,
health: Health{
Disabled: false,
FailureThreshold: 3,
@@ -150,9 +154,10 @@
},
container: "testing",
expected: expected{
protocol: "UDP",
state: PortAllocation,
policy: Dynamic,
protocol: "UDP",
state: PortAllocation,
policy: Dynamic,
scheduling: Packed,
health: Health{
Disabled: true,
},
@@ -175,10 +180,11 @@
},
container: "testing",
expected: expected{
protocol: corev1.ProtocolTCP,
state: Creating,
policy: Static,
health: Health{Disabled: true},
protocol: corev1.ProtocolTCP,
state: Creating,
policy: Static,
scheduling: Packed,
health: Health{Disabled: true},
},
},
}
@@ -193,6 +199,7 @@
assert.Equal(t, test.expected.protocol, spec.Ports[0].Protocol)
assert.Equal(t, test.expected.state, test.gameServer.Status.State)
assert.Equal(t, test.expected.health, test.gameServer.Spec.Health)
assert.Equal(t, test.expected.scheduling, test.gameServer.Spec.Scheduling)
})
}
}
@@ -278,6 +285,29 @@ func TestGameServerPod(t *testing.T) {
assert.True(t, metav1.IsControlledBy(pod, fixture))
}

func TestGameServerPodScheduling(t *testing.T) {
fixture := &corev1.Pod{Spec: corev1.PodSpec{}}

t.Run("packed", func(t *testing.T) {
gs := &GameServer{Spec: GameServerSpec{Scheduling: Packed}}
pod := fixture.DeepCopy()
gs.podScheduling(pod)

assert.Len(t, pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution, 1)
wpat := pod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0]
assert.Equal(t, int32(100), wpat.Weight)
assert.Contains(t, wpat.PodAffinityTerm.LabelSelector.String(), GameServerLabelRole)
assert.Contains(t, wpat.PodAffinityTerm.LabelSelector.String(), RoleLabel)
})

t.Run("distributed", func(t *testing.T) {
gs := &GameServer{Spec: GameServerSpec{Scheduling: Distributed}}
pod := fixture.DeepCopy()
gs.podScheduling(pod)
assert.Empty(t, pod.Spec.Affinity)
})
}

func TestGameServerCountPorts(t *testing.T) {
fixture := &GameServer{Spec: GameServerSpec{Ports: []GameServerPort{
{PortPolicy: Dynamic},
4 changes: 4 additions & 0 deletions pkg/apis/stable/v1alpha1/gameserverset.go
@@ -56,6 +56,8 @@ type GameServerSetList struct {
type GameServerSetSpec struct {
// Replicas are the number of GameServers that should be in this set
Replicas int32 `json:"replicas"`
// Scheduling strategy. Defaults to "Packed".
Scheduling SchedulingStrategy `json:"scheduling"`
// Template the GameServer template to apply for this GameServerSet
Template GameServerTemplateSpec `json:"template"`
}
@@ -93,6 +95,8 @@ func (gsSet *GameServerSet) GameServer() *GameServer {
Spec: *gsSet.Spec.Template.Spec.DeepCopy(),
}

gs.Spec.Scheduling = gsSet.Spec.Scheduling

// Switch to GenerateName, so that we always get a Unique name for the GameServer, and there
// can be no collisions
gs.ObjectMeta.GenerateName = gsSet.ObjectMeta.Name + "-"
