Skip to content

Commit

Permalink
[Regression] Fleet scale down didn't adhere to Packed Scheduling
Browse files Browse the repository at this point in the history
The new Fleet scale up/scale down performance enhancements removed
the functionality that the `Packed` strategy is documented to
provide -- when scaling down, removing GameServers from least used Nodes
first.

To maintain performance as well, the set of GameServers only get sorted when
scaling down, and only for Packed strategy.
  • Loading branch information
markmandel committed Mar 14, 2019
1 parent b133e52 commit 5768ea3
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 114 deletions.
2 changes: 1 addition & 1 deletion cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func main() {
ctlConf.MinPort, ctlConf.MaxPort, ctlConf.SidecarImage, ctlConf.AlwaysPullSidecar,
ctlConf.SidecarCPURequest, ctlConf.SidecarCPULimit,
kubeClient, kubeInformerFactory, extClient, agonesClient, agonesInformerFactory)
gsSetController := gameserversets.NewController(wh, health,
gsSetController := gameserversets.NewController(wh, health, gsCounter,
kubeClient, extClient, agonesClient, agonesInformerFactory)
fleetController := fleets.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory)
faController := fleetallocation.NewController(wh, allocationMutex,
Expand Down
6 changes: 6 additions & 0 deletions pkg/gameserverallocations/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ import (
"k8s.io/client-go/tools/cache"
)

const (
defaultNs = "default"
n1 = "node1"
n2 = "node2"
)

var (
gvk = metav1.GroupVersionKind(v1alpha1.SchemeGroupVersion.WithKind("GameServerAllocation"))
)
Expand Down
21 changes: 0 additions & 21 deletions pkg/gameserverallocations/helper_test.go

This file was deleted.

File renamed without changes.
File renamed without changes.
43 changes: 29 additions & 14 deletions pkg/gameserversets/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package gameserversets

import (
"encoding/json"
"sort"
"sync"

"agones.dev/agones/pkg/apis/stable"
Expand All @@ -25,6 +24,7 @@ import (
getterv1alpha1 "agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1"
"agones.dev/agones/pkg/client/informers/externalversions"
listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1"
"agones.dev/agones/pkg/gameservers"
"agones.dev/agones/pkg/util/crd"
"agones.dev/agones/pkg/util/logfields"
"agones.dev/agones/pkg/util/runtime"
Expand Down Expand Up @@ -66,6 +66,7 @@ const (
// Controller is a the GameServerSet controller
type Controller struct {
baseLogger *logrus.Entry
counter *gameservers.PerNodeCounter
crdGetter v1beta1.CustomResourceDefinitionInterface
gameServerGetter getterv1alpha1.GameServersGetter
gameServerLister listerv1alpha1.GameServerLister
Expand All @@ -83,6 +84,7 @@ type Controller struct {
func NewController(
wh *webhooks.WebHook,
health healthcheck.Handler,
counter *gameservers.PerNodeCounter,
kubeClient kubernetes.Interface,
extClient extclientset.Interface,
agonesClient versioned.Interface,
Expand All @@ -95,6 +97,7 @@ func NewController(

c := &Controller{
crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(),
counter: counter,
gameServerGetter: agonesClient.StableV1alpha1(),
gameServerLister: gameServers.Lister(),
gameServerSynced: gsInformer.HasSynced,
Expand Down Expand Up @@ -305,7 +308,8 @@ func (c *Controller) syncGameServerSet(key string) error {

list = c.stateCache.forGameServerSet(gsSet).reconcileWithUpdatedServerList(list)

numServersToAdd, toDelete, isPartial := computeReconciliationAction(list, int(gsSet.Spec.Replicas), maxGameServerCreationsPerBatch, maxGameServerDeletionsPerBatch, maxPodPendingCount)
numServersToAdd, toDelete, isPartial := computeReconciliationAction(gsSet.Spec.Scheduling, list, c.counter.Counts(),
int(gsSet.Spec.Replicas), maxGameServerCreationsPerBatch, maxGameServerDeletionsPerBatch, maxPodPendingCount)
status := computeStatus(list)
fields := logrus.Fields{}

Expand Down Expand Up @@ -353,33 +357,39 @@ func (c *Controller) syncGameServerSet(key string) error {

// computeReconciliationAction computes the action to take to reconcile a game server set set given
// the list of game servers that were found and target replica count.
func computeReconciliationAction(list []*v1alpha1.GameServer, targetReplicaCount int, maxCreations int, maxDeletions int, maxPending int) (int, []*v1alpha1.GameServer, bool) {
var upCount int // up == Ready or will become ready
func computeReconciliationAction(strategy v1alpha1.SchedulingStrategy, list []*v1alpha1.GameServer,
counts map[string]gameservers.NodeCount, targetReplicaCount int, maxCreations int, maxDeletions int,
maxPending int) (int, []*v1alpha1.GameServer, bool) {
var upCount int // up == Ready or will become ready
var deleteCount int // number of gameservers to delete

// track the number of pods that are being created at any given moment by the GameServerSet
// so we can limit it at a throughput that Kubernetes can handle
var podPendingCount int // podPending == "up" but don't have a Pod running yet

var potentialDeletions []*v1alpha1.GameServer
var toDelete []*v1alpha1.GameServer

scheduleDeletion := func(gs *v1alpha1.GameServer) {
if gs.ObjectMeta.DeletionTimestamp.IsZero() {
toDelete = append(toDelete, gs)
}
toDelete = append(toDelete, gs)
deleteCount--
}

handleGameServerUp := func(gs *v1alpha1.GameServer) {
if upCount >= targetReplicaCount {
scheduleDeletion(gs)
deleteCount++
} else {
upCount++
}

// Track gameservers that could be potentially deleted
potentialDeletions = append(potentialDeletions, gs)
}

// pass 1 - count allocated servers only, since those can't be touched
for _, gs := range list {
if gs.IsAllocated() {
upCount++
continue
}
}

Expand Down Expand Up @@ -446,12 +456,17 @@ func computeReconciliationAction(list []*v1alpha1.GameServer, targetReplicaCount
}
}

if len(toDelete) > maxDeletions {
// we have to pick which GS to delete, let's delete the newest ones first.
sort.Slice(toDelete, func(i, j int) bool {
return toDelete[i].ObjectMeta.CreationTimestamp.After(toDelete[j].ObjectMeta.CreationTimestamp.Time)
})
if deleteCount > 0 {
if strategy == v1alpha1.Packed {
potentialDeletions = sortGameServersByLeastFullNodes(potentialDeletions, counts)
} else {
potentialDeletions = sortGameServersByNewFirst(potentialDeletions)
}

toDelete = append(toDelete, potentialDeletions[0:deleteCount]...)
}

if deleteCount > maxDeletions {
toDelete = toDelete[0:maxDeletions]
partialReconciliation = true
}
Expand Down
52 changes: 50 additions & 2 deletions pkg/gameserversets/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"time"

"agones.dev/agones/pkg/apis/stable/v1alpha1"
"agones.dev/agones/pkg/gameservers"
agtesting "agones.dev/agones/pkg/testing"
"agones.dev/agones/pkg/util/webhooks"
"github.com/heptiolabs/healthcheck"
Expand Down Expand Up @@ -159,13 +160,59 @@ func TestComputeReconciliationAction(t *testing.T) {

for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
toAdd, toDelete, isPartial := computeReconciliationAction(tc.list, tc.targetReplicaCount, maxTestCreationsPerBatch, maxTestDeletionsPerBatch, maxTestPendingPerBatch)
toAdd, toDelete, isPartial := computeReconciliationAction(v1alpha1.Distributed, tc.list, map[string]gameservers.NodeCount{},
tc.targetReplicaCount, maxTestCreationsPerBatch, maxTestDeletionsPerBatch, maxTestPendingPerBatch)

assert.Equal(t, tc.wantNumServersToAdd, toAdd, "# of GameServers to add")
assert.Len(t, toDelete, tc.wantNumServersToDelete, "# of GameServers to delete")
assert.Equal(t, tc.wantIsPartial, isPartial, "is partial reconciliation")
})
}

t.Run("test packed scale down", func(t *testing.T) {
list := []*v1alpha1.GameServer{
{ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady, NodeName: "node3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady, NodeName: "node1"}},
{ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady, NodeName: "node3"}},
{ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady, NodeName: ""}},
}

counts := map[string]gameservers.NodeCount{"node1": {Ready: 1}, "node3": {Ready: 2}}
toAdd, toDelete, isPartial := computeReconciliationAction(v1alpha1.Packed, list, counts, 2,
1000, 1000, 1000)

assert.Empty(t, toAdd)
assert.False(t, isPartial, "shouldn't be partial")

assert.Len(t, toDelete, 2)
assert.Equal(t, "gs4", toDelete[0].ObjectMeta.Name)
assert.Equal(t, "gs2", toDelete[1].ObjectMeta.Name)
})

t.Run("test distributed scale down", func(t *testing.T) {
now := metav1.Now()

list := []*v1alpha1.GameServer{
{ObjectMeta: metav1.ObjectMeta{Name: "gs1",
CreationTimestamp: metav1.Time{Time: now.Add(10 * time.Second)}}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady}},
{ObjectMeta: metav1.ObjectMeta{Name: "gs2",
CreationTimestamp: now}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady}},
{ObjectMeta: metav1.ObjectMeta{Name: "gs3",
CreationTimestamp: metav1.Time{Time: now.Add(40 * time.Second)}}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady}},
{ObjectMeta: metav1.ObjectMeta{Name: "gs4",
CreationTimestamp: metav1.Time{Time: now.Add(30 * time.Second)}}, Status: v1alpha1.GameServerStatus{State: v1alpha1.GameServerStateReady}},
}

toAdd, toDelete, isPartial := computeReconciliationAction(v1alpha1.Distributed, list, map[string]gameservers.NodeCount{},
2, 1000, 1000, 1000)

assert.Empty(t, toAdd)
assert.False(t, isPartial, "shouldn't be partial")

assert.Len(t, toDelete, 2)
assert.Equal(t, "gs2", toDelete[0].ObjectMeta.Name)
assert.Equal(t, "gs1", toDelete[1].ObjectMeta.Name)
})
}

func TestComputeStatus(t *testing.T) {
Expand Down Expand Up @@ -573,7 +620,8 @@ func createGameServers(gsSet *v1alpha1.GameServerSet, size int) []v1alpha1.GameS
func newFakeController() (*Controller, agtesting.Mocks) {
m := agtesting.NewMocks()
wh := webhooks.NewWebHook(http.NewServeMux())
c := NewController(wh, healthcheck.NewHandler(), m.KubeClient, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory)
counter := gameservers.NewPerNodeCounter(m.KubeInformerFactory, m.AgonesInformerFactory)
c := NewController(wh, healthcheck.NewHandler(), counter, m.KubeClient, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory)
c.recorder = m.FakeRecorder
return c, m
}
72 changes: 25 additions & 47 deletions pkg/gameserversets/gameserversets.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,66 +19,44 @@ import (

"agones.dev/agones/pkg/apis/stable/v1alpha1"
listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1"
"agones.dev/agones/pkg/gameservers"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)

// node is just a convenience data structure for
// keeping relevant GameServer information about Nodes
type node struct {
name string
total int64
ready []*v1alpha1.GameServer
}

// filterGameServersOnLeastFullNodes returns a limited list of GameServers, ordered by the nodes
// they are hosted on, with the least utilised Nodes being prioritised
func filterGameServersOnLeastFullNodes(gsList []*v1alpha1.GameServer, limit int32) []*v1alpha1.GameServer {
if limit <= 0 {
return nil
}

nodeMap := map[string]*node{}
var nodeList []*node

// count up the number of allocated and ready game servers that exist
// also, since we're already looping through, track all the deletable GameServers
// per node, so we can use this as a shortlist to delete from
for _, gs := range gsList {
if gs.DeletionTimestamp.IsZero() &&
(gs.Status.State == v1alpha1.GameServerStateAllocated || gs.Status.State == v1alpha1.GameServerStateReady) {
_, ok := nodeMap[gs.Status.NodeName]
if !ok {
node := &node{name: gs.Status.NodeName}
nodeMap[gs.Status.NodeName] = node
nodeList = append(nodeList, node)
}
// sortGameServersByLeastFullNodes sorts the list of gameservers by which gameservers reside on the least full nodes
func sortGameServersByLeastFullNodes(list []*v1alpha1.GameServer, count map[string]gameservers.NodeCount) []*v1alpha1.GameServer {
sort.Slice(list, func(i, j int) bool {
a := list[i]
b := list[j]
// not scheduled yet/node deleted, put them first
ac, ok := count[a.Status.NodeName]
if !ok {
return true
}

nodeMap[gs.Status.NodeName].total++
if gs.Status.State == v1alpha1.GameServerStateReady {
nodeMap[gs.Status.NodeName].ready = append(nodeMap[gs.Status.NodeName].ready, gs)
}
bc, ok := count[b.Status.NodeName]
if !ok {
return false
}
}

// sort our nodes, least to most
sort.Slice(nodeList, func(i, j int) bool {
return nodeList[i].total < nodeList[j].total
return (ac.Allocated + ac.Ready) < (bc.Allocated + bc.Ready)
})

// we need to get Ready GameServer until we equal or pass limit
result := make([]*v1alpha1.GameServer, 0, limit)
return list
}

for _, n := range nodeList {
result = append(result, n.ready...)
// sortGameServersByNewFirst sorts by newest gameservers first, and returns them
func sortGameServersByNewFirst(list []*v1alpha1.GameServer) []*v1alpha1.GameServer {
sort.Slice(list, func(i, j int) bool {
a := list[i]
b := list[j]

if int32(len(result)) >= limit {
return result
}
}
return a.ObjectMeta.CreationTimestamp.Before(&b.ObjectMeta.CreationTimestamp)
})

return result
return list
}

// ListGameServersByGameServerSetOwner lists the GameServers for a given GameServerSet
Expand Down
Loading

0 comments on commit 5768ea3

Please sign in to comment.