From d8a6985c07468ee8a0e84d22eefe36e542c6cf11 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Tue, 5 Mar 2019 09:26:26 -0800 Subject: [PATCH] [Regression] Fleet scale down didn't adhere to Packed Scheduling The new Fleet scale up/scale down performance enhancements removed the functionality that the `Packed` strategy is documented to provide -- when scaling down, removing GameServers from least used Nodes first. This change implements a library for GameServer sorting that doesn't use reflection, unlike sort.Slice (much faster), and can be used with any Comparator, as I expect we'll likely need this again. This also pulls out the GameServerCounter controller - which keeps a running total of GameServers per Node, to make this kind of sorting much faster and easier as we don't have to compute this on every iteration. To maintain performance as well, the set of GameServers only get sorted when scaling down, and only for Packed strategy. --- cmd/controller/main.go | 10 ++- pkg/apis/stable/v1alpha1/gameserversort.go | 50 ++++++++++++ .../stable/v1alpha1/gameserversort_test.go | 37 +++++++++ pkg/gameserverallocations/controller.go | 32 ++++---- pkg/gameserverallocations/controller_test.go | 24 ++++-- pkg/gameserverallocations/find.go | 10 ++- pkg/gameserverallocations/helper_test.go | 21 ----- .../gameservercounter.go} | 49 ++++++------ .../gameservercounter_test.go} | 76 ++++++++++--------- pkg/gameserversets/controller.go | 39 ++++++---- pkg/gameserversets/controller_test.go | 11 ++- pkg/gameserversets/gameserversets.go | 65 ++++------------ pkg/gameserversets/gameserversets_test.go | 44 ++++------- 13 files changed, 259 insertions(+), 209 deletions(-) create mode 100644 pkg/apis/stable/v1alpha1/gameserversort.go create mode 100644 pkg/apis/stable/v1alpha1/gameserversort_test.go delete mode 100644 pkg/gameserverallocations/helper_test.go rename pkg/{gameserverallocations/allocationcounter.go => gameservers/gameservercounter.go} (81%) rename pkg/{gameserverallocations/allocationcounter_test.go => gameservers/gameservercounter_test.go} (72%) diff --git a/cmd/controller/main.go b/cmd/controller/main.go index e1bc58a1fd..6c62cceb91 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -181,22 +181,24 @@ func main() { allocationMutex := &sync.Mutex{} + gsCounter := gameservers.NewGameServerCounter(kubeInformerFactory, agonesInformerFactory) + gsController := gameservers.NewController(wh, health, ctlConf.MinPort, ctlConf.MaxPort, ctlConf.SidecarImage, ctlConf.AlwaysPullSidecar, ctlConf.SidecarCPURequest, ctlConf.SidecarCPULimit, kubeClient, kubeInformerFactory, extClient, agonesClient, agonesInformerFactory) - gsSetController := gameserversets.NewController(wh, health, + gsSetController := gameserversets.NewController(wh, health, gsCounter, kubeClient, extClient, agonesClient, agonesInformerFactory) fleetController := fleets.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory) faController := fleetallocation.NewController(wh, allocationMutex, kubeClient, extClient, agonesClient, agonesInformerFactory) - gasController := gameserverallocations.NewController(wh, health, kubeClient, - kubeInformerFactory, extClient, agonesClient, agonesInformerFactory, topNGSForAllocation) + gasController := gameserverallocations.NewController(wh, health, gsCounter, topNGSForAllocation, + kubeClient, extClient, agonesClient, agonesInformerFactory) fasController := fleetautoscalers.NewController(wh, health, kubeClient, extClient, agonesClient, agonesInformerFactory) rs = append(rs, - wh, gsController, gsSetController, fleetController, faController, fasController, gasController, server) + wh, gsCounter, gsController, gsSetController, fleetController, faController, fasController, gasController, server) stop := signals.NewStopChannel() diff --git a/pkg/apis/stable/v1alpha1/gameserversort.go b/pkg/apis/stable/v1alpha1/gameserversort.go new file mode 100644 index 0000000000..19600522fb --- /dev/null +++ b/pkg/apis/stable/v1alpha1/gameserversort.go @@ -0,0 +1,50 @@ +// Copyright 2019 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1alpha1 + +import "sort" + +// GameServerSort sorts gameservers, based on a comparator +type GameServerSort struct { + // List is the GameServer list to sort + List []*GameServer + // Less is the comparator function to use as a sorting function. + // returns true if a < b + Comparator func(a, b *GameServer) bool +} + +// Sort sorts the underlying Array and returns it +func (g *GameServerSort) Sort() []*GameServer { + sort.Sort(g) + return g.List +} + +// Len is the Len function for sort.Interface +func (g *GameServerSort) Len() int { + return len(g.List) +} + +// Less is the Less function for sort.Interface +func (g *GameServerSort) Less(i, j int) bool { + a := g.List[i] + b := g.List[j] + + return g.Comparator(a, b) +} + +// Swap is the swap function for sort.Interface +func (g *GameServerSort) Swap(i, j int) { + g.List[j], g.List[i] = g.List[i], g.List[j] +} diff --git a/pkg/apis/stable/v1alpha1/gameserversort_test.go b/pkg/apis/stable/v1alpha1/gameserversort_test.go new file mode 100644 index 0000000000..4e9462c0b0 --- /dev/null +++ b/pkg/apis/stable/v1alpha1/gameserversort_test.go @@ -0,0 +1,37 @@ +// Copyright 2019 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1alpha1 + +import ( + "testing" + + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGameServerSortSort(t *testing.T) { + list := []*GameServer{{ObjectMeta: metav1.ObjectMeta{Name: "c"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "b"}}, {ObjectMeta: metav1.ObjectMeta{Name: "a"}}} + + sort := GameServerSort{list, func(a, b *GameServer) bool { + return a.ObjectMeta.Name < b.ObjectMeta.Name + }} + + result := sort.Sort() + assert.Len(t, result, len(list)) + assert.Equal(t, "a", result[0].ObjectMeta.Name) + assert.Equal(t, "b", result[1].ObjectMeta.Name) + assert.Equal(t, "c", result[2].ObjectMeta.Name) +} diff --git a/pkg/gameserverallocations/controller.go b/pkg/gameserverallocations/controller.go index 3138a76d99..32c4172e0a 100644 --- a/pkg/gameserverallocations/controller.go +++ b/pkg/gameserverallocations/controller.go @@ -27,6 +27,7 @@ import ( getterv1alpha1 "agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1" "agones.dev/agones/pkg/client/informers/externalversions" listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1" + "agones.dev/agones/pkg/gameservers" "agones.dev/agones/pkg/util/crd" "agones.dev/agones/pkg/util/logfields" "agones.dev/agones/pkg/util/runtime" @@ -43,7 +44,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" @@ -61,8 +61,12 @@ var ( // Controller is a the GameServerAllocation controller type Controller struct { - baseLogger *logrus.Entry - counter *AllocationCounter + baseLogger *logrus.Entry + counter *gameservers.GameServerCounter + readyGameServers gameServerCacheEntry + // Instead of selecting the top one, controller selects a random one + // from the topNGameServerCount of Ready gameservers + topNGameServerCount int crdGetter v1beta1.CustomResourceDefinitionInterface gameServerSynced cache.InformerSynced gameServerGetter getterv1alpha1.GameServersGetter @@ -73,10 +77,6 @@ type Controller struct { workerqueue *workerqueue.WorkerQueue gsWorkerqueue *workerqueue.WorkerQueue recorder record.EventRecorder - readyGameServers gameServerCacheEntry - // Instead of selecting the top one, controller selects a random one - // from the topNGameServerCount of Ready gameservers - topNGameServerCount int } // gameserver cache to keep the Ready state gameserver. @@ -133,7 +133,7 @@ func (e *gameServerCacheEntry) Range(f func(key string, gs *v1alpha1.GameServer) // findComparator is a comparator function specifically for the // findReadyGameServerForAllocation method for determining // scheduling strategy -type findComparator func(bestCount, currentCount NodeCount) bool +type findComparator func(bestCount, currentCount gameservers.NodeCount) bool var allocationRetry = wait.Backoff{ Steps: 5, @@ -145,24 +145,24 @@ var allocationRetry = wait.Backoff{ // NewController returns a controller for a GameServerAllocation func NewController(wh *webhooks.WebHook, health healthcheck.Handler, + counter *gameservers.GameServerCounter, + topNGameServerCnt int, kubeClient kubernetes.Interface, - kubeInformerFactory informers.SharedInformerFactory, extClient extclientset.Interface, agonesClient versioned.Interface, agonesInformerFactory externalversions.SharedInformerFactory, - topNGameServerCnt int, ) *Controller { agonesInformer := agonesInformerFactory.Stable().V1alpha1() c := &Controller{ - counter: NewAllocationCounter(kubeInformerFactory, agonesInformerFactory), + counter: counter, + topNGameServerCount: topNGameServerCnt, crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(), gameServerSynced: agonesInformer.GameServers().Informer().HasSynced, gameServerGetter: agonesClient.StableV1alpha1(), gameServerLister: agonesInformer.GameServers().Lister(), gameServerAllocationSynced: agonesInformer.GameServerAllocations().Informer().HasSynced, gameServerAllocationGetter: agonesClient.StableV1alpha1(), - topNGameServerCount: topNGameServerCnt, } c.baseLogger = runtime.NewLoggerWithType(c) c.workerqueue = workerqueue.NewWorkerQueue(c.syncDelete, c.baseLogger, logfields.GameServerAllocationKey, stable.GroupName+".GameServerAllocationController") @@ -217,13 +217,7 @@ func (c *Controller) Run(workers int, stop <-chan struct{}) error { return err } - err = c.counter.Run(stop) - if err != nil { - return err - } - c.stop = stop - c.baseLogger.Info("Wait for cache sync") if !cache.WaitForCacheSync(stop, c.gameServerAllocationSynced, c.gameServerSynced) { return errors.New("failed to wait for caches to sync") @@ -444,7 +438,7 @@ func (c *Controller) syncDelete(key string) error { // preferred selectors, as well as the passed in comparator func (c *Controller) findReadyGameServerForAllocation(gsa *v1alpha1.GameServerAllocation, comparator findComparator) (*v1alpha1.GameServer, error) { // track the best node count - var bestCount *NodeCount + var bestCount *gameservers.NodeCount // the current GameServer from the node with the most GameServers (allocated, ready) var bestGS *v1alpha1.GameServer diff --git a/pkg/gameserverallocations/controller_test.go b/pkg/gameserverallocations/controller_test.go index e39c6bea86..2d1f8ca8e9 100644 --- a/pkg/gameserverallocations/controller_test.go +++ b/pkg/gameserverallocations/controller_test.go @@ -23,6 +23,7 @@ import ( "time" "agones.dev/agones/pkg/apis/stable/v1alpha1" + "agones.dev/agones/pkg/gameservers" agtesting "agones.dev/agones/pkg/testing" "agones.dev/agones/pkg/util/webhooks" applypatch "github.com/evanphx/json-patch" @@ -37,6 +38,12 @@ import ( "k8s.io/client-go/tools/cache" ) +const ( + defaultNs = "default" + n1 = "node1" + n2 = "node2" +) + var ( gvk = metav1.GroupVersionKind(v1alpha1.SchemeGroupVersion.WithKind("GameServerAllocation")) ) @@ -185,7 +192,7 @@ func TestControllerAllocate(t *testing.T) { err := c.syncReadyGSServerCache() assert.Nil(t, err) - err = c.counter.Run(stop) + err = c.counter.Run(0, stop) assert.Nil(t, err) gsa := v1alpha1.GameServerAllocation{ObjectMeta: metav1.ObjectMeta{Name: "gsa-1"}, @@ -262,7 +269,7 @@ func TestControllerAllocatePriority(t *testing.T) { err := c.syncReadyGSServerCache() assert.Nil(t, err) - err = c.counter.Run(stop) + err = c.counter.Run(0, stop) assert.Nil(t, err) gas := &v1alpha1.GameServerAllocation{ObjectMeta: metav1.ObjectMeta{Name: "fa-1"}, @@ -350,7 +357,7 @@ func TestControllerWithoutAllocateMutex(t *testing.T) { err := c.syncReadyGSServerCache() assert.Nil(t, err) - err = c.counter.Run(stop) + err = c.counter.Run(0, stop) assert.Nil(t, err) wg := sync.WaitGroup{} @@ -416,7 +423,7 @@ func TestControllerFindPackedReadyGameServer(t *testing.T) { err := c.syncReadyGSServerCache() assert.Nil(t, err) - err = c.counter.Run(stop) + err = c.counter.Run(0, stop) assert.Nil(t, err) gs, err := c.findReadyGameServerForAllocation(gsa, packedComparator) @@ -478,7 +485,7 @@ func TestControllerFindPackedReadyGameServer(t *testing.T) { err := c.syncReadyGSServerCache() assert.Nil(t, err) - err = c.counter.Run(stop) + err = c.counter.Run(0, stop) assert.Nil(t, err) gs, err := c.findReadyGameServerForAllocation(prefGsa, packedComparator) @@ -543,7 +550,7 @@ func TestControllerFindPackedReadyGameServer(t *testing.T) { err := c.syncReadyGSServerCache() assert.Nil(t, err) - err = c.counter.Run(stop) + err = c.counter.Run(0, stop) assert.Nil(t, err) gs, err := c.findReadyGameServerForAllocation(gsa, packedComparator) @@ -600,7 +607,7 @@ func TestControllerFindDistributedReadyGameServer(t *testing.T) { err := c.syncReadyGSServerCache() assert.Nil(t, err) - err = c.counter.Run(stop) + err = c.counter.Run(0, stop) assert.Nil(t, err) gs, err := c.findReadyGameServerForAllocation(gsa, distributedComparator) @@ -807,7 +814,8 @@ func defaultFixtures(gsLen int) (*v1alpha1.Fleet, *v1alpha1.GameServerSet, []v1a func newFakeController() (*Controller, agtesting.Mocks) { m := agtesting.NewMocks() wh := webhooks.NewWebHook("", "") - c := NewController(wh, healthcheck.NewHandler(), m.KubeClient, m.KubeInformerFactory, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory, 1) + counter := gameservers.NewGameServerCounter(m.KubeInformerFactory, m.AgonesInformerFactory) + c := NewController(wh, healthcheck.NewHandler(), counter, 1, m.KubeClient, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory) c.recorder = m.FakeRecorder return c, m } diff --git a/pkg/gameserverallocations/find.go b/pkg/gameserverallocations/find.go index 88df2083d9..99cd257662 100644 --- a/pkg/gameserverallocations/find.go +++ b/pkg/gameserverallocations/find.go @@ -14,12 +14,14 @@ package gameserverallocations +import "agones.dev/agones/pkg/gameservers" + // packedComparator prioritises Nodes with GameServers that are allocated, and then Nodes with the most // Ready GameServers -- this will bin pack allocated game servers together. -func packedComparator(bestCount, currentCount NodeCount) bool { - if currentCount.allocated == bestCount.allocated && currentCount.ready > bestCount.ready { +func packedComparator(bestCount, currentCount gameservers.NodeCount) bool { + if currentCount.Allocated == bestCount.Allocated && currentCount.Ready > bestCount.Ready { return true - } else if currentCount.allocated > bestCount.allocated { + } else if currentCount.Allocated > bestCount.Allocated { return true } @@ -28,6 +30,6 @@ func packedComparator(bestCount, currentCount NodeCount) bool { // distributedComparator is the inverse of the packed comparator, // looking to distribute allocated gameservers on as many nodes as possible. -func distributedComparator(bestCount, currentCount NodeCount) bool { +func distributedComparator(bestCount, currentCount gameservers.NodeCount) bool { return !packedComparator(bestCount, currentCount) } diff --git a/pkg/gameserverallocations/helper_test.go b/pkg/gameserverallocations/helper_test.go deleted file mode 100644 index 275c2a144d..0000000000 --- a/pkg/gameserverallocations/helper_test.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2018 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package gameserverallocations - -const ( - defaultNs = "default" - n1 = "node1" - n2 = "node2" -) diff --git a/pkg/gameserverallocations/allocationcounter.go b/pkg/gameservers/gameservercounter.go similarity index 81% rename from pkg/gameserverallocations/allocationcounter.go rename to pkg/gameservers/gameservercounter.go index f5a22a4182..a685cc2d3f 100644 --- a/pkg/gameserverallocations/allocationcounter.go +++ b/pkg/gameservers/gameservercounter.go @@ -1,4 +1,4 @@ -// Copyright 2018 Google Inc. All Rights Reserved. +// Copyright 2019 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package gameserverallocations +package gameservers import ( "sync" @@ -29,11 +29,11 @@ import ( "k8s.io/client-go/tools/cache" ) -// AllocationCounter counts how many Allocated and +// GameServerCounter counts how many Allocated and // Ready GameServers currently exist on each node. -// This is useful for scheduling allocations on the -// right Nodes. -type AllocationCounter struct { +// This is useful for scheduling allocations, fleet management +// mostly under a Packed strategy +type GameServerCounter struct { logger *logrus.Entry gameServerSynced cache.InformerSynced gameServerLister listerv1alpha1.GameServerLister @@ -44,19 +44,21 @@ type AllocationCounter struct { // NodeCount is just a convenience data structure for // keeping relevant GameServer counts about Nodes type NodeCount struct { - ready int64 - allocated int64 + // Ready is ready count + Ready int64 + // Allocated is allocated out + Allocated int64 } -// NewAllocationCounter returns a new AllocationCounter -func NewAllocationCounter( +// NewGameServerCounter returns a new GameServerCounter +func NewGameServerCounter( kubeInformerFactory informers.SharedInformerFactory, - agonesInformerFactory externalversions.SharedInformerFactory) *AllocationCounter { + agonesInformerFactory externalversions.SharedInformerFactory) *GameServerCounter { gameServers := agonesInformerFactory.Stable().V1alpha1().GameServers() gsInformer := gameServers.Informer() - ac := &AllocationCounter{ + ac := &GameServerCounter{ gameServerSynced: gsInformer.HasSynced, gameServerLister: gameServers.Lister(), countMutex: sync.RWMutex{}, @@ -131,7 +133,8 @@ func NewAllocationCounter( } // Run sets up the current state GameServer counts across nodes -func (ac *AllocationCounter) Run(stop <-chan struct{}) error { +// non blocking Run function. +func (ac *GameServerCounter) Run(_ int, stop <-chan struct{}) error { ac.countMutex.Lock() defer ac.countMutex.Unlock() @@ -155,9 +158,9 @@ func (ac *AllocationCounter) Run(stop <-chan struct{}) error { switch gs.Status.State { case v1alpha1.GameServerStateReady: - counts[gs.Status.NodeName].ready++ + counts[gs.Status.NodeName].Ready++ case v1alpha1.GameServerStateAllocated: - counts[gs.Status.NodeName].allocated++ + counts[gs.Status.NodeName].Allocated++ } } @@ -166,7 +169,7 @@ func (ac *AllocationCounter) Run(stop <-chan struct{}) error { } // Counts returns the NodeCount map in a thread safe way -func (ac *AllocationCounter) Counts() map[string]NodeCount { +func (ac *GameServerCounter) Counts() map[string]NodeCount { ac.countMutex.RLock() defer ac.countMutex.RUnlock() @@ -180,7 +183,7 @@ func (ac *AllocationCounter) Counts() map[string]NodeCount { return result } -func (ac *AllocationCounter) inc(gs *v1alpha1.GameServer, ready, allocated int64) { +func (ac *GameServerCounter) inc(gs *v1alpha1.GameServer, ready, allocated int64) { ac.countMutex.Lock() defer ac.countMutex.Unlock() @@ -189,15 +192,15 @@ func (ac *AllocationCounter) inc(gs *v1alpha1.GameServer, ready, allocated int64 ac.counts[gs.Status.NodeName] = &NodeCount{} } - ac.counts[gs.Status.NodeName].allocated += allocated - ac.counts[gs.Status.NodeName].ready += ready + ac.counts[gs.Status.NodeName].Allocated += allocated + ac.counts[gs.Status.NodeName].Ready += ready // just in case - if ac.counts[gs.Status.NodeName].allocated < 0 { - ac.counts[gs.Status.NodeName].allocated = 0 + if ac.counts[gs.Status.NodeName].Allocated < 0 { + ac.counts[gs.Status.NodeName].Allocated = 0 } - if ac.counts[gs.Status.NodeName].ready < 0 { - ac.counts[gs.Status.NodeName].ready = 0 + if ac.counts[gs.Status.NodeName].Ready < 0 { + ac.counts[gs.Status.NodeName].Ready = 0 } } diff --git a/pkg/gameserverallocations/allocationcounter_test.go b/pkg/gameservers/gameservercounter_test.go similarity index 72% rename from pkg/gameserverallocations/allocationcounter_test.go rename to pkg/gameservers/gameservercounter_test.go index 772196b6b3..1d1ca29a32 100644 --- a/pkg/gameserverallocations/allocationcounter_test.go +++ b/pkg/gameservers/gameservercounter_test.go @@ -1,4 +1,4 @@ -// Copyright 2018 Google Inc. All Rights Reserved. +// Copyright 2019 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package gameserverallocations +package gameservers import ( "testing" @@ -29,10 +29,16 @@ import ( "k8s.io/client-go/tools/cache" ) -func TestAllocationCounterGameServerEvents(t *testing.T) { +const ( + defaultNs = "default" + name1 = "node1" + name2 = "node2" +) + +func TestGameServerCounterGameServerEvents(t *testing.T) { t.Parallel() - ac, m := newFakeAllocationCounter() + ac, m := newFakeGameServerCounter() watch := watch.NewFake() m.AgonesClient.AddWatchReactor("gameservers", k8stesting.DefaultWatchReactor(watch, nil)) @@ -46,7 +52,7 @@ func TestAllocationCounterGameServerEvents(t *testing.T) { gs := &v1alpha1.GameServer{ ObjectMeta: metav1.ObjectMeta{Name: "gs1", Namespace: defaultNs}, Status: v1alpha1.GameServerStatus{ - State: v1alpha1.GameServerStateStarting, NodeName: n1}} + State: v1alpha1.GameServerStateStarting, NodeName: name1}} watch.Add(gs.DeepCopy()) cache.WaitForCacheSync(stop, hasSynced) @@ -59,8 +65,8 @@ func TestAllocationCounterGameServerEvents(t *testing.T) { counts := ac.Counts() assert.Len(t, counts, 1) - assert.Equal(t, int64(1), counts[n1].ready) - assert.Equal(t, int64(0), counts[n1].allocated) + assert.Equal(t, int64(1), counts[name1].Ready) + assert.Equal(t, int64(0), counts[name1].Allocated) gs.Status.State = v1alpha1.GameServerStateAllocated watch.Add(gs.DeepCopy()) @@ -68,8 +74,8 @@ func TestAllocationCounterGameServerEvents(t *testing.T) { counts = ac.Counts() assert.Len(t, counts, 1) - assert.Equal(t, int64(0), counts[n1].ready) - assert.Equal(t, int64(1), counts[n1].allocated) + assert.Equal(t, int64(0), counts[name1].Ready) + assert.Equal(t, int64(1), counts[name1].Allocated) gs.Status.State = v1alpha1.GameServerStateShutdown watch.Add(gs.DeepCopy()) @@ -77,43 +83,43 @@ func TestAllocationCounterGameServerEvents(t *testing.T) { counts = ac.Counts() assert.Len(t, counts, 1) - assert.Equal(t, int64(0), counts[n1].ready) - assert.Equal(t, int64(0), counts[n1].allocated) + assert.Equal(t, int64(0), counts[name1].Ready) + assert.Equal(t, int64(0), counts[name1].Allocated) gs.ObjectMeta.Name = "gs2" gs.Status.State = v1alpha1.GameServerStateReady - gs.Status.NodeName = n2 + gs.Status.NodeName = name2 watch.Add(gs.DeepCopy()) cache.WaitForCacheSync(stop, hasSynced) counts = ac.Counts() assert.Len(t, counts, 2) - assert.Equal(t, int64(0), counts[n1].ready) - assert.Equal(t, int64(0), counts[n1].allocated) - assert.Equal(t, int64(1), counts[n2].ready) - assert.Equal(t, int64(0), counts[n2].allocated) + assert.Equal(t, int64(0), counts[name1].Ready) + assert.Equal(t, int64(0), counts[name1].Allocated) + assert.Equal(t, int64(1), counts[name2].Ready) + assert.Equal(t, int64(0), counts[name2].Allocated) gs.ObjectMeta.Name = "gs3" // not likely, but to test the flow gs.Status.State = v1alpha1.GameServerStateAllocated - gs.Status.NodeName = n2 + gs.Status.NodeName = name2 watch.Add(gs.DeepCopy()) cache.WaitForCacheSync(stop, hasSynced) counts = ac.Counts() assert.Len(t, counts, 2) - assert.Equal(t, int64(0), counts[n1].ready) - assert.Equal(t, int64(0), counts[n1].allocated) - assert.Equal(t, int64(1), counts[n2].ready) - assert.Equal(t, int64(1), counts[n2].allocated) + assert.Equal(t, int64(0), counts[name1].Ready) + assert.Equal(t, int64(0), counts[name1].Allocated) + assert.Equal(t, int64(1), counts[name2].Ready) + assert.Equal(t, int64(1), counts[name2].Allocated) } func TestAllocationCountNodeEvents(t *testing.T) { t.Parallel() - ac, m := newFakeAllocationCounter() + ac, m := newFakeGameServerCounter() gsWatch := watch.NewFake() nodeWatch := watch.NewFake() @@ -131,8 +137,8 @@ func TestAllocationCountNodeEvents(t *testing.T) { gs := &v1alpha1.GameServer{ ObjectMeta: metav1.ObjectMeta{Name: "gs1", Namespace: defaultNs}, Status: v1alpha1.GameServerStatus{ - State: v1alpha1.GameServerStateReady, NodeName: n1}} - node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Namespace: defaultNs, Name: n1}} + State: v1alpha1.GameServerStateReady, NodeName: name1}} + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Namespace: defaultNs, Name: name1}} gsWatch.Add(gs.DeepCopy()) nodeWatch.Add(node.DeepCopy()) @@ -144,14 +150,14 @@ func TestAllocationCountNodeEvents(t *testing.T) { assert.Empty(t, ac.Counts()) } -func TestAllocationCounterRun(t *testing.T) { +func TestGameServerCounterRun(t *testing.T) { t.Parallel() - ac, m := newFakeAllocationCounter() + ac, m := newFakeGameServerCounter() gs1 := &v1alpha1.GameServer{ ObjectMeta: metav1.ObjectMeta{Name: "gs1", Namespace: defaultNs}, Status: v1alpha1.GameServerStatus{ - State: v1alpha1.GameServerStateReady, NodeName: n1}} + State: v1alpha1.GameServerStateReady, NodeName: name1}} gs2 := gs1.DeepCopy() gs2.ObjectMeta.Name = "gs2" @@ -160,7 +166,7 @@ func TestAllocationCounterRun(t *testing.T) { gs3 := gs1.DeepCopy() gs3.ObjectMeta.Name = "gs3" gs3.Status.State = v1alpha1.GameServerStateStarting - gs3.Status.NodeName = n2 + gs3.Status.NodeName = name2 gs4 := gs1.DeepCopy() gs4.ObjectMeta.Name = "gs4" @@ -173,21 +179,21 @@ func TestAllocationCounterRun(t *testing.T) { stop, cancel := agtesting.StartInformers(m) defer cancel() - err := ac.Run(stop) + err := ac.Run(0, stop) assert.Nil(t, err) counts := ac.Counts() assert.Len(t, counts, 2) - assert.Equal(t, int64(1), counts[n1].ready) - assert.Equal(t, int64(2), counts[n1].allocated) - assert.Equal(t, int64(0), counts[n2].ready) - assert.Equal(t, int64(0), counts[n2].allocated) + assert.Equal(t, int64(1), counts[name1].Ready) + assert.Equal(t, int64(2), counts[name1].Allocated) + assert.Equal(t, int64(0), counts[name2].Ready) + assert.Equal(t, int64(0), counts[name2].Allocated) } // newFakeController returns a controller, backed by the fake Clientset -func newFakeAllocationCounter() (*AllocationCounter, agtesting.Mocks) { +func newFakeGameServerCounter() (*GameServerCounter, agtesting.Mocks) { m := agtesting.NewMocks() - c := NewAllocationCounter(m.KubeInformerFactory, m.AgonesInformerFactory) + c := NewGameServerCounter(m.KubeInformerFactory, m.AgonesInformerFactory) return c, m } diff --git a/pkg/gameserversets/controller.go b/pkg/gameserversets/controller.go index 3b74adcd61..faf2933d76 100644 --- a/pkg/gameserversets/controller.go +++ b/pkg/gameserversets/controller.go @@ -16,7 +16,6 @@ package gameserversets import ( "encoding/json" - "sort" "sync" "agones.dev/agones/pkg/apis/stable" @@ -25,6 +24,7 @@ import ( getterv1alpha1 "agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1" "agones.dev/agones/pkg/client/informers/externalversions" listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1" + "agones.dev/agones/pkg/gameservers" "agones.dev/agones/pkg/util/crd" "agones.dev/agones/pkg/util/logfields" "agones.dev/agones/pkg/util/runtime" @@ -66,6 +66,7 @@ const ( // Controller is a the GameServerSet controller type Controller struct { baseLogger *logrus.Entry + counter *gameservers.GameServerCounter crdGetter v1beta1.CustomResourceDefinitionInterface gameServerGetter getterv1alpha1.GameServersGetter gameServerLister listerv1alpha1.GameServerLister @@ -83,6 +84,7 @@ type Controller struct { func NewController( wh *webhooks.WebHook, health healthcheck.Handler, + counter *gameservers.GameServerCounter, kubeClient kubernetes.Interface, extClient extclientset.Interface, agonesClient versioned.Interface, @@ -95,6 +97,7 @@ func NewController( c := &Controller{ crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(), + counter: counter, gameServerGetter: agonesClient.StableV1alpha1(), gameServerLister: gameServers.Lister(), gameServerSynced: gsInformer.HasSynced, @@ -305,7 +308,7 @@ func (c *Controller) syncGameServerSet(key string) error { list = c.stateCache.forGameServerSet(gsSet).reconcileWithUpdatedServerList(list) - numServersToAdd, toDelete, isPartial := computeReconciliationAction(list, int(gsSet.Spec.Replicas), maxGameServerCreationsPerBatch, maxGameServerDeletionsPerBatch, maxPodPendingCount) + numServersToAdd, toDelete, isPartial := c.computeReconciliationAction(gsSet, list, int(gsSet.Spec.Replicas), maxGameServerCreationsPerBatch, maxGameServerDeletionsPerBatch, maxPodPendingCount) status := computeStatus(list) fields := logrus.Fields{} @@ -353,33 +356,38 @@ func (c *Controller) syncGameServerSet(key string) error { // computeReconciliationAction computes the action to take to reconcile a game server set set given // the list of game servers that were found and target replica count. -func computeReconciliationAction(list []*v1alpha1.GameServer, targetReplicaCount int, maxCreations int, maxDeletions int, maxPending int) (int, []*v1alpha1.GameServer, bool) { - var upCount int // up == Ready or will become ready +func (c *Controller) computeReconciliationAction(gsSet *v1alpha1.GameServerSet, list []*v1alpha1.GameServer, + targetReplicaCount int, maxCreations int, maxDeletions int, maxPending int) (int, []*v1alpha1.GameServer, bool) { + var upCount int // up == Ready or will become ready + var deleteCount int // number of gameservers to delete // track the number of pods that are being created at any given moment by the GameServerSet // so we can limit it at a throughput that Kubernetes can handle var podPendingCount int // podPending == "up" but don't have a Pod running yet + + var potentialDeletions []*v1alpha1.GameServer var toDelete []*v1alpha1.GameServer scheduleDeletion := func(gs *v1alpha1.GameServer) { - if gs.ObjectMeta.DeletionTimestamp.IsZero() { - toDelete = append(toDelete, gs) - } + toDelete = append(toDelete, gs) + deleteCount-- } handleGameServerUp := func(gs *v1alpha1.GameServer) { if upCount >= targetReplicaCount { - scheduleDeletion(gs) + deleteCount++ } else { upCount++ } + + // Track gameservers that could be potentially deleted + potentialDeletions = append(potentialDeletions, gs) } // pass 1 - count allocated servers only, since those can't be touched for _, gs := range list { if gs.IsAllocated() { upCount++ - continue } } @@ -446,12 +454,15 @@ func computeReconciliationAction(list []*v1alpha1.GameServer, targetReplicaCount } } - if len(toDelete) > maxDeletions { - // we have to pick which GS to delete, let's delete the newest ones first. - sort.Slice(toDelete, func(i, j int) bool { - return toDelete[i].ObjectMeta.CreationTimestamp.After(toDelete[j].ObjectMeta.CreationTimestamp.Time) - }) + if deleteCount > 0 { + if gsSet.Spec.Scheduling == v1alpha1.Packed { + potentialDeletions = sortGameServersByLeastFullNodes(potentialDeletions, c.counter.Counts()) + } + + toDelete = append(toDelete, potentialDeletions[0:deleteCount]...) + } + if deleteCount > maxDeletions { toDelete = toDelete[0:maxDeletions] partialReconciliation = true } diff --git a/pkg/gameserversets/controller_test.go b/pkg/gameserversets/controller_test.go index 3da11a1045..fd463c0e66 100644 --- a/pkg/gameserversets/controller_test.go +++ b/pkg/gameserversets/controller_test.go @@ -21,6 +21,7 @@ import ( "time" "agones.dev/agones/pkg/apis/stable/v1alpha1" + "agones.dev/agones/pkg/gameservers" agtesting "agones.dev/agones/pkg/testing" "agones.dev/agones/pkg/util/webhooks" "github.com/heptiolabs/healthcheck" @@ -158,7 +159,12 @@ func TestComputeReconciliationAction(t *testing.T) { for _, tc := range cases { t.Run(tc.desc, func(t *testing.T) { - toAdd, toDelete, isPartial := computeReconciliationAction(tc.list, tc.targetReplicaCount, maxTestCreationsPerBatch, maxTestDeletionsPerBatch, maxTestPendingPerBatch) + c, _ := newFakeController() + + gsSet := &v1alpha1.GameServerSet{} + gsSet.Spec.Scheduling = v1alpha1.Packed + + toAdd, toDelete, isPartial := c.computeReconciliationAction(gsSet, tc.list, tc.targetReplicaCount, maxTestCreationsPerBatch, maxTestDeletionsPerBatch, maxTestPendingPerBatch) assert.Equal(t, tc.wantNumServersToAdd, toAdd, "# of GameServers to add") assert.Len(t, toDelete, tc.wantNumServersToDelete, "# of GameServers to delete") @@ -572,7 +578,8 @@ func createGameServers(gsSet *v1alpha1.GameServerSet, size int) []v1alpha1.GameS func newFakeController() (*Controller, agtesting.Mocks) { m := agtesting.NewMocks() wh := webhooks.NewWebHook("", "") - c := NewController(wh, healthcheck.NewHandler(), m.KubeClient, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory) + counter := gameservers.NewGameServerCounter(m.KubeInformerFactory, m.AgonesInformerFactory) + c := NewController(wh, healthcheck.NewHandler(), counter, m.KubeClient, m.ExtClient, m.AgonesClient, m.AgonesInformerFactory) c.recorder = m.FakeRecorder return c, m } diff --git a/pkg/gameserversets/gameserversets.go b/pkg/gameserversets/gameserversets.go index effc52965f..4dd72b2a93 100644 --- a/pkg/gameserversets/gameserversets.go +++ b/pkg/gameserversets/gameserversets.go @@ -15,70 +15,35 @@ package gameserversets import ( - "sort" - "agones.dev/agones/pkg/apis/stable/v1alpha1" listerv1alpha1 "agones.dev/agones/pkg/client/listers/stable/v1alpha1" + "agones.dev/agones/pkg/gameservers" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" ) -// node is just a convenience data structure for -// keeping relevant GameServer information about Nodes -type node struct { - name string - total int64 - ready []*v1alpha1.GameServer -} - -// filterGameServersOnLeastFullNodes returns a limited list of GameServers, ordered by the nodes -// they are hosted on, with the least utilised Nodes being prioritised -func filterGameServersOnLeastFullNodes(gsList []*v1alpha1.GameServer, limit int32) []*v1alpha1.GameServer { - if limit <= 0 { - return nil - } - - nodeMap := map[string]*node{} - var nodeList []*node - - // count up the number of allocated and ready game servers that exist - // also, since we're already looping through, track all the deletable GameServers - // per node, so we can use this as a shortlist to delete from - for _, gs := range gsList { - if gs.DeletionTimestamp.IsZero() && - (gs.Status.State == v1alpha1.GameServerStateAllocated || gs.Status.State == v1alpha1.GameServerStateReady) { - _, ok := nodeMap[gs.Status.NodeName] +// sortGameServersByLeastFullNodes sorts the list of gameservers by which gameservers reside on the least full nodes +func sortGameServersByLeastFullNodes(list []*v1alpha1.GameServer, count map[string]gameservers.NodeCount) []*v1alpha1.GameServer { + sort := v1alpha1.GameServerSort{ + List: list, + Comparator: func(a, b *v1alpha1.GameServer) bool { + // not scheduled yet/node deleted, put them first + ac, ok := count[a.Status.NodeName] if !ok { - node := &node{name: gs.Status.NodeName} - nodeMap[gs.Status.NodeName] = node - nodeList = append(nodeList, node) + return true } - nodeMap[gs.Status.NodeName].total++ - if gs.Status.State == v1alpha1.GameServerStateReady { - nodeMap[gs.Status.NodeName].ready = append(nodeMap[gs.Status.NodeName].ready, gs) + bc, ok := count[b.Status.NodeName] + if !ok { + return false } - } - } - - // sort our nodes, least to most - sort.Slice(nodeList, func(i, j int) bool { - return nodeList[i].total < nodeList[j].total - }) - // we need to get Ready GameServer until we equal or pass limit - result := make([]*v1alpha1.GameServer, 0, limit) - - for _, n := range nodeList { - result = append(result, n.ready...) - - if int32(len(result)) >= limit { - return result - } + return (ac.Allocated + ac.Ready) < (bc.Allocated + bc.Ready) + }, } - return result + return sort.Sort() } // ListGameServersByGameServerSetOwner lists the GameServers for a given GameServerSet diff --git a/pkg/gameserversets/gameserversets_test.go b/pkg/gameserversets/gameserversets_test.go index 58487e32f7..8f84d1cc86 100644 --- a/pkg/gameserversets/gameserversets_test.go +++ b/pkg/gameserversets/gameserversets_test.go @@ -19,6 +19,7 @@ import ( "testing" "agones.dev/agones/pkg/apis/stable/v1alpha1" + "agones.dev/agones/pkg/gameservers" agtesting "agones.dev/agones/pkg/testing" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -26,41 +27,26 @@ import ( k8stesting "k8s.io/client-go/testing" ) -func TestFilterGameServersOnLeastFullNodes(t *testing.T) { +func TestSortGameServersByLeastFullNodes(t *testing.T) { t.Parallel() - gsList := []*v1alpha1.GameServer{ - {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "n1", State: v1alpha1.GameServerStateReady}}, - {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{NodeName: "n1", State: v1alpha1.GameServerStateStarting}}, - {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "n2", State: v1alpha1.GameServerStateReady}}, - {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "n2", State: v1alpha1.GameServerStateReady}}, - {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "n3", State: v1alpha1.GameServerStateAllocated}}, - {ObjectMeta: metav1.ObjectMeta{Name: "gs6"}, Status: v1alpha1.GameServerStatus{NodeName: "n3", State: v1alpha1.GameServerStateAllocated}}, - {ObjectMeta: metav1.ObjectMeta{Name: "gs7"}, Status: v1alpha1.GameServerStatus{NodeName: "n3", State: v1alpha1.GameServerStateReady}}, + nc := map[string]gameservers.NodeCount{ + "n1": {Ready: 1, Allocated: 0}, + "n2": {Ready: 0, Allocated: 2}, } - t.Run("normal", func(t *testing.T) { - limit := 4 - result := filterGameServersOnLeastFullNodes(gsList, int32(limit)) - assert.Len(t, result, limit) - assert.Equal(t, "gs1", result[0].Name) - assert.Equal(t, "n2", result[1].Status.NodeName) - assert.Equal(t, "n2", result[2].Status.NodeName) - assert.Equal(t, "n3", result[3].Status.NodeName) - }) + list := []*v1alpha1.GameServer{ + {ObjectMeta: metav1.ObjectMeta{Name: "g1"}, Status: v1alpha1.GameServerStatus{NodeName: "n2"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "g2"}, Status: v1alpha1.GameServerStatus{NodeName: ""}}, + {ObjectMeta: metav1.ObjectMeta{Name: "g3"}, Status: v1alpha1.GameServerStatus{NodeName: "n1"}}, + } - t.Run("zero", func(t *testing.T) { - limit := 0 - result := filterGameServersOnLeastFullNodes(gsList, int32(limit)) - assert.Len(t, result, limit) - }) + result := sortGameServersByLeastFullNodes(list, nc) - t.Run("negative", func(t *testing.T) { - limit := -1 - result := filterGameServersOnLeastFullNodes(gsList, int32(limit)) - assert.Len(t, result, 0) - assert.Empty(t, result) - }) + assert.Len(t, result, len(list)) + assert.Equal(t, "g2", result[0].ObjectMeta.Name) + assert.Equal(t, "g3", result[1].ObjectMeta.Name) + assert.Equal(t, "g1", result[2].ObjectMeta.Name) } func TestListGameServersByGameServerSetOwner(t *testing.T) {