From 0cf65652b2537486e03faeff5b7c9ccc1520be18 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 5 Jan 2022 15:39:34 +0100
Subject: [PATCH 01/12] e2e test to reproduce issue#2397

---
 test/e2e/fleet_test.go          | 46 ++++++++++++++++++++++++++++-----
 test/e2e/framework/framework.go | 32 +++++++++++++++++++++++
 2 files changed, 71 insertions(+), 7 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index 9ce1f79521..c7ff98a4eb 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -223,14 +223,16 @@ func TestFleetRollingUpdate(t *testing.T) {
 	t.Parallel()
 	ctx := context.Background()
 	// Use scaleFleetPatch (true) or scaleFleetSubresource (false)
-	fixtures := []bool{true, false}
-	maxSurge := []string{"25%", "10%"}
+	fixtures := []bool{true}    //, false} // TODO Enable these again
+	maxSurge := []string{"25%"} //, "10%"} // TODO
+	doCycle := true             // TODO: fixture?
 
 	for _, usePatch := range fixtures {
 		for _, maxSurgeParam := range maxSurge {
 			usePatch := usePatch
 			maxSurgeParam := maxSurgeParam
-			t.Run(fmt.Sprintf("Use fleet Patch %t %s", usePatch, maxSurgeParam), func(t *testing.T) {
+			doCycleParam := doCycle
+			t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, doCycleParam), func(t *testing.T) {
 				t.Parallel()
 
 				client := framework.AgonesClient.AgonesV1()
@@ -267,10 +269,33 @@ func TestFleetRollingUpdate(t *testing.T) {
 				flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{})
 				assert.NoError(t, err)
 
+				done := make(chan bool, 1)
+				defer close(done)
+				if doCycleParam {
+					// Repeatedly cycle allocations to keep ~half of the GameServers Allocated, spread over both GSSets.
+					// Simulates a rolling update on a live Fleet that continuously receives new allocations,
+					// and reproduces an issue where this causes a rolling update to get stuck.
+					const halfScale = targetScale / 2
+					go framework.CycleAllocations(t, flt, time.Second*3, time.Second*halfScale*3, done)
+
+					// Wait for at least half of the fleet to have be cycled (either Allocated or shutting down)
+					// before updating the fleet.
+					err = framework.WaitForFleetCondition(t, flt, func(entry *logrus.Entry, fleet *agonesv1.Fleet) bool {
+						return fleet.Status.ReadyReplicas < halfScale
+					})
+				}
+
 				// Change ContainerPort to trigger creating a new GSSet
-				fltCopy := flt.DeepCopy()
-				fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++
-				flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{})
+				err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
+					flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.GetName(), metav1.GetOptions{})
+					if err != nil {
+						return err
+					}
+					fltCopy := flt.DeepCopy()
+					fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++
+					flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{})
+					return err
+				})
 				assert.NoError(t, err)
 
 				selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name})
@@ -308,7 +333,9 @@ func TestFleetRollingUpdate(t *testing.T) {
 					assert.Nil(t, err)
 
 					expectedTotal := targetScale + maxSurge + maxUnavailable + shift
-					if len(list.Items) > expectedTotal {
+					if len(list.Items) > expectedTotal && !doCycleParam {
+						// This fails when Allocation cycling is enabled as there's a number of additional gameservers
+						// shutting down.
 						err = fmt.Errorf("new replicas should be less than target + maxSurge + maxUnavailable + shift. Replicas: %d, Expected: %d", len(list.Items), expectedTotal)
 					}
 					if err != nil {
@@ -324,6 +351,11 @@ func TestFleetRollingUpdate(t *testing.T) {
 
 				assert.NoError(t, err)
 
+				// Stop cycling Allocations.
+				// The AssertFleetConditions below will wait until the Allocation cycling has
+				// fully stopped (when all Allocated GameServers are shut down).
+				done <- true
+
 				// scale down, with allocation
 				const scaleDownTarget = 1
 				if usePatch {
diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index bbfdd556c3..7bc09b6a16 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -278,6 +278,38 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer
 		state, gs.Namespace, gs.Name)
 }
 
+func (f *Framework) GetGameServer(t *testing.T, namespace string, name string) *agonesv1.GameServer {
+	gs, err := f.AgonesClient.AgonesV1().GameServers(namespace).Get(context.Background(), name, metav1.GetOptions{})
+	require.NoError(t, err, "failed to get gameserver: %s/%s", namespace, name)
+	return gs
+}
+
+// CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period.
+// Each Allocated GameServer gets deleted allocDuration after it was Allocated.
+// GameServers will continue to be Allocated until a message is passed to the done channel.
+func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, done <-chan bool) {
+	ticker := time.NewTicker(period)
+	for {
+		select {
+		case <-done:
+			return
+		case <-ticker.C:
+			gsa := GetAllocation(flt)
+			gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{})
+			if err != nil || gsa.Status.State != allocationv1.GameServerAllocationAllocated {
+				continue
+			}
+
+			// Deallocate after allocDuration.
+			go func(gsa *allocationv1.GameServerAllocation) {
+				time.Sleep(allocDuration)
+				err := f.AgonesClient.AgonesV1().GameServers(gsa.Namespace).Delete(context.Background(), gsa.Status.GameServerName, metav1.DeleteOptions{})
+				require.NoError(t, err)
+			}(gsa)
+		}
+	}
+}
+
 // AssertFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes.
 func (f *Framework) AssertFleetCondition(t *testing.T, flt *agonesv1.Fleet, condition func(*logrus.Entry, *agonesv1.Fleet) bool) {
 	err := f.WaitForFleetCondition(t, flt, condition)

From 914910ef7cefa5f6e19b424a38d8cb96258b1357 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 5 Jan 2022 17:21:16 +0100
Subject: [PATCH 02/12] lint

---
 test/e2e/fleet_test.go          | 4 ++--
 test/e2e/framework/framework.go | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index c7ff98a4eb..b4b79bc40c 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -223,8 +223,8 @@ func TestFleetRollingUpdate(t *testing.T) {
 	t.Parallel()
 	ctx := context.Background()
 	// Use scaleFleetPatch (true) or scaleFleetSubresource (false)
-	fixtures := []bool{true}    //, false} // TODO Enable these again
-	maxSurge := []string{"25%"} //, "10%"} // TODO
+	fixtures := []bool{true}    // , false} // TODO Enable these again
+	maxSurge := []string{"25%"} // , "10%"} // TODO
 	doCycle := true             // TODO: fixture?
 
 	for _, usePatch := range fixtures {
diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index 7bc09b6a16..70c69d0f3c 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -278,6 +278,7 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer
 		state, gs.Namespace, gs.Name)
 }
 
+// GetGameServer gets the specified GameServer by namespace/name or fails the test with an error.
 func (f *Framework) GetGameServer(t *testing.T, namespace string, name string) *agonesv1.GameServer {
 	gs, err := f.AgonesClient.AgonesV1().GameServers(namespace).Get(context.Background(), name, metav1.GetOptions{})
 	require.NoError(t, err, "failed to get gameserver: %s/%s", namespace, name)

From 7dba592f89c3c6795be029a16a14e921f913c54c Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Mon, 14 Feb 2022 16:25:50 +0000
Subject: [PATCH 03/12] Fix: consider allocated replicas in rolling update

---
 pkg/fleets/controller.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/fleets/controller.go b/pkg/fleets/controller.go
index e61db5d5e7..1e3ca7ddd3 100644
--- a/pkg/fleets/controller.go
+++ b/pkg/fleets/controller.go
@@ -529,7 +529,7 @@ func (c *Controller) rollingUpdateRestFixedOnReady(ctx context.Context, fleet *a
 
 	// Check if we are ready to scale down
 	allPodsCount := agonesv1.SumSpecReplicas(allGSS)
-	newGSSUnavailablePodCount := active.Spec.Replicas - active.Status.ReadyReplicas
+	newGSSUnavailablePodCount := active.Spec.Replicas - active.Status.ReadyReplicas - active.Status.AllocatedReplicas
 	maxScaledDown := allPodsCount - minAvailable - newGSSUnavailablePodCount
 
 	if maxScaledDown <= 0 {

From 9212fca5ef158fb4f0f9edd58f29c4c35f4be6aa Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Fri, 11 Mar 2022 12:34:26 +0000
Subject: [PATCH 04/12] Fix test sometimes using empty fleet name

---
 test/e2e/fleet_test.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index b4b79bc40c..701a5bd9c9 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -276,7 +276,8 @@ func TestFleetRollingUpdate(t *testing.T) {
 					// Simulates a rolling update on a live Fleet that continuously receives new allocations,
 					// and reproduces an issue where this causes a rolling update to get stuck.
 					const halfScale = targetScale / 2
-					go framework.CycleAllocations(t, flt, time.Second*3, time.Second*halfScale*3, done)
+					const period = 3 * time.Second
+					go framework.CycleAllocations(t, flt, period, period*halfScale, done)
 
 					// Wait for at least half of the fleet to have be cycled (either Allocated or shutting down)
 					// before updating the fleet.
@@ -286,8 +287,9 @@ func TestFleetRollingUpdate(t *testing.T) {
 				}
 
 				// Change ContainerPort to trigger creating a new GSSet
+				fltName := flt.GetName()
 				err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
-					flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.GetName(), metav1.GetOptions{})
+					flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{})
 					if err != nil {
 						return err
 					}

From 194f571bbd0f2cef8e2dc20b9031b3b5d04292b0 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Fri, 11 Mar 2022 14:47:54 +0000
Subject: [PATCH 05/12] Cleanup

---
 test/e2e/fleet_test.go | 300 +++++++++++++++++++++++------------------
 1 file changed, 168 insertions(+), 132 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index 701a5bd9c9..478f944bf6 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -222,157 +222,193 @@ func TestFleetScaleUpEditAndScaleDown(t *testing.T) {
 func TestFleetRollingUpdate(t *testing.T) {
 	t.Parallel()
 	ctx := context.Background()
-	// Use scaleFleetPatch (true) or scaleFleetSubresource (false)
-	fixtures := []bool{true}    // , false} // TODO Enable these again
-	maxSurge := []string{"25%"} // , "10%"} // TODO
-	doCycle := true             // TODO: fixture?
+	fixtures := []struct {
+		// Use scaleFleetPatch (true) or scaleFleetSubresource (false)
+		usePatch bool
+		maxSurge string
+		// If true, create and cycle GS Allocations when triggering a rolling update:
+		// - Repeatedly allocate and shutdown GameServers (keeping ~50% of the Fleet in an Allocated state).
+		// - Once 50% of the Fleet is Allocated, trigger the rolling update.
+		// - Keep allocating/shutting down GameServers, to allocate in both the old and new GSSets.
+		// - Verify the rolling update completes.
+		// This simulates updating a Fleet that is live/in-use, and reproduces an issue where allocated GameServers
+		// causes a rolling update to get stuck and keep the old GameServerSet up.
+		cycleAllocations bool
+	}{
+		{
+			usePatch:         true,
+			maxSurge:         "25%",
+			cycleAllocations: false,
+		},
+		{
+			usePatch:         true,
+			maxSurge:         "10%",
+			cycleAllocations: false,
+		},
+		{
+			usePatch:         false,
+			maxSurge:         "25%",
+			cycleAllocations: false,
+		},
+		{
+			usePatch:         false,
+			maxSurge:         "10%",
+			cycleAllocations: false,
+		},
+		{
+			usePatch:         true,
+			maxSurge:         "25%",
+			cycleAllocations: true,
+		},
+	}
+	for _, fixture := range fixtures {
+		usePatch := fixture.usePatch
+		maxSurgeParam := fixture.maxSurge
+		cycleAllocations := fixture.cycleAllocations
+		t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, cycleAllocations), func(t *testing.T) {
+			t.Parallel()
 
-	for _, usePatch := range fixtures {
-		for _, maxSurgeParam := range maxSurge {
-			usePatch := usePatch
-			maxSurgeParam := maxSurgeParam
-			doCycleParam := doCycle
-			t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, doCycleParam), func(t *testing.T) {
-				t.Parallel()
-
-				client := framework.AgonesClient.AgonesV1()
-
-				flt := defaultFleet(framework.Namespace)
-				flt.ApplyDefaults()
-				flt.Spec.Replicas = 1
-				rollingUpdatePercent := intstr.FromString(maxSurgeParam)
-				flt.Spec.Strategy.RollingUpdate.MaxSurge = &rollingUpdatePercent
-				flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent
-
-				flt, err := client.Fleets(framework.Namespace).Create(ctx, flt, metav1.CreateOptions{})
-				if assert.Nil(t, err) {
-					defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck
-				}
+			client := framework.AgonesClient.AgonesV1()
 
-				assert.Equal(t, int32(1), flt.Spec.Replicas)
-				assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal)
-				assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal)
+			flt := defaultFleet(framework.Namespace)
+			flt.ApplyDefaults()
+			flt.Spec.Replicas = 1
+			rollingUpdatePercent := intstr.FromString(maxSurgeParam)
+			flt.Spec.Strategy.RollingUpdate.MaxSurge = &rollingUpdatePercent
+			flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent
 
-				framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas))
+			flt, err := client.Fleets(framework.Namespace).Create(ctx, flt, metav1.CreateOptions{})
+			if assert.Nil(t, err) {
+				defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck
+			}
 
-				// scale up
-				const targetScale = 8
-				if usePatch {
-					flt = scaleFleetPatch(ctx, t, flt, targetScale)
-					assert.Equal(t, int32(targetScale), flt.Spec.Replicas)
-				} else {
-					flt = scaleFleetSubresource(ctx, t, flt, targetScale)
-				}
+			assert.Equal(t, int32(1), flt.Spec.Replicas)
+			assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal)
+			assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal)
 
-				framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(targetScale))
+			framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas))
 
-				flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{})
-				assert.NoError(t, err)
+			// scale up
+			const targetScale = 8
+			if usePatch {
+				flt = scaleFleetPatch(ctx, t, flt, targetScale)
+				assert.Equal(t, int32(targetScale), flt.Spec.Replicas)
+			} else {
+				flt = scaleFleetSubresource(ctx, t, flt, targetScale)
+			}
 
-				done := make(chan bool, 1)
-				defer close(done)
-				if doCycleParam {
-					// Repeatedly cycle allocations to keep ~half of the GameServers Allocated, spread over both GSSets.
-					// Simulates a rolling update on a live Fleet that continuously receives new allocations,
-					// and reproduces an issue where this causes a rolling update to get stuck.
-					const halfScale = targetScale / 2
-					const period = 3 * time.Second
-					go framework.CycleAllocations(t, flt, period, period*halfScale, done)
-
-					// Wait for at least half of the fleet to have be cycled (either Allocated or shutting down)
-					// before updating the fleet.
-					err = framework.WaitForFleetCondition(t, flt, func(entry *logrus.Entry, fleet *agonesv1.Fleet) bool {
-						return fleet.Status.ReadyReplicas < halfScale
-					})
-				}
+			framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(targetScale))
 
-				// Change ContainerPort to trigger creating a new GSSet
-				fltName := flt.GetName()
-				err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
-					flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{})
-					if err != nil {
-						return err
-					}
-					fltCopy := flt.DeepCopy()
-					fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++
-					flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{})
-					return err
-				})
-				assert.NoError(t, err)
+			flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{})
+			assert.NoError(t, err)
 
-				selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name})
-				// New GSS was created
-				err = wait.PollImmediate(1*time.Second, 30*time.Second, func() (bool, error) {
-					gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx,
-						metav1.ListOptions{LabelSelector: selector.String()})
-					if err != nil {
-						return false, err
-					}
-					return len(gssList.Items) == 2, nil
+			done := make(chan bool, 1)
+			defer close(done)
+			if cycleAllocations {
+				// Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and
+				// delete such that both the old and new GSSet contain allocated GameServers.
+				const halfScale = targetScale / 2
+				const period = 3 * time.Second
+				go framework.CycleAllocations(t, flt, period, period*halfScale, done)
+
+				// Wait for at least half of the fleet to have be cycled (either Allocated or shutting down)
+				// before updating the fleet.
+				err = framework.WaitForFleetCondition(t, flt, func(entry *logrus.Entry, fleet *agonesv1.Fleet) bool {
+					return fleet.Status.ReadyReplicas < halfScale
 				})
-				assert.NoError(t, err)
-				// Check that total number of gameservers in the system does not exceed the RollingUpdate
-				// parameters (creating no more than maxSurge, deleting maxUnavailable servers at a time)
-				// Wait for old GSSet to be deleted
-				err = wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) {
-					list, err := framework.AgonesClient.AgonesV1().GameServers(framework.Namespace).List(ctx,
-						metav1.ListOptions{LabelSelector: selector.String()})
-					if err != nil {
-						return false, err
-					}
+			}
+
+			// Change ContainerPort to trigger creating a new GSSet
+			fltName := flt.GetName()
+			err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
+				flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{})
+				if err != nil {
+					return err
+				}
+				fltCopy := flt.DeepCopy()
+				fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++
+				flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{})
+				return err
+			})
+			assert.NoError(t, err)
 
-					maxSurge, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxSurge, int(flt.Spec.Replicas), true)
-					assert.Nil(t, err)
+			selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name})
+			// New GSS was created
+			err = wait.PollImmediate(1*time.Second, 30*time.Second, func() (bool, error) {
+				gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx,
+					metav1.ListOptions{LabelSelector: selector.String()})
+				if err != nil {
+					return false, err
+				}
+				return len(gssList.Items) == 2, nil
+			})
+			assert.NoError(t, err)
+			// Check that total number of gameservers in the system does not exceed the RollingUpdate
+			// parameters (creating no more than maxSurge, deleting maxUnavailable servers at a time)
+			// Wait for old GSSet to be deleted
+			err = wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) {
+				list, err := framework.AgonesClient.AgonesV1().GameServers(framework.Namespace).List(ctx,
+					metav1.ListOptions{LabelSelector: selector.String()})
+				if err != nil {
+					return false, err
+				}
 
-					roundUp := false
-					maxUnavailable, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxUnavailable, int(flt.Spec.Replicas), roundUp)
+				maxSurge, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxSurge, int(flt.Spec.Replicas), true)
+				assert.Nil(t, err)
 
-					if maxUnavailable == 0 {
-						maxUnavailable = 1
-					}
-					// This difference is inevitable, also could be seen with Deployments and ReplicaSets
-					shift := maxUnavailable
-					assert.Nil(t, err)
-
-					expectedTotal := targetScale + maxSurge + maxUnavailable + shift
-					if len(list.Items) > expectedTotal && !doCycleParam {
-						// This fails when Allocation cycling is enabled as there's a number of additional gameservers
-						// shutting down.
-						err = fmt.Errorf("new replicas should be less than target + maxSurge + maxUnavailable + shift. Replicas: %d, Expected: %d", len(list.Items), expectedTotal)
-					}
-					if err != nil {
-						return false, err
-					}
-					gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx,
-						metav1.ListOptions{LabelSelector: selector.String()})
-					if err != nil {
-						return false, err
-					}
-					return len(gssList.Items) == 1, nil
-				})
+				roundUp := false
+				maxUnavailable, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxUnavailable, int(flt.Spec.Replicas), roundUp)
 
-				assert.NoError(t, err)
+				if maxUnavailable == 0 {
+					maxUnavailable = 1
+				}
+				// This difference is inevitable, also could be seen with Deployments and ReplicaSets
+				shift := maxUnavailable
+				assert.Nil(t, err)
 
-				// Stop cycling Allocations.
-				// The AssertFleetConditions below will wait until the Allocation cycling has
-				// fully stopped (when all Allocated GameServers are shut down).
-				done <- true
-
-				// scale down, with allocation
-				const scaleDownTarget = 1
-				if usePatch {
-					flt = scaleFleetPatch(ctx, t, flt, scaleDownTarget)
-				} else {
-					flt = scaleFleetSubresource(ctx, t, flt, scaleDownTarget)
+				// Ignore any GameServers that are shutting down (resulting from Allocation cycling).
+				shuttingDown := 0
+				for _, gs := range list.Items {
+					if gs.IsBeingDeleted() {
+						shuttingDown++
+					}
+				}
+				expectedTotal := targetScale + maxSurge + maxUnavailable + shift + shuttingDown
+				if len(list.Items) > expectedTotal {
+					err = fmt.Errorf("new replicas should be less than target + maxSurge + maxUnavailable + shift + shuttingDown. Replicas: %d, Expected: %d", len(list.Items), expectedTotal)
+				}
+				if err != nil {
+					return false, err
+				}
+				gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx,
+					metav1.ListOptions{LabelSelector: selector.String()})
+				if err != nil {
+					return false, err
 				}
+				return len(gssList.Items) == 1, nil
+			})
 
-				framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(1))
+			assert.NoError(t, err)
 
-				framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
-					return fleet.Status.AllocatedReplicas == 0
-				})
+			// Stop cycling Allocations.
+			// The AssertFleetConditions below will wait until the Allocation cycling has
+			// fully stopped (when all Allocated GameServers are shut down).
+			done <- true
+
+			// scale down, with allocation
+			const scaleDownTarget = 1
+			if usePatch {
+				flt = scaleFleetPatch(ctx, t, flt, scaleDownTarget)
+			} else {
+				flt = scaleFleetSubresource(ctx, t, flt, scaleDownTarget)
+			}
+
+			framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(1))
+
+			framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
+				return fleet.Status.AllocatedReplicas == 0
 			})
-		}
+		})
 	}
 }
 

From 2577696b79a73d9357d4209643e791005f9463d8 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 16 Mar 2022 16:38:58 +0000
Subject: [PATCH 06/12] Remove GetGameServer, refactor to use wait.PollUntil

---
 test/e2e/fleet_test.go          |  8 +++---
 test/e2e/framework/framework.go | 45 +++++++++++++--------------------
 2 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index 478f944bf6..dead48e6ab 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -302,14 +302,14 @@ func TestFleetRollingUpdate(t *testing.T) {
 			flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{})
 			assert.NoError(t, err)
 
-			done := make(chan bool, 1)
-			defer close(done)
+			stopCh := make(chan struct{})
+			defer close(stopCh)
 			if cycleAllocations {
 				// Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and
 				// delete such that both the old and new GSSet contain allocated GameServers.
 				const halfScale = targetScale / 2
 				const period = 3 * time.Second
-				go framework.CycleAllocations(t, flt, period, period*halfScale, done)
+				go framework.CycleAllocations(t, flt, period, period*halfScale, stopCh)
 
 				// Wait for at least half of the fleet to have be cycled (either Allocated or shutting down)
 				// before updating the fleet.
@@ -393,7 +393,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 			// Stop cycling Allocations.
 			// The AssertFleetConditions below will wait until the Allocation cycling has
 			// fully stopped (when all Allocated GameServers are shut down).
-			done <- true
+			close(stopCh)
 
 			// scale down, with allocation
 			const scaleDownTarget = 1
diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index 632041a416..3006c31f72 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -278,37 +278,28 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer
 		state, gs.Namespace, gs.Name)
 }
 
-// GetGameServer gets the specified GameServer by namespace/name or fails the test with an error.
-func (f *Framework) GetGameServer(t *testing.T, namespace string, name string) *agonesv1.GameServer {
-	gs, err := f.AgonesClient.AgonesV1().GameServers(namespace).Get(context.Background(), name, metav1.GetOptions{})
-	require.NoError(t, err, "failed to get gameserver: %s/%s", namespace, name)
-	return gs
-}
-
 // CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period.
 // Each Allocated GameServer gets deleted allocDuration after it was Allocated.
 // GameServers will continue to be Allocated until a message is passed to the done channel.
-func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, done <-chan bool) {
-	ticker := time.NewTicker(period)
-	for {
-		select {
-		case <-done:
-			return
-		case <-ticker.C:
-			gsa := GetAllocation(flt)
-			gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{})
-			if err != nil || gsa.Status.State != allocationv1.GameServerAllocationAllocated {
-				continue
-			}
-
-			// Deallocate after allocDuration.
-			go func(gsa *allocationv1.GameServerAllocation) {
-				time.Sleep(allocDuration)
-				err := f.AgonesClient.AgonesV1().GameServers(gsa.Namespace).Delete(context.Background(), gsa.Status.GameServerName, metav1.DeleteOptions{})
-				require.NoError(t, err)
-			}(gsa)
+func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, stopCh <-chan struct{}) {
+	err := wait.PollImmediateUntil(period, func() (bool, error) {
+		gsa := GetAllocation(flt)
+		gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{})
+		if err != nil || gsa.Status.State != allocationv1.GameServerAllocationAllocated {
+			// Ignore error. Could be that the buffer was empty, will try again next cycle.
+			return false, nil
 		}
-	}
+
+		// Deallocate after allocDuration.
+		go func(gsa *allocationv1.GameServerAllocation) {
+			time.Sleep(allocDuration)
+			err := f.AgonesClient.AgonesV1().GameServers(gsa.Namespace).Delete(context.Background(), gsa.Status.GameServerName, metav1.DeleteOptions{})
+			require.NoError(t, err)
+		}(gsa)
+
+		return false, nil
+	}, stopCh)
+	require.NoError(t, err)
 }
 
 // AssertFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes.

From f4ad90c5954761c8778b7b31f8018e7a0b29d3c2 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 16 Mar 2022 16:42:05 +0000
Subject: [PATCH 07/12] Reference fixture directly

---
 test/e2e/fleet_test.go | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index dead48e6ab..1fd6963651 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -261,11 +261,9 @@ func TestFleetRollingUpdate(t *testing.T) {
 			cycleAllocations: true,
 		},
 	}
-	for _, fixture := range fixtures {
-		usePatch := fixture.usePatch
-		maxSurgeParam := fixture.maxSurge
-		cycleAllocations := fixture.cycleAllocations
-		t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, cycleAllocations), func(t *testing.T) {
+	for i := range fixtures {
+		fixture := fixtures[i]
+		t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", fixture.usePatch, fixture.maxSurge, fixture.cycleAllocations), func(t *testing.T) {
 			t.Parallel()
 
 			client := framework.AgonesClient.AgonesV1()
@@ -273,7 +271,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 			flt := defaultFleet(framework.Namespace)
 			flt.ApplyDefaults()
 			flt.Spec.Replicas = 1
-			rollingUpdatePercent := intstr.FromString(maxSurgeParam)
+			rollingUpdatePercent := intstr.FromString(fixture.maxSurge)
 			flt.Spec.Strategy.RollingUpdate.MaxSurge = &rollingUpdatePercent
 			flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent
 
@@ -283,14 +281,14 @@ func TestFleetRollingUpdate(t *testing.T) {
 			}
 
 			assert.Equal(t, int32(1), flt.Spec.Replicas)
-			assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal)
-			assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal)
+			assert.Equal(t, fixture.maxSurge, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal)
+			assert.Equal(t, fixture.maxSurge, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal)
 
 			framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas))
 
 			// scale up
 			const targetScale = 8
-			if usePatch {
+			if fixture.usePatch {
 				flt = scaleFleetPatch(ctx, t, flt, targetScale)
 				assert.Equal(t, int32(targetScale), flt.Spec.Replicas)
 			} else {
@@ -304,7 +302,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 
 			stopCh := make(chan struct{})
 			defer close(stopCh)
-			if cycleAllocations {
+			if fixture.cycleAllocations {
 				// Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and
 				// delete such that both the old and new GSSet contain allocated GameServers.
 				const halfScale = targetScale / 2
@@ -397,7 +395,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 
 			// scale down, with allocation
 			const scaleDownTarget = 1
-			if usePatch {
+			if fixture.usePatch {
 				flt = scaleFleetPatch(ctx, t, flt, scaleDownTarget)
 			} else {
 				flt = scaleFleetSubresource(ctx, t, flt, scaleDownTarget)

From cdc7d7e9c70457d58ec0778c82acc63ac597a9cf Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 16 Mar 2022 17:03:30 +0000
Subject: [PATCH 08/12] Use require.NoError

---
 test/e2e/fleet_test.go | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index 1fd6963651..c2a5d8fc49 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -276,9 +276,8 @@ func TestFleetRollingUpdate(t *testing.T) {
 			flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent
 
 			flt, err := client.Fleets(framework.Namespace).Create(ctx, flt, metav1.CreateOptions{})
-			if assert.Nil(t, err) {
-				defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck
-			}
+			require.NoError(t, err)
+			defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck
 
 			assert.Equal(t, int32(1), flt.Spec.Replicas)
 			assert.Equal(t, fixture.maxSurge, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal)
@@ -298,7 +297,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 			framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(targetScale))
 
 			flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{})
-			assert.NoError(t, err)
+			require.NoError(t, err)
 
 			stopCh := make(chan struct{})
 			defer close(stopCh)
@@ -328,7 +327,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 				flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{})
 				return err
 			})
-			assert.NoError(t, err)
+			require.NoError(t, err)
 
 			selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name})
 			// New GSS was created

From a9b191eee3d00286ae289ba40d9e45404d229433 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 16 Mar 2022 17:06:25 +0000
Subject: [PATCH 09/12] Use require.Eventually

---
 test/e2e/fleet_test.go | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index c2a5d8fc49..9044b26220 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -315,19 +315,16 @@ func TestFleetRollingUpdate(t *testing.T) {
 				})
 			}
 
-			// Change ContainerPort to trigger creating a new GSSet
+			// Change ContainerPort to trigger creating a new GSSet. Retry in case of a conflict.
 			fltName := flt.GetName()
-			err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
+			require.Eventually(t, func() bool {
 				flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{})
-				if err != nil {
-					return err
-				}
+				require.NoError(t, err)
 				fltCopy := flt.DeepCopy()
 				fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++
 				flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{})
-				return err
-			})
-			require.NoError(t, err)
+				return err == nil
+			}, time.Minute, time.Second)
 
 			selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name})
 			// New GSS was created

From e67ceff3d217890f08366a7078dc591cc4dcb773 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 16 Mar 2022 17:10:27 +0000
Subject: [PATCH 10/12] Use context + cancel instead of chan

---
 test/e2e/fleet_test.go          | 8 ++++----
 test/e2e/framework/framework.go | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index 9044b26220..a9fd233f61 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -299,14 +299,14 @@ func TestFleetRollingUpdate(t *testing.T) {
 			flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{})
 			require.NoError(t, err)
 
-			stopCh := make(chan struct{})
-			defer close(stopCh)
+			cycleCtx, cancelCycle := context.WithCancel(ctx)
+			defer cancelCycle()
 			if fixture.cycleAllocations {
 				// Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and
 				// delete such that both the old and new GSSet contain allocated GameServers.
 				const halfScale = targetScale / 2
 				const period = 3 * time.Second
-				go framework.CycleAllocations(t, flt, period, period*halfScale, stopCh)
+				go framework.CycleAllocations(t, cycleCtx, flt, period, period*halfScale)
 
 				// Wait for at least half of the fleet to have be cycled (either Allocated or shutting down)
 				// before updating the fleet.
@@ -387,7 +387,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 			// Stop cycling Allocations.
 			// The AssertFleetConditions below will wait until the Allocation cycling has
 			// fully stopped (when all Allocated GameServers are shut down).
-			close(stopCh)
+			cancelCycle()
 
 			// scale down, with allocation
 			const scaleDownTarget = 1
diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index 3006c31f72..5aab9e7773 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -281,7 +281,7 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer
 // CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period.
 // Each Allocated GameServer gets deleted allocDuration after it was Allocated.
 // GameServers will continue to be Allocated until a message is passed to the done channel.
-func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, stopCh <-chan struct{}) {
+func (f *Framework) CycleAllocations(t *testing.T, ctx context.Context, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration) {
 	err := wait.PollImmediateUntil(period, func() (bool, error) {
 		gsa := GetAllocation(flt)
 		gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{})
@@ -298,7 +298,7 @@ func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period t
 		}(gsa)
 
 		return false, nil
-	}, stopCh)
+	}, ctx.Done())
 	require.NoError(t, err)
 }
 

From 452d292ee5a09fbb9e1ca29f6dffd905132b25a9 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 16 Mar 2022 17:23:48 +0000
Subject: [PATCH 11/12] Ctx first param

---
 test/e2e/fleet_test.go          | 2 +-
 test/e2e/framework/framework.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
index a9fd233f61..078b6b6417 100644
--- a/test/e2e/fleet_test.go
+++ b/test/e2e/fleet_test.go
@@ -306,7 +306,7 @@ func TestFleetRollingUpdate(t *testing.T) {
 				// delete such that both the old and new GSSet contain allocated GameServers.
 				const halfScale = targetScale / 2
 				const period = 3 * time.Second
-				go framework.CycleAllocations(t, cycleCtx, flt, period, period*halfScale)
+				go framework.CycleAllocations(cycleCtx, t, flt, period, period*halfScale)
 
 				// Wait for at least half of the fleet to have be cycled (either Allocated or shutting down)
 				// before updating the fleet.
diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index 5aab9e7773..23ece59239 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -281,7 +281,7 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer
 // CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period.
 // Each Allocated GameServer gets deleted allocDuration after it was Allocated.
 // GameServers will continue to be Allocated until a message is passed to the done channel.
-func (f *Framework) CycleAllocations(t *testing.T, ctx context.Context, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration) {
+func (f *Framework) CycleAllocations(ctx context.Context, t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration) {
 	err := wait.PollImmediateUntil(period, func() (bool, error) {
 		gsa := GetAllocation(flt)
 		gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{})

From 57bcd5f9cded1f9444e4bbcf501f07b87687dad6 Mon Sep 17 00:00:00 2001
From: WVerlaek <wouter@improbable.io>
Date: Wed, 16 Mar 2022 18:00:43 +0000
Subject: [PATCH 12/12] Ignore wait timeout

---
 test/e2e/framework/framework.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index 23ece59239..53fa76cc75 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -299,7 +299,10 @@ func (f *Framework) CycleAllocations(ctx context.Context, t *testing.T, flt *ago
 
 		return false, nil
 	}, ctx.Done())
-	require.NoError(t, err)
+	// Ignore wait timeout error, will always be returned when the context is cancelled at the end of the test.
+	if err != wait.ErrWaitTimeout {
+		require.NoError(t, err)
+	}
 }
 
 // AssertFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes.