From 0cf65652b2537486e03faeff5b7c9ccc1520be18 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 5 Jan 2022 15:39:34 +0100 Subject: [PATCH 01/12] e2e test to reproduce issue#2397 --- test/e2e/fleet_test.go | 46 ++++++++++++++++++++++++++++----- test/e2e/framework/framework.go | 32 +++++++++++++++++++++++ 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index 9ce1f79521..c7ff98a4eb 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -223,14 +223,16 @@ func TestFleetRollingUpdate(t *testing.T) { t.Parallel() ctx := context.Background() // Use scaleFleetPatch (true) or scaleFleetSubresource (false) - fixtures := []bool{true, false} - maxSurge := []string{"25%", "10%"} + fixtures := []bool{true} //, false} // TODO Enable these again + maxSurge := []string{"25%"} //, "10%"} // TODO + doCycle := true // TODO: fixture? for _, usePatch := range fixtures { for _, maxSurgeParam := range maxSurge { usePatch := usePatch maxSurgeParam := maxSurgeParam - t.Run(fmt.Sprintf("Use fleet Patch %t %s", usePatch, maxSurgeParam), func(t *testing.T) { + doCycleParam := doCycle + t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, doCycleParam), func(t *testing.T) { t.Parallel() client := framework.AgonesClient.AgonesV1() @@ -267,10 +269,33 @@ func TestFleetRollingUpdate(t *testing.T) { flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{}) assert.NoError(t, err) + done := make(chan bool, 1) + defer close(done) + if doCycleParam { + // Repeatedly cycle allocations to keep ~half of the GameServers Allocated, spread over both GSSets. + // Simulates a rolling update on a live Fleet that continuously receives new allocations, + // and reproduces an issue where this causes a rolling update to get stuck. + const halfScale = targetScale / 2 + go framework.CycleAllocations(t, flt, time.Second*3, time.Second*halfScale*3, done) + + // Wait for at least half of the fleet to have be cycled (either Allocated or shutting down) + // before updating the fleet. + err = framework.WaitForFleetCondition(t, flt, func(entry *logrus.Entry, fleet *agonesv1.Fleet) bool { + return fleet.Status.ReadyReplicas < halfScale + }) + } + // Change ContainerPort to trigger creating a new GSSet - fltCopy := flt.DeepCopy() - fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++ - flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) + err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { + flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.GetName(), metav1.GetOptions{}) + if err != nil { + return err + } + fltCopy := flt.DeepCopy() + fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++ + flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) + return err + }) assert.NoError(t, err) selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name}) @@ -308,7 +333,9 @@ func TestFleetRollingUpdate(t *testing.T) { assert.Nil(t, err) expectedTotal := targetScale + maxSurge + maxUnavailable + shift - if len(list.Items) > expectedTotal { + if len(list.Items) > expectedTotal && !doCycleParam { + // This fails when Allocation cycling is enabled as there's a number of additional gameservers + // shutting down. err = fmt.Errorf("new replicas should be less than target + maxSurge + maxUnavailable + shift. Replicas: %d, Expected: %d", len(list.Items), expectedTotal) } if err != nil { @@ -324,6 +351,11 @@ func TestFleetRollingUpdate(t *testing.T) { assert.NoError(t, err) + // Stop cycling Allocations. + // The AssertFleetConditions below will wait until the Allocation cycling has + // fully stopped (when all Allocated GameServers are shut down). + done <- true + // scale down, with allocation const scaleDownTarget = 1 if usePatch { diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index bbfdd556c3..7bc09b6a16 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -278,6 +278,38 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer state, gs.Namespace, gs.Name) } +func (f *Framework) GetGameServer(t *testing.T, namespace string, name string) *agonesv1.GameServer { + gs, err := f.AgonesClient.AgonesV1().GameServers(namespace).Get(context.Background(), name, metav1.GetOptions{}) + require.NoError(t, err, "failed to get gameserver: %s/%s", namespace, name) + return gs +} + +// CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period. +// Each Allocated GameServer gets deleted allocDuration after it was Allocated. +// GameServers will continue to be Allocated until a message is passed to the done channel. +func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, done <-chan bool) { + ticker := time.NewTicker(period) + for { + select { + case <-done: + return + case <-ticker.C: + gsa := GetAllocation(flt) + gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{}) + if err != nil || gsa.Status.State != allocationv1.GameServerAllocationAllocated { + continue + } + + // Deallocate after allocDuration. + go func(gsa *allocationv1.GameServerAllocation) { + time.Sleep(allocDuration) + err := f.AgonesClient.AgonesV1().GameServers(gsa.Namespace).Delete(context.Background(), gsa.Status.GameServerName, metav1.DeleteOptions{}) + require.NoError(t, err) + }(gsa) + } + } +} + // AssertFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes. func (f *Framework) AssertFleetCondition(t *testing.T, flt *agonesv1.Fleet, condition func(*logrus.Entry, *agonesv1.Fleet) bool) { err := f.WaitForFleetCondition(t, flt, condition) From 914910ef7cefa5f6e19b424a38d8cb96258b1357 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 5 Jan 2022 17:21:16 +0100 Subject: [PATCH 02/12] lint --- test/e2e/fleet_test.go | 4 ++-- test/e2e/framework/framework.go | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index c7ff98a4eb..b4b79bc40c 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -223,8 +223,8 @@ func TestFleetRollingUpdate(t *testing.T) { t.Parallel() ctx := context.Background() // Use scaleFleetPatch (true) or scaleFleetSubresource (false) - fixtures := []bool{true} //, false} // TODO Enable these again - maxSurge := []string{"25%"} //, "10%"} // TODO + fixtures := []bool{true} // , false} // TODO Enable these again + maxSurge := []string{"25%"} // , "10%"} // TODO doCycle := true // TODO: fixture? for _, usePatch := range fixtures { diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 7bc09b6a16..70c69d0f3c 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -278,6 +278,7 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer state, gs.Namespace, gs.Name) } +// GetGameServer gets the specified GameServer by namespace/name or fails the test with an error. func (f *Framework) GetGameServer(t *testing.T, namespace string, name string) *agonesv1.GameServer { gs, err := f.AgonesClient.AgonesV1().GameServers(namespace).Get(context.Background(), name, metav1.GetOptions{}) require.NoError(t, err, "failed to get gameserver: %s/%s", namespace, name) From 7dba592f89c3c6795be029a16a14e921f913c54c Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Mon, 14 Feb 2022 16:25:50 +0000 Subject: [PATCH 03/12] Fix: consider allocated replicas in rolling update --- pkg/fleets/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/fleets/controller.go b/pkg/fleets/controller.go index e61db5d5e7..1e3ca7ddd3 100644 --- a/pkg/fleets/controller.go +++ b/pkg/fleets/controller.go @@ -529,7 +529,7 @@ func (c *Controller) rollingUpdateRestFixedOnReady(ctx context.Context, fleet *a // Check if we are ready to scale down allPodsCount := agonesv1.SumSpecReplicas(allGSS) - newGSSUnavailablePodCount := active.Spec.Replicas - active.Status.ReadyReplicas + newGSSUnavailablePodCount := active.Spec.Replicas - active.Status.ReadyReplicas - active.Status.AllocatedReplicas maxScaledDown := allPodsCount - minAvailable - newGSSUnavailablePodCount if maxScaledDown <= 0 { From 9212fca5ef158fb4f0f9edd58f29c4c35f4be6aa Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Fri, 11 Mar 2022 12:34:26 +0000 Subject: [PATCH 04/12] Fix test sometimes using empty fleet name --- test/e2e/fleet_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index b4b79bc40c..701a5bd9c9 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -276,7 +276,8 @@ func TestFleetRollingUpdate(t *testing.T) { // Simulates a rolling update on a live Fleet that continuously receives new allocations, // and reproduces an issue where this causes a rolling update to get stuck. const halfScale = targetScale / 2 - go framework.CycleAllocations(t, flt, time.Second*3, time.Second*halfScale*3, done) + const period = 3 * time.Second + go framework.CycleAllocations(t, flt, period, period*halfScale, done) // Wait for at least half of the fleet to have be cycled (either Allocated or shutting down) // before updating the fleet. @@ -286,8 +287,9 @@ func TestFleetRollingUpdate(t *testing.T) { } // Change ContainerPort to trigger creating a new GSSet + fltName := flt.GetName() err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { - flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.GetName(), metav1.GetOptions{}) + flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{}) if err != nil { return err } From 194f571bbd0f2cef8e2dc20b9031b3b5d04292b0 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Fri, 11 Mar 2022 14:47:54 +0000 Subject: [PATCH 05/12] Cleanup --- test/e2e/fleet_test.go | 300 +++++++++++++++++++++++------------------ 1 file changed, 168 insertions(+), 132 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index 701a5bd9c9..478f944bf6 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -222,157 +222,193 @@ func TestFleetScaleUpEditAndScaleDown(t *testing.T) { func TestFleetRollingUpdate(t *testing.T) { t.Parallel() ctx := context.Background() - // Use scaleFleetPatch (true) or scaleFleetSubresource (false) - fixtures := []bool{true} // , false} // TODO Enable these again - maxSurge := []string{"25%"} // , "10%"} // TODO - doCycle := true // TODO: fixture? + fixtures := []struct { + // Use scaleFleetPatch (true) or scaleFleetSubresource (false) + usePatch bool + maxSurge string + // If true, create and cycle GS Allocations when triggering a rolling update: + // - Repeatedly allocate and shutdown GameServers (keeping ~50% of the Fleet in an Allocated state). + // - Once 50% of the Fleet is Allocated, trigger the rolling update. + // - Keep allocating/shutting down GameServers, to allocate in both the old and new GSSets. + // - Verify the rolling update completes. + // This simulates updating a Fleet that is live/in-use, and reproduces an issue where allocated GameServers + // causes a rolling update to get stuck and keep the old GameServerSet up. + cycleAllocations bool + }{ + { + usePatch: true, + maxSurge: "25%", + cycleAllocations: false, + }, + { + usePatch: true, + maxSurge: "10%", + cycleAllocations: false, + }, + { + usePatch: false, + maxSurge: "25%", + cycleAllocations: false, + }, + { + usePatch: false, + maxSurge: "10%", + cycleAllocations: false, + }, + { + usePatch: true, + maxSurge: "25%", + cycleAllocations: true, + }, + } + for _, fixture := range fixtures { + usePatch := fixture.usePatch + maxSurgeParam := fixture.maxSurge + cycleAllocations := fixture.cycleAllocations + t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, cycleAllocations), func(t *testing.T) { + t.Parallel() - for _, usePatch := range fixtures { - for _, maxSurgeParam := range maxSurge { - usePatch := usePatch - maxSurgeParam := maxSurgeParam - doCycleParam := doCycle - t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, doCycleParam), func(t *testing.T) { - t.Parallel() - - client := framework.AgonesClient.AgonesV1() - - flt := defaultFleet(framework.Namespace) - flt.ApplyDefaults() - flt.Spec.Replicas = 1 - rollingUpdatePercent := intstr.FromString(maxSurgeParam) - flt.Spec.Strategy.RollingUpdate.MaxSurge = &rollingUpdatePercent - flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent - - flt, err := client.Fleets(framework.Namespace).Create(ctx, flt, metav1.CreateOptions{}) - if assert.Nil(t, err) { - defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck - } + client := framework.AgonesClient.AgonesV1() - assert.Equal(t, int32(1), flt.Spec.Replicas) - assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal) - assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal) + flt := defaultFleet(framework.Namespace) + flt.ApplyDefaults() + flt.Spec.Replicas = 1 + rollingUpdatePercent := intstr.FromString(maxSurgeParam) + flt.Spec.Strategy.RollingUpdate.MaxSurge = &rollingUpdatePercent + flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent - framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas)) + flt, err := client.Fleets(framework.Namespace).Create(ctx, flt, metav1.CreateOptions{}) + if assert.Nil(t, err) { + defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck + } - // scale up - const targetScale = 8 - if usePatch { - flt = scaleFleetPatch(ctx, t, flt, targetScale) - assert.Equal(t, int32(targetScale), flt.Spec.Replicas) - } else { - flt = scaleFleetSubresource(ctx, t, flt, targetScale) - } + assert.Equal(t, int32(1), flt.Spec.Replicas) + assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal) + assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal) - framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(targetScale)) + framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas)) - flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{}) - assert.NoError(t, err) + // scale up + const targetScale = 8 + if usePatch { + flt = scaleFleetPatch(ctx, t, flt, targetScale) + assert.Equal(t, int32(targetScale), flt.Spec.Replicas) + } else { + flt = scaleFleetSubresource(ctx, t, flt, targetScale) + } - done := make(chan bool, 1) - defer close(done) - if doCycleParam { - // Repeatedly cycle allocations to keep ~half of the GameServers Allocated, spread over both GSSets. - // Simulates a rolling update on a live Fleet that continuously receives new allocations, - // and reproduces an issue where this causes a rolling update to get stuck. - const halfScale = targetScale / 2 - const period = 3 * time.Second - go framework.CycleAllocations(t, flt, period, period*halfScale, done) - - // Wait for at least half of the fleet to have be cycled (either Allocated or shutting down) - // before updating the fleet. - err = framework.WaitForFleetCondition(t, flt, func(entry *logrus.Entry, fleet *agonesv1.Fleet) bool { - return fleet.Status.ReadyReplicas < halfScale - }) - } + framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(targetScale)) - // Change ContainerPort to trigger creating a new GSSet - fltName := flt.GetName() - err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { - flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{}) - if err != nil { - return err - } - fltCopy := flt.DeepCopy() - fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++ - flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) - return err - }) - assert.NoError(t, err) + flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{}) + assert.NoError(t, err) - selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name}) - // New GSS was created - err = wait.PollImmediate(1*time.Second, 30*time.Second, func() (bool, error) { - gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx, - metav1.ListOptions{LabelSelector: selector.String()}) - if err != nil { - return false, err - } - return len(gssList.Items) == 2, nil + done := make(chan bool, 1) + defer close(done) + if cycleAllocations { + // Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and + // delete such that both the old and new GSSet contain allocated GameServers. + const halfScale = targetScale / 2 + const period = 3 * time.Second + go framework.CycleAllocations(t, flt, period, period*halfScale, done) + + // Wait for at least half of the fleet to have be cycled (either Allocated or shutting down) + // before updating the fleet. + err = framework.WaitForFleetCondition(t, flt, func(entry *logrus.Entry, fleet *agonesv1.Fleet) bool { + return fleet.Status.ReadyReplicas < halfScale }) - assert.NoError(t, err) - // Check that total number of gameservers in the system does not exceed the RollingUpdate - // parameters (creating no more than maxSurge, deleting maxUnavailable servers at a time) - // Wait for old GSSet to be deleted - err = wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { - list, err := framework.AgonesClient.AgonesV1().GameServers(framework.Namespace).List(ctx, - metav1.ListOptions{LabelSelector: selector.String()}) - if err != nil { - return false, err - } + } + + // Change ContainerPort to trigger creating a new GSSet + fltName := flt.GetName() + err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { + flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{}) + if err != nil { + return err + } + fltCopy := flt.DeepCopy() + fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++ + flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) + return err + }) + assert.NoError(t, err) - maxSurge, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxSurge, int(flt.Spec.Replicas), true) - assert.Nil(t, err) + selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name}) + // New GSS was created + err = wait.PollImmediate(1*time.Second, 30*time.Second, func() (bool, error) { + gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx, + metav1.ListOptions{LabelSelector: selector.String()}) + if err != nil { + return false, err + } + return len(gssList.Items) == 2, nil + }) + assert.NoError(t, err) + // Check that total number of gameservers in the system does not exceed the RollingUpdate + // parameters (creating no more than maxSurge, deleting maxUnavailable servers at a time) + // Wait for old GSSet to be deleted + err = wait.PollImmediate(1*time.Second, 5*time.Minute, func() (bool, error) { + list, err := framework.AgonesClient.AgonesV1().GameServers(framework.Namespace).List(ctx, + metav1.ListOptions{LabelSelector: selector.String()}) + if err != nil { + return false, err + } - roundUp := false - maxUnavailable, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxUnavailable, int(flt.Spec.Replicas), roundUp) + maxSurge, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxSurge, int(flt.Spec.Replicas), true) + assert.Nil(t, err) - if maxUnavailable == 0 { - maxUnavailable = 1 - } - // This difference is inevitable, also could be seen with Deployments and ReplicaSets - shift := maxUnavailable - assert.Nil(t, err) - - expectedTotal := targetScale + maxSurge + maxUnavailable + shift - if len(list.Items) > expectedTotal && !doCycleParam { - // This fails when Allocation cycling is enabled as there's a number of additional gameservers - // shutting down. - err = fmt.Errorf("new replicas should be less than target + maxSurge + maxUnavailable + shift. Replicas: %d, Expected: %d", len(list.Items), expectedTotal) - } - if err != nil { - return false, err - } - gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx, - metav1.ListOptions{LabelSelector: selector.String()}) - if err != nil { - return false, err - } - return len(gssList.Items) == 1, nil - }) + roundUp := false + maxUnavailable, err := intstr.GetValueFromIntOrPercent(flt.Spec.Strategy.RollingUpdate.MaxUnavailable, int(flt.Spec.Replicas), roundUp) - assert.NoError(t, err) + if maxUnavailable == 0 { + maxUnavailable = 1 + } + // This difference is inevitable, also could be seen with Deployments and ReplicaSets + shift := maxUnavailable + assert.Nil(t, err) - // Stop cycling Allocations. - // The AssertFleetConditions below will wait until the Allocation cycling has - // fully stopped (when all Allocated GameServers are shut down). - done <- true - - // scale down, with allocation - const scaleDownTarget = 1 - if usePatch { - flt = scaleFleetPatch(ctx, t, flt, scaleDownTarget) - } else { - flt = scaleFleetSubresource(ctx, t, flt, scaleDownTarget) + // Ignore any GameServers that are shutting down (resulting from Allocation cycling). + shuttingDown := 0 + for _, gs := range list.Items { + if gs.IsBeingDeleted() { + shuttingDown++ + } + } + expectedTotal := targetScale + maxSurge + maxUnavailable + shift + shuttingDown + if len(list.Items) > expectedTotal { + err = fmt.Errorf("new replicas should be less than target + maxSurge + maxUnavailable + shift + shuttingDown. Replicas: %d, Expected: %d", len(list.Items), expectedTotal) + } + if err != nil { + return false, err + } + gssList, err := framework.AgonesClient.AgonesV1().GameServerSets(framework.Namespace).List(ctx, + metav1.ListOptions{LabelSelector: selector.String()}) + if err != nil { + return false, err } + return len(gssList.Items) == 1, nil + }) - framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(1)) + assert.NoError(t, err) - framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool { - return fleet.Status.AllocatedReplicas == 0 - }) + // Stop cycling Allocations. + // The AssertFleetConditions below will wait until the Allocation cycling has + // fully stopped (when all Allocated GameServers are shut down). + done <- true + + // scale down, with allocation + const scaleDownTarget = 1 + if usePatch { + flt = scaleFleetPatch(ctx, t, flt, scaleDownTarget) + } else { + flt = scaleFleetSubresource(ctx, t, flt, scaleDownTarget) + } + + framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(1)) + + framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool { + return fleet.Status.AllocatedReplicas == 0 }) - } + }) } } From 2577696b79a73d9357d4209643e791005f9463d8 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 16 Mar 2022 16:38:58 +0000 Subject: [PATCH 06/12] Remove GetGameServer, refactor to use wait.PollUntil --- test/e2e/fleet_test.go | 8 +++--- test/e2e/framework/framework.go | 45 +++++++++++++-------------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index 478f944bf6..dead48e6ab 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -302,14 +302,14 @@ func TestFleetRollingUpdate(t *testing.T) { flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{}) assert.NoError(t, err) - done := make(chan bool, 1) - defer close(done) + stopCh := make(chan struct{}) + defer close(stopCh) if cycleAllocations { // Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and // delete such that both the old and new GSSet contain allocated GameServers. const halfScale = targetScale / 2 const period = 3 * time.Second - go framework.CycleAllocations(t, flt, period, period*halfScale, done) + go framework.CycleAllocations(t, flt, period, period*halfScale, stopCh) // Wait for at least half of the fleet to have be cycled (either Allocated or shutting down) // before updating the fleet. @@ -393,7 +393,7 @@ func TestFleetRollingUpdate(t *testing.T) { // Stop cycling Allocations. // The AssertFleetConditions below will wait until the Allocation cycling has // fully stopped (when all Allocated GameServers are shut down). - done <- true + close(stopCh) // scale down, with allocation const scaleDownTarget = 1 diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 632041a416..3006c31f72 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -278,37 +278,28 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer state, gs.Namespace, gs.Name) } -// GetGameServer gets the specified GameServer by namespace/name or fails the test with an error. -func (f *Framework) GetGameServer(t *testing.T, namespace string, name string) *agonesv1.GameServer { - gs, err := f.AgonesClient.AgonesV1().GameServers(namespace).Get(context.Background(), name, metav1.GetOptions{}) - require.NoError(t, err, "failed to get gameserver: %s/%s", namespace, name) - return gs -} - // CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period. // Each Allocated GameServer gets deleted allocDuration after it was Allocated. // GameServers will continue to be Allocated until a message is passed to the done channel. -func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, done <-chan bool) { - ticker := time.NewTicker(period) - for { - select { - case <-done: - return - case <-ticker.C: - gsa := GetAllocation(flt) - gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{}) - if err != nil || gsa.Status.State != allocationv1.GameServerAllocationAllocated { - continue - } - - // Deallocate after allocDuration. - go func(gsa *allocationv1.GameServerAllocation) { - time.Sleep(allocDuration) - err := f.AgonesClient.AgonesV1().GameServers(gsa.Namespace).Delete(context.Background(), gsa.Status.GameServerName, metav1.DeleteOptions{}) - require.NoError(t, err) - }(gsa) +func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, stopCh <-chan struct{}) { + err := wait.PollImmediateUntil(period, func() (bool, error) { + gsa := GetAllocation(flt) + gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{}) + if err != nil || gsa.Status.State != allocationv1.GameServerAllocationAllocated { + // Ignore error. Could be that the buffer was empty, will try again next cycle. + return false, nil } - } + + // Deallocate after allocDuration. + go func(gsa *allocationv1.GameServerAllocation) { + time.Sleep(allocDuration) + err := f.AgonesClient.AgonesV1().GameServers(gsa.Namespace).Delete(context.Background(), gsa.Status.GameServerName, metav1.DeleteOptions{}) + require.NoError(t, err) + }(gsa) + + return false, nil + }, stopCh) + require.NoError(t, err) } // AssertFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes. From f4ad90c5954761c8778b7b31f8018e7a0b29d3c2 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 16 Mar 2022 16:42:05 +0000 Subject: [PATCH 07/12] Reference fixture directly --- test/e2e/fleet_test.go | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index dead48e6ab..1fd6963651 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -261,11 +261,9 @@ func TestFleetRollingUpdate(t *testing.T) { cycleAllocations: true, }, } - for _, fixture := range fixtures { - usePatch := fixture.usePatch - maxSurgeParam := fixture.maxSurge - cycleAllocations := fixture.cycleAllocations - t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", usePatch, maxSurgeParam, cycleAllocations), func(t *testing.T) { + for i := range fixtures { + fixture := fixtures[i] + t.Run(fmt.Sprintf("Use fleet Patch %t %s cycle %t", fixture.usePatch, fixture.maxSurge, fixture.cycleAllocations), func(t *testing.T) { t.Parallel() client := framework.AgonesClient.AgonesV1() @@ -273,7 +271,7 @@ func TestFleetRollingUpdate(t *testing.T) { flt := defaultFleet(framework.Namespace) flt.ApplyDefaults() flt.Spec.Replicas = 1 - rollingUpdatePercent := intstr.FromString(maxSurgeParam) + rollingUpdatePercent := intstr.FromString(fixture.maxSurge) flt.Spec.Strategy.RollingUpdate.MaxSurge = &rollingUpdatePercent flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent @@ -283,14 +281,14 @@ func TestFleetRollingUpdate(t *testing.T) { } assert.Equal(t, int32(1), flt.Spec.Replicas) - assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal) - assert.Equal(t, maxSurgeParam, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal) + assert.Equal(t, fixture.maxSurge, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal) + assert.Equal(t, fixture.maxSurge, flt.Spec.Strategy.RollingUpdate.MaxUnavailable.StrVal) framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(flt.Spec.Replicas)) // scale up const targetScale = 8 - if usePatch { + if fixture.usePatch { flt = scaleFleetPatch(ctx, t, flt, targetScale) assert.Equal(t, int32(targetScale), flt.Spec.Replicas) } else { @@ -304,7 +302,7 @@ func TestFleetRollingUpdate(t *testing.T) { stopCh := make(chan struct{}) defer close(stopCh) - if cycleAllocations { + if fixture.cycleAllocations { // Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and // delete such that both the old and new GSSet contain allocated GameServers. const halfScale = targetScale / 2 @@ -397,7 +395,7 @@ func TestFleetRollingUpdate(t *testing.T) { // scale down, with allocation const scaleDownTarget = 1 - if usePatch { + if fixture.usePatch { flt = scaleFleetPatch(ctx, t, flt, scaleDownTarget) } else { flt = scaleFleetSubresource(ctx, t, flt, scaleDownTarget) From cdc7d7e9c70457d58ec0778c82acc63ac597a9cf Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 16 Mar 2022 17:03:30 +0000 Subject: [PATCH 08/12] Use require.NoError --- test/e2e/fleet_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index 1fd6963651..c2a5d8fc49 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -276,9 +276,8 @@ func TestFleetRollingUpdate(t *testing.T) { flt.Spec.Strategy.RollingUpdate.MaxUnavailable = &rollingUpdatePercent flt, err := client.Fleets(framework.Namespace).Create(ctx, flt, metav1.CreateOptions{}) - if assert.Nil(t, err) { - defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck - } + require.NoError(t, err) + defer client.Fleets(framework.Namespace).Delete(ctx, flt.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint:errcheck assert.Equal(t, int32(1), flt.Spec.Replicas) assert.Equal(t, fixture.maxSurge, flt.Spec.Strategy.RollingUpdate.MaxSurge.StrVal) @@ -298,7 +297,7 @@ func TestFleetRollingUpdate(t *testing.T) { framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(targetScale)) flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{}) - assert.NoError(t, err) + require.NoError(t, err) stopCh := make(chan struct{}) defer close(stopCh) @@ -328,7 +327,7 @@ func TestFleetRollingUpdate(t *testing.T) { flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) return err }) - assert.NoError(t, err) + require.NoError(t, err) selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name}) // New GSS was created From a9b191eee3d00286ae289ba40d9e45404d229433 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 16 Mar 2022 17:06:25 +0000 Subject: [PATCH 09/12] Use require.Eventually --- test/e2e/fleet_test.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index c2a5d8fc49..9044b26220 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -315,19 +315,16 @@ func TestFleetRollingUpdate(t *testing.T) { }) } - // Change ContainerPort to trigger creating a new GSSet + // Change ContainerPort to trigger creating a new GSSet. Retry in case of a conflict. fltName := flt.GetName() - err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { + require.Eventually(t, func() bool { flt, err = client.Fleets(framework.Namespace).Get(ctx, fltName, metav1.GetOptions{}) - if err != nil { - return err - } + require.NoError(t, err) fltCopy := flt.DeepCopy() fltCopy.Spec.Template.Spec.Ports[0].ContainerPort++ flt, err = client.Fleets(framework.Namespace).Update(ctx, fltCopy, metav1.UpdateOptions{}) - return err - }) - require.NoError(t, err) + return err == nil + }, time.Minute, time.Second) selector := labels.SelectorFromSet(labels.Set{agonesv1.FleetNameLabel: flt.ObjectMeta.Name}) // New GSS was created From e67ceff3d217890f08366a7078dc591cc4dcb773 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 16 Mar 2022 17:10:27 +0000 Subject: [PATCH 10/12] Use context + cancel instead of chan --- test/e2e/fleet_test.go | 8 ++++---- test/e2e/framework/framework.go | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index 9044b26220..a9fd233f61 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -299,14 +299,14 @@ func TestFleetRollingUpdate(t *testing.T) { flt, err = client.Fleets(framework.Namespace).Get(ctx, flt.ObjectMeta.GetName(), metav1.GetOptions{}) require.NoError(t, err) - stopCh := make(chan struct{}) - defer close(stopCh) + cycleCtx, cancelCycle := context.WithCancel(ctx) + defer cancelCycle() if fixture.cycleAllocations { // Repeatedly cycle allocations to keep ~half of the GameServers Allocated. Repeatedly Allocate and // delete such that both the old and new GSSet contain allocated GameServers. const halfScale = targetScale / 2 const period = 3 * time.Second - go framework.CycleAllocations(t, flt, period, period*halfScale, stopCh) + go framework.CycleAllocations(t, cycleCtx, flt, period, period*halfScale) // Wait for at least half of the fleet to have be cycled (either Allocated or shutting down) // before updating the fleet. @@ -387,7 +387,7 @@ func TestFleetRollingUpdate(t *testing.T) { // Stop cycling Allocations. // The AssertFleetConditions below will wait until the Allocation cycling has // fully stopped (when all Allocated GameServers are shut down). - close(stopCh) + cancelCycle() // scale down, with allocation const scaleDownTarget = 1 diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 3006c31f72..5aab9e7773 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -281,7 +281,7 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer // CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period. // Each Allocated GameServer gets deleted allocDuration after it was Allocated. // GameServers will continue to be Allocated until a message is passed to the done channel. -func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration, stopCh <-chan struct{}) { +func (f *Framework) CycleAllocations(t *testing.T, ctx context.Context, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration) { err := wait.PollImmediateUntil(period, func() (bool, error) { gsa := GetAllocation(flt) gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{}) @@ -298,7 +298,7 @@ func (f *Framework) CycleAllocations(t *testing.T, flt *agonesv1.Fleet, period t }(gsa) return false, nil - }, stopCh) + }, ctx.Done()) require.NoError(t, err) } From 452d292ee5a09fbb9e1ca29f6dffd905132b25a9 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 16 Mar 2022 17:23:48 +0000 Subject: [PATCH 11/12] Ctx first param --- test/e2e/fleet_test.go | 2 +- test/e2e/framework/framework.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index a9fd233f61..078b6b6417 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -306,7 +306,7 @@ func TestFleetRollingUpdate(t *testing.T) { // delete such that both the old and new GSSet contain allocated GameServers. const halfScale = targetScale / 2 const period = 3 * time.Second - go framework.CycleAllocations(t, cycleCtx, flt, period, period*halfScale) + go framework.CycleAllocations(cycleCtx, t, flt, period, period*halfScale) // Wait for at least half of the fleet to have be cycled (either Allocated or shutting down) // before updating the fleet. diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 5aab9e7773..23ece59239 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -281,7 +281,7 @@ func (f *Framework) WaitForGameServerState(t *testing.T, gs *agonesv1.GameServer // CycleAllocations repeatedly Allocates a GameServer in the Fleet (if one is available), once every specified period. // Each Allocated GameServer gets deleted allocDuration after it was Allocated. // GameServers will continue to be Allocated until a message is passed to the done channel. -func (f *Framework) CycleAllocations(t *testing.T, ctx context.Context, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration) { +func (f *Framework) CycleAllocations(ctx context.Context, t *testing.T, flt *agonesv1.Fleet, period time.Duration, allocDuration time.Duration) { err := wait.PollImmediateUntil(period, func() (bool, error) { gsa := GetAllocation(flt) gsa, err := f.AgonesClient.AllocationV1().GameServerAllocations(flt.Namespace).Create(context.Background(), gsa, metav1.CreateOptions{}) From 57bcd5f9cded1f9444e4bbcf501f07b87687dad6 Mon Sep 17 00:00:00 2001 From: WVerlaek Date: Wed, 16 Mar 2022 18:00:43 +0000 Subject: [PATCH 12/12] Ignore wait timeout --- test/e2e/framework/framework.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index 23ece59239..53fa76cc75 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -299,7 +299,10 @@ func (f *Framework) CycleAllocations(ctx context.Context, t *testing.T, flt *ago return false, nil }, ctx.Done()) - require.NoError(t, err) + // Ignore wait timeout error, will always be returned when the context is cancelled at the end of the test. + if err != wait.ErrWaitTimeout { + require.NoError(t, err) + } } // AssertFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes.