From 19265024f9fb4ca7ebe85f3fdaf37761eeb32273 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Fri, 15 May 2020 21:03:27 -0700 Subject: [PATCH] Flaky: TestGameServerReserve Trying to test transitive state while it is actively changing is fraught with peril. Tests flake, hair is pulled, and screams echo throughout the lands. This PR allows one to send a duration to the RESERVE command to udp-simple, as well as update TestGameServerReserve to only confirm concrete states, which therefore can't flake. Closes #1543 --- build/Makefile | 2 +- examples/simple-udp/Makefile | 2 +- examples/simple-udp/main.go | 19 ++++++-- test/e2e/gameserver_test.go | 85 +++++++++++++----------------------- 4 files changed, 48 insertions(+), 60 deletions(-) diff --git a/build/Makefile b/build/Makefile index 3000eb1759..1daa04a8a4 100644 --- a/build/Makefile +++ b/build/Makefile @@ -57,7 +57,7 @@ KIND_PROFILE ?= agones KIND_CONTAINER_NAME=$(KIND_PROFILE)-control-plane # Game Server image to use while doing end-to-end tests -GS_TEST_IMAGE ?= gcr.io/agones-images/udp-server:0.20 +GS_TEST_IMAGE ?= gcr.io/agones-images/udp-server:0.21 ALL_FEATURE_GATES ?= "PlayerTracking=true&ContainerPortAllocation=true" diff --git a/examples/simple-udp/Makefile b/examples/simple-udp/Makefile index c2c9e99332..da627692ca 100644 --- a/examples/simple-udp/Makefile +++ b/examples/simple-udp/Makefile @@ -27,7 +27,7 @@ REPOSITORY = gcr.io/agones-images mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST))) project_path := $(dir $(mkfile_path)) -server_tag = $(REPOSITORY)/udp-server:0.20 +server_tag = $(REPOSITORY)/udp-server:0.21 root_path = $(realpath $(project_path)/../..) # _____ _ diff --git a/examples/simple-udp/main.go b/examples/simple-udp/main.go index 25f1a20bed..e4f17c4b45 100644 --- a/examples/simple-udp/main.go +++ b/examples/simple-udp/main.go @@ -18,6 +18,7 @@ package main import ( "encoding/json" "flag" + "fmt" "log" "net" "os" @@ -144,7 +145,17 @@ func readWriteLoop(conn net.PacketConn, stop chan struct{}, s *sdk.SDK) { allocate(s) case "RESERVE": - reserve(s) + if len(parts) == 2 { + if dur, err := time.ParseDuration(parts[1]); err != nil { + respond(conn, sender, fmt.Sprintf("ERROR: %s\n", err)) + continue + } else { + reserve(s, dur) + } + } else { + respond(conn, sender, "ERROR: Invalid RESERVE, should have 1 argument\n") + continue + } case "WATCH": watchGameServerEvents(s) @@ -183,7 +194,7 @@ func readWriteLoop(conn net.PacketConn, stop chan struct{}, s *sdk.SDK) { continue case 2: if cap, err := strconv.Atoi(parts[1]); err != nil { - respond(conn, sender, err.Error()+"\n") + respond(conn, sender, fmt.Sprintf("ERROR: %s\n", err)) continue } else { setPlayerCapacity(s, int64(cap)) @@ -245,8 +256,8 @@ func allocate(s *sdk.SDK) { } // reserve for 10 seconds -func reserve(s *sdk.SDK) { - if err := s.Reserve(10 * time.Second); err != nil { +func reserve(s *sdk.SDK, duration time.Duration) { + if err := s.Reserve(duration); err != nil { log.Fatalf("could not reserve gameserver: %v", err) } } diff --git a/test/e2e/gameserver_test.go b/test/e2e/gameserver_test.go index cc5e5f5ea7..22ac1c0e46 100644 --- a/test/e2e/gameserver_test.go +++ b/test/e2e/gameserver_test.go @@ -24,7 +24,6 @@ import ( agonesv1 "agones.dev/agones/pkg/apis/agones/v1" "agones.dev/agones/pkg/util/runtime" e2eframework "agones.dev/agones/test/e2e/framework" - "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" @@ -32,7 +31,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes/scheme" ) const ( @@ -560,7 +558,17 @@ func TestGameServerWithPortsMappedToMultipleContainers(t *testing.T) { func TestGameServerReserve(t *testing.T) { t.Parallel() - logger := logrus.WithField("test", t.Name()) + + /* + We are deliberately not trying to test the transition between Reserved -> Ready. + + We have found that trying to catch the GameServer in the Reserved state can be flaky, + as we can't control the speed in which the Kubernetes API is going to reply to request, + and we could sometimes miss when the GameServer is in the Reserved State before it goes to Ready. + + Therefore we are going to test for concrete states that we don't need to catch while + in a transitive state. + */ gs := framework.DefaultGameServer(defaultNs) gs, err := framework.CreateGameServerAndWaitUntilReady(defaultNs, gs) @@ -570,65 +578,34 @@ func TestGameServerReserve(t *testing.T) { defer framework.AgonesClient.AgonesV1().GameServers(defaultNs).Delete(gs.ObjectMeta.Name, nil) // nolint: errcheck assert.Equal(t, gs.Status.State, agonesv1.GameServerStateReady) - logger.Info("sending RESERVE command") - reply, err := e2eframework.SendGameServerUDP(gs, "RESERVE") + reply, err := e2eframework.SendGameServerUDP(gs, "RESERVE 0") if !assert.NoError(t, err) { assert.FailNow(t, "Could not message GameServer") } - logger.Info("Received response") - assert.Equal(t, "ACK: RESERVE\n", reply) + assert.Equal(t, "ACK: RESERVE 0\n", reply) - // might as well Sleep, nothing else going to happen for at least 10 seconds. Let other things work. - logger.Info("Waiting for 10 seconds") - time.Sleep(10 * time.Second) - - // Since polling the backing GameServer can sometimes pause for longer than 10 seconds, - // we are instead going to look at the event stream for the GameServer to determine that the requisite change to - // Reserved and back to Ready has taken place. - // - // There is a possibility that Events may get dropped if the Kubernetes cluster gets overwhelmed, or - // time out after a period. So if this test becomes flaky because due to these potential issues, we will - // need to find an alternate approach. At this stage through, it seems to be working consistently. - err = wait.PollImmediate(time.Second, 2*time.Minute, func() (bool, error) { - logger.Info("checking gameserver events") - list, err := framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, gs) - if err != nil { - return false, err - } + gs, err = framework.WaitForGameServerState(gs, agonesv1.GameServerStateReserved, 3*time.Minute) + assert.NoError(t, err) - var readyEvent corev1.Event - var reserverdEvent corev1.Event + reply, err = e2eframework.SendGameServerUDP(gs, "ALLOCATE") + if !assert.NoError(t, err) { + assert.FailNow(t, "Could not message GameServer") + } + assert.Equal(t, "ACK: ALLOCATE\n", reply) - for _, e := range list.Items { - if e.Reason == string(agonesv1.GameServerStateReady) { - readyEvent = e - } - if e.Reason == string(agonesv1.GameServerStateReserved) { - reserverdEvent = e - } - } - if readyEvent.Reason == "" || reserverdEvent.Reason == "" { - return false, nil - } + // put it in a totally different state, just to reset things. + gs, err = framework.WaitForGameServerState(gs, agonesv1.GameServerStateAllocated, 3*time.Minute) + assert.NoError(t, err) - // debug once we have both a Ready and Reserved event - for _, e := range list.Items { - logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count). - WithField("last-time", e.LastTimestamp). - WithField("name", e.Name). - WithField("reason", e.Reason).WithField("message", e.Message).Info("gs event details") - } + reply, err = e2eframework.SendGameServerUDP(gs, "RESERVE 5s") + if !assert.NoError(t, err) { + assert.FailNow(t, "Could not message GameServer") + } + assert.Equal(t, "ACK: RESERVE 5s\n", reply) - if readyEvent.Count != 2 { - return false, nil - } - diff := readyEvent.LastTimestamp.Sub(reserverdEvent.FirstTimestamp.Time).Seconds() - // allow for some variation - if diff >= 10 && diff <= 20 { - return true, nil - } - return true, errors.Errorf("difference of %v seconds was not between 10 and 20", diff) - }) + // sleep, since we're going to wait for the Ready response. + time.Sleep(5 * time.Second) + _, err = framework.WaitForGameServerState(gs, agonesv1.GameServerStateReady, 3*time.Minute) assert.NoError(t, err) }