Skip to content

Commit

Permalink
Flaky: TestGameServerReserve
Browse files Browse the repository at this point in the history
Trying to test transitive state while it is actively changing is
fraught with peril. Tests flake, hair is pulled, and screams echo
throughout the lands.

This PR allows one to send a duration to the RESERVE command to
udp-simple, as well as update TestGameServerReserve to only confirm
concrete states, which therefore can't flake.

Closes #1543
  • Loading branch information
markmandel committed May 18, 2020
1 parent 0e67862 commit 1926502
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 60 deletions.
2 changes: 1 addition & 1 deletion build/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ KIND_PROFILE ?= agones
KIND_CONTAINER_NAME=$(KIND_PROFILE)-control-plane

# Game Server image to use while doing end-to-end tests
GS_TEST_IMAGE ?= gcr.io/agones-images/udp-server:0.20
GS_TEST_IMAGE ?= gcr.io/agones-images/udp-server:0.21

ALL_FEATURE_GATES ?= "PlayerTracking=true&ContainerPortAllocation=true"

Expand Down
2 changes: 1 addition & 1 deletion examples/simple-udp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ REPOSITORY = gcr.io/agones-images

mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
project_path := $(dir $(mkfile_path))
server_tag = $(REPOSITORY)/udp-server:0.20
server_tag = $(REPOSITORY)/udp-server:0.21
root_path = $(realpath $(project_path)/../..)

# _____ _
Expand Down
19 changes: 15 additions & 4 deletions examples/simple-udp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package main
import (
"encoding/json"
"flag"
"fmt"
"log"
"net"
"os"
Expand Down Expand Up @@ -144,7 +145,17 @@ func readWriteLoop(conn net.PacketConn, stop chan struct{}, s *sdk.SDK) {
allocate(s)

case "RESERVE":
reserve(s)
if len(parts) == 2 {
if dur, err := time.ParseDuration(parts[1]); err != nil {
respond(conn, sender, fmt.Sprintf("ERROR: %s\n", err))
continue
} else {
reserve(s, dur)
}
} else {
respond(conn, sender, "ERROR: Invalid RESERVE, should have 1 argument\n")
continue
}

case "WATCH":
watchGameServerEvents(s)
Expand Down Expand Up @@ -183,7 +194,7 @@ func readWriteLoop(conn net.PacketConn, stop chan struct{}, s *sdk.SDK) {
continue
case 2:
if cap, err := strconv.Atoi(parts[1]); err != nil {
respond(conn, sender, err.Error()+"\n")
respond(conn, sender, fmt.Sprintf("ERROR: %s\n", err))
continue
} else {
setPlayerCapacity(s, int64(cap))
Expand Down Expand Up @@ -245,8 +256,8 @@ func allocate(s *sdk.SDK) {
}

// reserve for 10 seconds
func reserve(s *sdk.SDK) {
if err := s.Reserve(10 * time.Second); err != nil {
func reserve(s *sdk.SDK, duration time.Duration) {
if err := s.Reserve(duration); err != nil {
log.Fatalf("could not reserve gameserver: %v", err)
}
}
Expand Down
85 changes: 31 additions & 54 deletions test/e2e/gameserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,13 @@ import (
agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
"agones.dev/agones/pkg/util/runtime"
e2eframework "agones.dev/agones/test/e2e/framework"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/scheme"
)

const (
Expand Down Expand Up @@ -560,7 +558,17 @@ func TestGameServerWithPortsMappedToMultipleContainers(t *testing.T) {

func TestGameServerReserve(t *testing.T) {
t.Parallel()
logger := logrus.WithField("test", t.Name())

/*
We are deliberately not trying to test the transition between Reserved -> Ready.
We have found that trying to catch the GameServer in the Reserved state can be flaky,
as we can't control the speed in which the Kubernetes API is going to reply to request,
and we could sometimes miss when the GameServer is in the Reserved State before it goes to Ready.
Therefore we are going to test for concrete states that we don't need to catch while
in a transitive state.
*/

gs := framework.DefaultGameServer(defaultNs)
gs, err := framework.CreateGameServerAndWaitUntilReady(defaultNs, gs)
Expand All @@ -570,65 +578,34 @@ func TestGameServerReserve(t *testing.T) {
defer framework.AgonesClient.AgonesV1().GameServers(defaultNs).Delete(gs.ObjectMeta.Name, nil) // nolint: errcheck
assert.Equal(t, gs.Status.State, agonesv1.GameServerStateReady)

logger.Info("sending RESERVE command")
reply, err := e2eframework.SendGameServerUDP(gs, "RESERVE")
reply, err := e2eframework.SendGameServerUDP(gs, "RESERVE 0")
if !assert.NoError(t, err) {
assert.FailNow(t, "Could not message GameServer")
}
logger.Info("Received response")
assert.Equal(t, "ACK: RESERVE\n", reply)
assert.Equal(t, "ACK: RESERVE 0\n", reply)

// might as well Sleep, nothing else going to happen for at least 10 seconds. Let other things work.
logger.Info("Waiting for 10 seconds")
time.Sleep(10 * time.Second)

// Since polling the backing GameServer can sometimes pause for longer than 10 seconds,
// we are instead going to look at the event stream for the GameServer to determine that the requisite change to
// Reserved and back to Ready has taken place.
//
// There is a possibility that Events may get dropped if the Kubernetes cluster gets overwhelmed, or
// time out after a period. So if this test becomes flaky because due to these potential issues, we will
// need to find an alternate approach. At this stage through, it seems to be working consistently.
err = wait.PollImmediate(time.Second, 2*time.Minute, func() (bool, error) {
logger.Info("checking gameserver events")
list, err := framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, gs)
if err != nil {
return false, err
}
gs, err = framework.WaitForGameServerState(gs, agonesv1.GameServerStateReserved, 3*time.Minute)
assert.NoError(t, err)

var readyEvent corev1.Event
var reserverdEvent corev1.Event
reply, err = e2eframework.SendGameServerUDP(gs, "ALLOCATE")
if !assert.NoError(t, err) {
assert.FailNow(t, "Could not message GameServer")
}
assert.Equal(t, "ACK: ALLOCATE\n", reply)

for _, e := range list.Items {
if e.Reason == string(agonesv1.GameServerStateReady) {
readyEvent = e
}
if e.Reason == string(agonesv1.GameServerStateReserved) {
reserverdEvent = e
}
}
if readyEvent.Reason == "" || reserverdEvent.Reason == "" {
return false, nil
}
// put it in a totally different state, just to reset things.
gs, err = framework.WaitForGameServerState(gs, agonesv1.GameServerStateAllocated, 3*time.Minute)
assert.NoError(t, err)

// debug once we have both a Ready and Reserved event
for _, e := range list.Items {
logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count).
WithField("last-time", e.LastTimestamp).
WithField("name", e.Name).
WithField("reason", e.Reason).WithField("message", e.Message).Info("gs event details")
}
reply, err = e2eframework.SendGameServerUDP(gs, "RESERVE 5s")
if !assert.NoError(t, err) {
assert.FailNow(t, "Could not message GameServer")
}
assert.Equal(t, "ACK: RESERVE 5s\n", reply)

if readyEvent.Count != 2 {
return false, nil
}
diff := readyEvent.LastTimestamp.Sub(reserverdEvent.FirstTimestamp.Time).Seconds()
// allow for some variation
if diff >= 10 && diff <= 20 {
return true, nil
}
return true, errors.Errorf("difference of %v seconds was not between 10 and 20", diff)
})
// sleep, since we're going to wait for the Ready response.
time.Sleep(5 * time.Second)
_, err = framework.WaitForGameServerState(gs, agonesv1.GameServerStateReady, 3*time.Minute)
assert.NoError(t, err)
}

Expand Down

0 comments on commit 1926502

Please sign in to comment.