Skip to content

Commit

Permalink
Flaky: TestGameServerReserve (googleforgames#1414)
Browse files Browse the repository at this point in the history
After testing, it seems that the K8s API servers can get blocked for an
indeterminate period during tests - a period that can be longer than the
10 seconds in which our GameServer goes from Ready->Reserverd->Ready -
which caused the previous version of the test to fail.

Updated the test to only look at the event stream to ensure the e2e test
state is verified. While it would be ideal to look at the state itself,
we will assume that unit tests cover that the Event stream is accurate
to the GameServer state being changed.

Fixes googleforgames#1276
  • Loading branch information
markmandel authored and ilkercelikyilmaz committed Oct 23, 2020
1 parent 13f8b28 commit 6d5c7a2
Showing 1 changed file with 47 additions and 26 deletions.
73 changes: 47 additions & 26 deletions test/e2e/gameserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
e2eframework "agones.dev/agones/test/e2e/framework"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -554,38 +555,58 @@ func TestGameServerReserve(t *testing.T) {
logger.Info("Received response")
assert.Equal(t, "ACK: RESERVE\n", reply)

gs, err = framework.WaitForGameServerStateWithLogger(logger, gs, agonesv1.GameServerStateReserved, time.Minute)
assert.NoError(t, err, fmt.Sprintf("GameServer Name: %s", gs.ObjectMeta.Name))
assert.Equal(t, agonesv1.GameServerStateReserved, gs.Status.State, fmt.Sprintf("GameServer Name: %s", gs.ObjectMeta.Name))
// might as well Sleep, nothing else going to happen for at least 10 seconds. Let other things work.
logger.Info("Waiting for 10 seconds")
time.Sleep(10 * time.Second)

// it should go back after 10 seconds
gs, err = framework.WaitForGameServerStateWithLogger(logger, gs, agonesv1.GameServerStateReady, 15*time.Second)
assert.NoError(t, err)
assert.Equal(t, agonesv1.GameServerStateReady, gs.Status.State)
// Since polling the backing GameServer can sometimes pause for longer than 10 seconds,
// we are instead going to look at the event stream for the GameServer to determine that the requisite change to
// Reserved and back to Ready has taken place.
//
// There is a possibility that Events may get dropped if the Kubernetes cluster gets overwhelmed, or
// time out after a period. So if this test becomes flaky because due to these potential issues, we will
// need to find an alternate approach. At this stage through, it seems to be working consistently.
err = wait.PollImmediate(time.Second, 2*time.Minute, func() (bool, error) {
logger.Info("checking gameserver events")
list, err := framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, gs)
if err != nil {
return false, err
}

list, err := framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, gs)
assert.NoError(t, err)
var readyEvent corev1.Event
var reserverdEvent corev1.Event

for _, e := range list.Items {
logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count).
WithField("last-time", e.LastTimestamp).
WithField("name", e.Name).
WithField("reason", e.Reason).WithField("message", e.Message).Info("gs event details")
}
for _, e := range list.Items {
if e.Reason == string(agonesv1.GameServerStateReady) {
readyEvent = e
}
if e.Reason == string(agonesv1.GameServerStateReserved) {
reserverdEvent = e
}
}
if readyEvent.Reason == "" || reserverdEvent.Reason == "" {
return false, nil
}

pod, err := framework.KubeClient.CoreV1().Pods(defaultNs).Get(gs.ObjectMeta.Name, metav1.GetOptions{})
assert.NoError(t, err)
logger.WithField("status", pod.Status).Info("Pod Status")
// debug once we have both a Ready and Reserved event
for _, e := range list.Items {
logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count).
WithField("last-time", e.LastTimestamp).
WithField("name", e.Name).
WithField("reason", e.Reason).WithField("message", e.Message).Info("gs event details")
}

list, err = framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, pod)
if readyEvent.Count != 2 {
return false, nil
}
diff := readyEvent.LastTimestamp.Sub(reserverdEvent.FirstTimestamp.Time).Seconds()
// allow for some variation
if diff >= 10 && diff <= 20 {
return true, nil
}
return true, errors.Errorf("difference of %v seconds was not between 10 and 20", diff)
})
assert.NoError(t, err)

for _, e := range list.Items {
logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count).
WithField("last-time", e.LastTimestamp).
WithField("name", e.Name).
WithField("reason", e.Reason).WithField("message", e.Message).Info("gs pod details")
}
}

func TestGameServerShutdown(t *testing.T) {
Expand Down

0 comments on commit 6d5c7a2

Please sign in to comment.