Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flaky: TestGameServerReserve #1414

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 47 additions & 26 deletions test/e2e/gameserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
e2eframework "agones.dev/agones/test/e2e/framework"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -554,38 +555,58 @@ func TestGameServerReserve(t *testing.T) {
logger.Info("Received response")
assert.Equal(t, "ACK: RESERVE\n", reply)

gs, err = framework.WaitForGameServerStateWithLogger(logger, gs, agonesv1.GameServerStateReserved, time.Minute)
assert.NoError(t, err, fmt.Sprintf("GameServer Name: %s", gs.ObjectMeta.Name))
assert.Equal(t, agonesv1.GameServerStateReserved, gs.Status.State, fmt.Sprintf("GameServer Name: %s", gs.ObjectMeta.Name))
// might as well Sleep, nothing else going to happen for at least 10 seconds. Let other things work.
logger.Info("Waiting for 10 seconds")
time.Sleep(10 * time.Second)

// it should go back after 10 seconds
gs, err = framework.WaitForGameServerStateWithLogger(logger, gs, agonesv1.GameServerStateReady, 15*time.Second)
assert.NoError(t, err)
assert.Equal(t, agonesv1.GameServerStateReady, gs.Status.State)
// Since polling the backing GameServer can sometimes pause for longer than 10 seconds,
// we are instead going to look at the event stream for the GameServer to determine that the requisite change to
// Reserved and back to Ready has taken place.
//
// There is a possibility that Events may get dropped if the Kubernetes cluster gets overwhelmed, or
// time out after a period. So if this test becomes flaky because due to these potential issues, we will
// need to find an alternate approach. At this stage through, it seems to be working consistently.
err = wait.PollImmediate(time.Second, 2*time.Minute, func() (bool, error) {
logger.Info("checking gameserver events")
list, err := framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, gs)
if err != nil {
return false, err
}

list, err := framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, gs)
assert.NoError(t, err)
var readyEvent corev1.Event
var reserverdEvent corev1.Event

for _, e := range list.Items {
logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count).
WithField("last-time", e.LastTimestamp).
WithField("name", e.Name).
WithField("reason", e.Reason).WithField("message", e.Message).Info("gs event details")
}
for _, e := range list.Items {
if e.Reason == string(agonesv1.GameServerStateReady) {
readyEvent = e
}
if e.Reason == string(agonesv1.GameServerStateReserved) {
reserverdEvent = e
}
}
if readyEvent.Reason == "" || reserverdEvent.Reason == "" {
return false, nil
}

pod, err := framework.KubeClient.CoreV1().Pods(defaultNs).Get(gs.ObjectMeta.Name, metav1.GetOptions{})
assert.NoError(t, err)
logger.WithField("status", pod.Status).Info("Pod Status")
// debug once we have both a Ready and Reserved event
for _, e := range list.Items {
logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count).
WithField("last-time", e.LastTimestamp).
WithField("name", e.Name).
WithField("reason", e.Reason).WithField("message", e.Message).Info("gs event details")
}

list, err = framework.KubeClient.CoreV1().Events(defaultNs).Search(scheme.Scheme, pod)
if readyEvent.Count != 2 {
return false, nil
}
diff := readyEvent.LastTimestamp.Sub(reserverdEvent.FirstTimestamp.Time).Seconds()
// allow for some variation
if diff >= 10 && diff <= 20 {
return true, nil
}
return true, errors.Errorf("difference of %v seconds was not between 10 and 20", diff)
})
assert.NoError(t, err)

for _, e := range list.Items {
logger.WithField("first-time", e.FirstTimestamp).WithField("count", e.Count).
WithField("last-time", e.LastTimestamp).
WithField("name", e.Name).
WithField("reason", e.Reason).WithField("message", e.Message).Info("gs pod details")
}
}

func TestGameServerShutdown(t *testing.T) {
Expand Down