From 84fae81322c189e90e5e4fb4e78479cf05147c0b Mon Sep 17 00:00:00 2001 From: Alexander Apalikov Date: Sun, 22 Sep 2019 20:11:53 +0300 Subject: [PATCH] Marking Evicted Pods as Unhealthy (#1056) Add additional health check for Gameservers Controller. --- pkg/gameservers/health.go | 8 +++++++- test/e2e/gameserver_test.go | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/pkg/gameservers/health.go b/pkg/gameservers/health.go index 45af430e4e..ef8aef5947 100644 --- a/pkg/gameservers/health.go +++ b/pkg/gameservers/health.go @@ -104,7 +104,7 @@ func NewHealthController(health healthcheck.Handler, // isUnhealthy returns if the Pod event is going // to cause the GameServer to become Unhealthy func (hc *HealthController) isUnhealthy(pod *corev1.Pod) bool { - return hc.unschedulableWithNoFreePorts(pod) || hc.failedContainer(pod) + return hc.evictedPod(pod) || hc.unschedulableWithNoFreePorts(pod) || hc.failedContainer(pod) } // unschedulableWithNoFreePorts checks if the reason the Pod couldn't be scheduled @@ -120,6 +120,12 @@ func (hc *HealthController) unschedulableWithNoFreePorts(pod *corev1.Pod) bool { return false } +// evictedPod checks if the Pod was Evicted. +// This could be caused by reaching the limit on Ephemeral storage +func (hc *HealthController) evictedPod(pod *corev1.Pod) bool { + return pod.Status.Reason == "Evicted" +} + // failedContainer checks each container, and determines if there was a failed // container func (hc *HealthController) failedContainer(pod *corev1.Pod) bool { diff --git a/test/e2e/gameserver_test.go b/test/e2e/gameserver_test.go index 15033aa94e..a9f77ee506 100644 --- a/test/e2e/gameserver_test.go +++ b/test/e2e/gameserver_test.go @@ -15,6 +15,7 @@ package e2e import ( + "fmt" "testing" "time" @@ -368,6 +369,24 @@ func TestGameServerShutdown(t *testing.T) { assert.NoError(t, err) } +// TestGameServerEvicted tests that if a GameServer is evicted then it becomes Unhealthy +// Ephemeral Storage limit set to 0Mi 
+func TestGameServerEvicted(t *testing.T) { + t.Parallel() + gs := defaultGameServer() + gs.Spec.Template.Spec.Containers[0].Resources.Limits[corev1.ResourceEphemeralStorage] = resource.MustParse("0Mi") + newGs, err := framework.AgonesClient.AgonesV1().GameServers(defaultNs).Create(gs) + + assert.Nil(t, err, fmt.Sprintf("creating %v GameServer instances failed (%v): %v", gs.Spec, gs.Name, err)) + + logrus.WithField("name", newGs.ObjectMeta.Name).Info("GameServer created, waiting for being Evicted and Unhealthy") + + _, err = framework.WaitForGameServerState(newGs, agonesv1.GameServerStateUnhealthy, 5*time.Minute) + + assert.Nil(t, err, fmt.Sprintf("waiting for %v GameServer Unhealthy state timed out (%v): %v", + gs.Spec, gs.Name, err)) +} + func TestGameServerPassthroughPort(t *testing.T) { t.Parallel() gs := defaultGameServer()