Skip to content

Commit

Permalink
Marking Evicted Pods as Unhealthy (#1056)
Browse files Browse the repository at this point in the history
Add additional health check for Gameservers Controller.
  • Loading branch information
aLekSer authored and markmandel committed Sep 22, 2019
1 parent 69edb8b commit 84fae81
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
8 changes: 7 additions & 1 deletion pkg/gameservers/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func NewHealthController(health healthcheck.Handler,
// isUnhealthy reports whether the Pod event is going to cause the
// GameServer to become Unhealthy. It returns true as soon as any one of the
// evictedPod, unschedulableWithNoFreePorts or failedContainer checks matches.
func (hc *HealthController) isUnhealthy(pod *corev1.Pod) bool {
	// A single return combining all checks; a preceding return without the
	// eviction check would make that check unreachable.
	return hc.evictedPod(pod) || hc.unschedulableWithNoFreePorts(pod) || hc.failedContainer(pod)
}

// unschedulableWithNoFreePorts checks if the reason the Pod couldn't be scheduled
Expand All @@ -120,6 +120,12 @@ func (hc *HealthController) unschedulableWithNoFreePorts(pod *corev1.Pod) bool {
return false
}

// evictedPod reports whether the Pod's status reason marks it as Evicted.
// An eviction could be caused, for example, by the Pod reaching its limit
// on Ephemeral storage.
func (hc *HealthController) evictedPod(pod *corev1.Pod) bool {
	// Status reason value compared against to detect an evicted Pod.
	const evictedReason = "Evicted"

	reason := pod.Status.Reason
	return reason == evictedReason
}

// failedContainer checks each container, and determines if there was a failed
// container
func (hc *HealthController) failedContainer(pod *corev1.Pod) bool {
Expand Down
19 changes: 19 additions & 0 deletions test/e2e/gameserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package e2e

import (
"fmt"
"testing"
"time"

Expand Down Expand Up @@ -368,6 +369,24 @@ func TestGameServerShutdown(t *testing.T) {
assert.NoError(t, err)
}

// TestGameServerEvicted tests that if a GameServer gets evicted then it
// becomes Unhealthy. The Ephemeral Storage limit of the first container is
// set to 0Mi, which is expected to get the GameServer evicted.
func TestGameServerEvicted(t *testing.T) {
	t.Parallel()
	gs := defaultGameServer()

	container := &gs.Spec.Template.Spec.Containers[0]
	if container.Resources.Limits == nil {
		// Writing into a nil map panics, so make sure the limits map exists
		// in case the default GameServer does not define any limits.
		container.Resources.Limits = corev1.ResourceList{}
	}
	container.Resources.Limits[corev1.ResourceEphemeralStorage] = resource.MustParse("0Mi")

	newGs, err := framework.AgonesClient.AgonesV1().GameServers(defaultNs).Create(gs)
	if !assert.Nil(t, err, fmt.Sprintf("creating %v GameServer instances failed (%v): %v", gs.Spec, gs.Name, err)) {
		// The returned GameServer cannot be relied upon after a failed
		// Create; stop here instead of dereferencing it below.
		t.FailNow()
	}

	logrus.WithField("name", newGs.ObjectMeta.Name).Info("GameServer created, waiting for being Evicted and Unhealthy")

	// Allow up to five minutes for the GameServer to move to Unhealthy.
	_, err = framework.WaitForGameServerState(newGs, agonesv1.GameServerStateUnhealthy, 5*time.Minute)

	assert.Nil(t, err, fmt.Sprintf("waiting for %v GameServer Unhealthy state timed out (%v): %v",
		gs.Spec, gs.Name, err))
}

func TestGameServerPassthroughPort(t *testing.T) {
t.Parallel()
gs := defaultGameServer()
Expand Down

0 comments on commit 84fae81

Please sign in to comment.