From 68b04ee9dc59ca2d2f21118e8cadd754f9d2fc37 Mon Sep 17 00:00:00 2001 From: Zach Loafman Date: Thu, 23 May 2024 22:38:33 +0000 Subject: [PATCH] agones-{extensions,allocator}: Be more defensive about draining I looked at the flakes for TestGameServerCreationRightAfterDeletingOneExtensionsPod and TestAllocatorAfterDeleteReplica, and looked at kubelet logs. We definitely see the Pod go un-ready, but the timing is fairly tight and I'm not sure how long it takes the Service to react. Let's be a little more defensive here and lame-duck for a little longer. --- install/helm/agones/templates/extensions-deployment.yaml | 4 ++-- install/helm/agones/templates/service/allocation.yaml | 4 ++-- install/yaml/install.yaml | 8 ++++---- test/e2e/allocator/pod_termination_test.go | 2 +- test/e2e/extensions/high_availability_test.go | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/install/helm/agones/templates/extensions-deployment.yaml b/install/helm/agones/templates/extensions-deployment.yaml index 9e7dc35d08..8e8bee40f3 100644 --- a/install/helm/agones/templates/extensions-deployment.yaml +++ b/install/helm/agones/templates/extensions-deployment.yaml @@ -92,7 +92,7 @@ spec: priorityClassName: {{ .Values.agones.priorityClassName }} {{- end }} serviceAccountName: {{ .Values.agones.serviceaccount.controller.name }} - terminationGracePeriodSeconds: {{ mul .Values.agones.extensions.readiness.periodSeconds .Values.agones.extensions.readiness.failureThreshold 3 }} + terminationGracePeriodSeconds: {{ mul .Values.agones.extensions.readiness.periodSeconds .Values.agones.extensions.readiness.failureThreshold 5 }} containers: - name: agones-extensions image: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.extensions.name}}:{{ default .Values.agones.image.tag .Values.agones.image.extensions.tag }}" @@ -137,7 +137,7 @@ spec: - name: CONTAINER_NAME value: "agones-extensions" - name: READINESS_SHUTDOWN_DURATION - value: {{ mul .Values.agones.extensions.readiness.periodSeconds .Values.agones.extensions.readiness.failureThreshold 2 }}s + value: {{ mul .Values.agones.extensions.readiness.periodSeconds .Values.agones.extensions.readiness.failureThreshold 4 }}s ports: - name: webhooks containerPort: 8081 diff --git a/install/helm/agones/templates/service/allocation.yaml b/install/helm/agones/templates/service/allocation.yaml index cd3ae400e3..7d5794a205 100644 --- a/install/helm/agones/templates/service/allocation.yaml +++ b/install/helm/agones/templates/service/allocation.yaml @@ -213,7 +213,7 @@ spec: {{ toYaml .Values.agones.allocator.tolerations | indent 8 }} {{- end }} serviceAccountName: {{ $.Values.agones.serviceaccount.allocator.name }} - terminationGracePeriodSeconds: {{ mul .Values.agones.allocator.readiness.periodSeconds .Values.agones.allocator.readiness.failureThreshold 3 }} + terminationGracePeriodSeconds: {{ mul .Values.agones.allocator.readiness.periodSeconds .Values.agones.allocator.readiness.failureThreshold 5 }} {{- if eq .Values.agones.allocator.disableTLS false }} volumes: - name: tls @@ -292,7 +292,7 @@ spec: - name: ALLOCATION_BATCH_WAIT_TIME value: {{ .Values.agones.allocator.allocationBatchWaitTime | quote }} - name: READINESS_SHUTDOWN_DURATION - value: {{ mul .Values.agones.allocator.readiness.periodSeconds .Values.agones.extensions.readiness.failureThreshold 2 }}s + value: {{ mul .Values.agones.allocator.readiness.periodSeconds .Values.agones.extensions.readiness.failureThreshold 4 }}s ports: {{- if .Values.agones.allocator.service.http.enabled }} - name: {{ .Values.agones.allocator.service.http.portName }} diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index 4bf0dec820..cfa540ac6a 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -17216,7 +17216,7 @@ spec: value: "true" priorityClassName: agones-system serviceAccountName: agones-controller - terminationGracePeriodSeconds: 27 + terminationGracePeriodSeconds: 45 containers: - name: agones-extensions image: "us-docker.pkg.dev/agones-images/release/agones-extensions:1.41.0-dev" @@ -17259,7 +17259,7 @@ spec: - name: CONTAINER_NAME value: "agones-extensions" - name: READINESS_SHUTDOWN_DURATION - value: 18s + value: 36s ports: - name: webhooks containerPort: 8081 @@ -17420,7 +17420,7 @@ spec: operator: Equal value: "true" serviceAccountName: agones-allocator - terminationGracePeriodSeconds: 27 + terminationGracePeriodSeconds: 45 volumes: - name: tls secret: @@ -17491,7 +17491,7 @@ spec: - name: ALLOCATION_BATCH_WAIT_TIME value: "500ms" - name: READINESS_SHUTDOWN_DURATION - value: 18s + value: 36s ports: - name: https containerPort: 8443 diff --git a/test/e2e/allocator/pod_termination_test.go b/test/e2e/allocator/pod_termination_test.go index 65d7688544..3e5038d9f8 100644 --- a/test/e2e/allocator/pod_termination_test.go +++ b/test/e2e/allocator/pod_termination_test.go @@ -34,7 +34,7 @@ import ( const ( retryInterval = 5 * time.Second - retryTimeout = 45 * time.Second + retryTimeout = 60 * time.Second ) func TestAllocatorAfterDeleteReplica(t *testing.T) { diff --git a/test/e2e/extensions/high_availability_test.go b/test/e2e/extensions/high_availability_test.go index 49ec425628..74c3fdf499 100644 --- a/test/e2e/extensions/high_availability_test.go +++ b/test/e2e/extensions/high_availability_test.go @@ -84,7 +84,7 @@ func TestGameServerCreationRightAfterDeletingOneExtensionsPod(t *testing.T) { logger.Infof("Removing one of the Extensions Pods: %v", list.Items[1].ObjectMeta.Name) deleteAgonesExtensionsPod(ctx, t, false) - endTime := time.Now().Add(30 * time.Second) + endTime := time.Now().Add(60 * time.Second) for time.Now().Before(endTime) { gs := framework.DefaultGameServer(defaultNs) logger.Infof("Creating game-server %s...", gs.Name)