From b7b99b1e1c44fbc7aaf97519d466c71512d381f8 Mon Sep 17 00:00:00 2001 From: Dane H Lim Date: Tue, 2 Jul 2024 13:07:58 -0700 Subject: [PATCH] Integ tests containers do not restart unexpectedly --- agent/engine/common_integ_test.go | 15 +++++ agent/engine/engine_integ_test.go | 97 +++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/agent/engine/common_integ_test.go b/agent/engine/common_integ_test.go index eeac803a94..aaf5608e96 100644 --- a/agent/engine/common_integ_test.go +++ b/agent/engine/common_integ_test.go @@ -38,6 +38,7 @@ import ( ssmfactory "github.com/aws/amazon-ecs-agent/agent/ssm/factory" "github.com/aws/amazon-ecs-agent/agent/statechange" "github.com/aws/amazon-ecs-agent/agent/taskresource" + "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/restart" apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status" apitaskstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/task/status" "github.com/aws/amazon-ecs-agent/ecs-agent/credentials" @@ -280,6 +281,20 @@ func createTestContainerWithImageAndName(image string, name string) *apicontaine } } +func createTestRestartableContainer(image, name string, exitAfterSecs int, + restartPolicy *restart.RestartPolicy) *apicontainer.Container { + container := createTestContainerWithImageAndName(image, name) + container.RestartPolicy = restartPolicy + if container.RestartPolicyEnabled() { + container.RestartTracker = restart.NewRestartTracker(*container.RestartPolicy) + } + + // Container should exit after `exitAfterSecs` seconds have elapsed. + container.Command = []string{"sh", "-c", fmt.Sprintf("sleep %d", exitAfterSecs)} + + return container +} + func waitForTaskCleanup(t *testing.T, taskEngine TaskEngine, taskArn string, seconds int) { for i := 0; i < seconds; i++ { _, ok := taskEngine.(*DockerTaskEngine).State().TaskByArn(taskArn) diff --git a/agent/engine/engine_integ_test.go b/agent/engine/engine_integ_test.go index 905345a1f3..e94a8fe87a 100644 --- a/agent/engine/engine_integ_test.go +++ b/agent/engine/engine_integ_test.go @@ -35,6 +35,7 @@ import ( "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi" "github.com/aws/amazon-ecs-agent/agent/dockerclient/sdkclientfactory" "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" + "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/restart" apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status" apitaskstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/task/status" "github.com/aws/amazon-ecs-agent/ecs-agent/credentials" @@ -955,3 +956,99 @@ func TestImageWithDigestInteg(t *testing.T) { require.NoError(t, err, "failed to remove container during cleanup") removeImage(t, localRegistryBusyboxImage) } + +// TestContainerDoesNotRestartWhenShouldNotRestart tests that a task's container does not restart upon exit when +// the container is not eligible for restart. +func TestContainerDoesNotRestartWhenShouldNotRestart(t *testing.T) { + tcs := []struct { + name string + containerExitAfterSecs int + restartPolicy *restart.RestartPolicy + }{ + { + name: "container restart policy disabled", + containerExitAfterSecs: 2, + restartPolicy: &restart.RestartPolicy{ + Enabled: false, + RestartAttemptPeriod: 1, + }, + }, + { + name: "container restart policy not configured", + containerExitAfterSecs: 2, + restartPolicy: nil, + }, + { + name: "container restart policy enabled but ignores container exit code", + containerExitAfterSecs: 2, + restartPolicy: &restart.RestartPolicy{ + Enabled: true, + IgnoredExitCodes: []int{0}, + RestartAttemptPeriod: 1, + }, + }, + { + name: "container restart policy enabled but container exits before restart attempt period", + containerExitAfterSecs: 2, + restartPolicy: &restart.RestartPolicy{ + Enabled: true, + RestartAttemptPeriod: 3, + }, + }, + } + + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + // Prepare task engine. + cfg := defaultTestConfigIntegTest() + taskEngine, done, _ := setup(cfg, nil, t) + defer done() + + // Prepare task. + container := createTestRestartableContainer(localRegistryBusyboxImage+":latest", "container", + tc.containerExitAfterSecs, tc.restartPolicy) + task := &apitask.Task{ + Arn: "test-arn", + Family: "family", + Version: "1", + DesiredStatusUnsafe: apitaskstatus.TaskRunning, + Containers: []*apicontainer.Container{container}, + } + + // Start task. + go taskEngine.AddTask(task) + + // Status of task and container should transition to RUNNING. + verifyContainerManifestPulledStateChange(t, taskEngine) + verifyTaskManifestPulledStateChange(t, taskEngine) + verifyContainerRunningStateChange(t, taskEngine) + verifyTaskRunningStateChange(t, taskEngine) + + // Wait until container has exited. + time.Sleep(time.Duration(tc.containerExitAfterSecs) * time.Second) + + // Status of task and container should transition to STOPPED. + verifyContainerStoppedStateChange(t, taskEngine) + verifyTaskStoppedStateChange(t, taskEngine) + + // Check restart information/count for the task's container. + engineTask, ok := taskEngine.GetTaskByArn(task.Arn) + require.True(t, ok, "unable to get task with task ARN %s from task engine", task.Arn) + require.NotNil(t, task.Containers, "task's containers slice is nil when should be not nil") + require.Greater(t, len(task.Containers), 0, + "task's container slice length is 0 when should be greater than 0") + if container.RestartPolicyEnabled() { + require.Equal(t, 0, engineTask.Containers[0].RestartTracker.GetRestartCount(), + "container's restart count is not 0 when it should be 0") + } else { + require.Nil(t, engineTask.Containers[0].RestartTracker, + "container's restart tracker (including restart count info) is set when it should not be set") + } + + // Clean up. + err := taskEngine.(*DockerTaskEngine).removeContainer(task, container) + require.NoError(t, err, "failed to remove container during cleanup") + removeImage(t, localRegistryBusyboxImage) + }) + } +}