From 5d4940a27c95c9b4abc211f412269a3d9d951a2c Mon Sep 17 00:00:00 2001 From: Yajie Chu Date: Thu, 17 Oct 2019 17:01:05 -0700 Subject: [PATCH] enable customers to cleanup the NonECS Images which is older than NON_ECS_IMAGE_MINIMUM_CLEANUP_AGE (the wait duration between current time and image creation time) --- README.md | 1 + agent/config/config.go | 5 + agent/config/config_test.go | 2 + agent/config/config_unix.go | 1 + agent/config/config_unix_test.go | 1 + agent/config/config_windows.go | 1 + agent/config/config_windows_test.go | 1 + agent/config/types.go | 3 + agent/engine/docker_image_manager.go | 31 ++++-- agent/engine/docker_image_manager_test.go | 109 ++++++++++++++++++++++ 10 files changed, 149 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 37ed13de30e..87be54488fe 100644 --- a/README.md +++ b/README.md @@ -155,6 +155,7 @@ additional details on each available environment variable. | `ECS_DISABLE_IMAGE_CLEANUP` | `true` | Whether to disable automated image cleanup for the ECS Agent. | `false` | `false` | | `ECS_IMAGE_CLEANUP_INTERVAL` | 30m | The time interval between automated image cleanup cycles. If set to less than 10 minutes, the value is ignored. | 30m | 30m | | `ECS_IMAGE_MINIMUM_CLEANUP_AGE` | 30m | The minimum time interval between when an image is pulled and when it can be considered for automated image cleanup. | 1h | 1h | +| `NON_ECS_IMAGE_MINIMUM_CLEANUP_AGE` | 30m | The minimum time interval between when a non ECS image is created and when it can be considered for automated image cleanup. | 1h | 1h | | `ECS_NUM_IMAGES_DELETE_PER_CYCLE` | 5 | The maximum number of images to delete in a single automated image cleanup cycle. If set to less than 1, the value is ignored. | 5 | 5 | | `ECS_IMAGE_PULL_BEHAVIOR` | <default | always | once | prefer-cached > | The behavior used to customize the pull image process. If `default` is specified, the image will be pulled remotely, if the pull fails then the cached image in the instance will be used. If `always` is specified, the image will be pulled remotely, if the pull fails then the task will fail. If `once` is specified, the image will be pulled remotely if it has not been pulled before or if the image was removed by image cleanup, otherwise the cached image in the instance will be used. If `prefer-cached` is specified, the image will be pulled remotely if there is no cached image, otherwise the cached image in the instance will be used. | default | default | | `ECS_IMAGE_PULL_INACTIVITY_TIMEOUT` | 1m | The time to wait after docker pulls complete waiting for extraction of a container. Useful for tuning large Windows containers. | 1m | 3m | diff --git a/agent/config/config.go b/agent/config/config.go index 997eb1ae3e8..bc0f446a62c 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -83,6 +83,10 @@ const ( // has been pulled before it can be deleted. DefaultImageDeletionAge = 1 * time.Hour + // DefaultNonECSImageDeletionAge specifies the default value for minimum amount of elapsed time after an image + // has been created before it can be deleted + DefaultNonECSImageDeletionAge = 1 * time.Hour + // minimumTaskCleanupWaitDuration specifies the minimum duration to wait before cleaning up // a task's container. This is used to enforce sane values for the config.TaskCleanupWaitDuration field. minimumTaskCleanupWaitDuration = 1 * time.Minute @@ -520,6 +524,7 @@ func environmentConfig() (Config, error) { TaskIAMRoleEnabledForNetworkHost: utils.ParseBool(os.Getenv("ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST"), false), ImageCleanupDisabled: utils.ParseBool(os.Getenv("ECS_DISABLE_IMAGE_CLEANUP"), false), MinimumImageDeletionAge: parseEnvVariableDuration("ECS_IMAGE_MINIMUM_CLEANUP_AGE"), + NonECSMinimumImageDeletionAge: parseEnvVariableDuration("NON_ECS_IMAGE_MINIMUM_CLEANUP_AGE"), ImageCleanupInterval: parseEnvVariableDuration("ECS_IMAGE_CLEANUP_INTERVAL"), NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(), NumNonECSContainersToDeletePerCycle: parseNumNonECSContainersToDeletePerCycle(), diff --git a/agent/config/config_test.go b/agent/config/config_test.go index ca8e4be4c01..63b9a696cc7 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -105,6 +105,7 @@ func TestEnvironmentConfig(t *testing.T) { defer setTestEnv("ECS_DISABLE_IMAGE_CLEANUP", "true")() defer setTestEnv("ECS_IMAGE_CLEANUP_INTERVAL", "2h")() defer setTestEnv("ECS_IMAGE_MINIMUM_CLEANUP_AGE", "30m")() + defer setTestEnv("NON_ECS_IMAGE_MINIMUM_CLEANUP_AGE", "30m")() defer setTestEnv("ECS_NUM_IMAGES_DELETE_PER_CYCLE", "2")() defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "always")() defer setTestEnv("ECS_INSTANCE_ATTRIBUTES", "{\"my_attribute\": \"testing\"}")() @@ -148,6 +149,7 @@ func TestEnvironmentConfig(t *testing.T) { assert.Equal(t, expectedDurationPollingMetricsWaitDuration, conf.PollingMetricsWaitDuration) assert.True(t, conf.TaskENIEnabled, "Wrong value for TaskNetwork") assert.Equal(t, (30 * time.Minute), conf.MinimumImageDeletionAge) + assert.Equal(t, (30 * time.Minute), conf.NonECSMinimumImageDeletionAge) assert.Equal(t, (2 * time.Hour), conf.ImageCleanupInterval) assert.Equal(t, 2, conf.NumImagesToDeletePerCycle) assert.Equal(t, ImagePullAlwaysBehavior, conf.ImagePullBehavior) diff --git a/agent/config/config_unix.go b/agent/config/config_unix.go index b2238058aa8..ebea675f946 100644 --- a/agent/config/config_unix.go +++ b/agent/config/config_unix.go @@ -60,6 +60,7 @@ func DefaultConfig() Config { CredentialsAuditLogDisabled: false, ImageCleanupDisabled: false, MinimumImageDeletionAge: DefaultImageDeletionAge, + NonECSMinimumImageDeletionAge: DefaultNonECSImageDeletionAge, ImageCleanupInterval: DefaultImageCleanupTimeInterval, ImagePullInactivityTimeout: defaultImagePullInactivityTimeout, NumImagesToDeletePerCycle: DefaultNumImagesToDeletePerCycle, diff --git a/agent/config/config_unix_test.go b/agent/config/config_unix_test.go index fe3d168a96d..8c333c1edbe 100644 --- a/agent/config/config_unix_test.go +++ b/agent/config/config_unix_test.go @@ -55,6 +55,7 @@ func TestConfigDefault(t *testing.T) { assert.Equal(t, defaultCredentialsAuditLogFile, cfg.CredentialsAuditLogFile, "CredentialsAuditLogFile is set incorrectly") assert.False(t, cfg.ImageCleanupDisabled, "ImageCleanupDisabled default is set incorrectly") assert.Equal(t, DefaultImageDeletionAge, cfg.MinimumImageDeletionAge, "MinimumImageDeletionAge default is set incorrectly") + assert.Equal(t, DefaultNonECSImageDeletionAge, cfg.NonECSMinimumImageDeletionAge, "NonECSMinimumImageDeletionAge default is set incorrectly") assert.Equal(t, DefaultImageCleanupTimeInterval, cfg.ImageCleanupInterval, "ImageCleanupInterval default is set incorrectly") assert.Equal(t, DefaultNumImagesToDeletePerCycle, cfg.NumImagesToDeletePerCycle, "NumImagesToDeletePerCycle default is set incorrectly") assert.Equal(t, defaultCNIPluginsPath, cfg.CNIPluginsPath, "CNIPluginsPath default is set incorrectly") diff --git a/agent/config/config_windows.go b/agent/config/config_windows.go index 0e8bed016ab..63914a19fc7 100644 --- a/agent/config/config_windows.go +++ b/agent/config/config_windows.go @@ -91,6 +91,7 @@ func DefaultConfig() Config { CredentialsAuditLogDisabled: false, ImageCleanupDisabled: false, MinimumImageDeletionAge: DefaultImageDeletionAge, + NonECSMinimumImageDeletionAge: DefaultNonECSImageDeletionAge, ImageCleanupInterval: DefaultImageCleanupTimeInterval, NumImagesToDeletePerCycle: DefaultNumImagesToDeletePerCycle, NumNonECSContainersToDeletePerCycle: DefaultNumNonECSContainersToDeletePerCycle, diff --git a/agent/config/config_windows_test.go b/agent/config/config_windows_test.go index 293fc592973..ba4bb86c7b2 100644 --- a/agent/config/config_windows_test.go +++ b/agent/config/config_windows_test.go @@ -50,6 +50,7 @@ func TestConfigDefault(t *testing.T) { assert.Equal(t, `C:\ProgramData\Amazon\ECS\log\audit.log`, cfg.CredentialsAuditLogFile, "CredentialsAuditLogFile is set incorrectly") assert.False(t, cfg.ImageCleanupDisabled, "ImageCleanupDisabled default is set incorrectly") assert.Equal(t, DefaultImageDeletionAge, cfg.MinimumImageDeletionAge, "MinimumImageDeletionAge default is set incorrectly") + assert.Equal(t, DefaultNonECSImageDeletionAge, cfg.NonECSMinimumImageDeletionAge, "NonECSMinimumImageDeletionAge default is set incorrectly") assert.Equal(t, DefaultImageCleanupTimeInterval, cfg.ImageCleanupInterval, "ImageCleanupInterval default is set incorrectly") assert.Equal(t, DefaultNumImagesToDeletePerCycle, cfg.NumImagesToDeletePerCycle, "NumImagesToDeletePerCycle default is set incorrectly") assert.Equal(t, `C:\ProgramData\Amazon\ECS\data`, cfg.DataDirOnHost, "Default DataDirOnHost set incorrectly") diff --git a/agent/config/types.go b/agent/config/types.go index a3bbe9f9c0b..2678db15607 100644 --- a/agent/config/types.go +++ b/agent/config/types.go @@ -171,6 +171,9 @@ type Config struct { // before it can be deleted MinimumImageDeletionAge time.Duration + // NonECSMinimumImageDeletionAge specifies the minimum time since non ecs images created before it can be deleted + NonECSMinimumImageDeletionAge time.Duration + // ImageCleanupInterval specifies the time to wait before performing the image // cleanup since last time it was executed ImageCleanupInterval time.Duration diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index 08486dfb9d5..4ab7393f488 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -66,6 +66,7 @@ type dockerImageManager struct { deleteNonECSImagesEnabled bool nonECSContainerCleanupWaitDuration time.Duration numNonECSContainersToDelete int + nonECSMinimumAgeBeforeDeletion time.Duration } // ImageStatesForDeletion is used for implementing the sort interface @@ -84,6 +85,7 @@ func NewImageManager(cfg *config.Config, client dockerapi.DockerClient, state do deleteNonECSImagesEnabled: cfg.DeleteNonECSImagesEnabled, nonECSContainerCleanupWaitDuration: cfg.TaskCleanupWaitDuration, numNonECSContainersToDelete: cfg.NumNonECSContainersToDeletePerCycle, + nonECSMinimumAgeBeforeDeletion: cfg.NonECSMinimumImageDeletionAge, } } @@ -273,6 +275,12 @@ func (imageManager *dockerImageManager) isImageOldEnough(imageState *image.Image return ageOfImage > imageManager.minimumAgeBeforeDeletion } +//TODO: change image createdTime to image lastUsedTime when docker support it in the future +func (imageManager *dockerImageManager) nonECSImageOldEnough(NonECSImage ImageWithSizeID) bool { + ageOfImage := time.Now().Sub(NonECSImage.createdTime) + return ageOfImage > imageManager.nonECSMinimumAgeBeforeDeletion +} + // Implementing sort interface based on last used times of the images func (imageStates ImageStatesForDeletion) Len() int { return len(imageStates) @@ -419,9 +427,10 @@ func (imageManager *dockerImageManager) getNonECSContainerIDs(ctx context.Contex } type ImageWithSizeID struct { - RepoTags []string - ImageID string - Size int64 + RepoTags []string + ImageID string + Size int64 + createdTime time.Time } func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, nonECSImagesNumToDelete int) { @@ -441,6 +450,10 @@ func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, if numImagesAlreadyDeleted >= nonECSImagesNumToDelete { break } + // use current time - image creation time to determine if image is old enough to be deleted. + if !imageManager.nonECSImageOldEnough(image) { + continue + } if len(image.RepoTags) > 1 { seelog.Debugf("Non-ECS image has more than one tag Image: %s (Tags: %s)", image.ImageID, image.RepoTags) for _, tag := range image.RepoTags { @@ -476,11 +489,17 @@ func (imageManager *dockerImageManager) getNonECSImages(ctx context.Context) []I seelog.Errorf("Error inspecting non-ECS image: (ImageID: %s), %s", imageID, err) continue } + createTime := time.Time{} + createTime, err = time.Parse(time.RFC3339, resp.Created) + if err != nil { + seelog.Warnf("Error parse the inspected non-ECS image created time (ImageID: %s), %v", imageID, err) + } allImages = append(allImages, ImageWithSizeID{ - ImageID: imageID, - Size: resp.Size, - RepoTags: resp.RepoTags, + ImageID: imageID, + Size: resp.Size, + RepoTags: resp.RepoTags, + createdTime: createTime, }) } diff --git a/agent/engine/docker_image_manager_test.go b/agent/engine/docker_image_manager_test.go index 536e6d75709..1f7d27bf7ec 100644 --- a/agent/engine/docker_image_manager_test.go +++ b/agent/engine/docker_image_manager_test.go @@ -1253,6 +1253,115 @@ func TestNonECSImageAndContainersCleanupRemoveImage_DontDeleteExcludedImage(t *t assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed") } +func TestNonECSImageAndContainerCleanupRemoveImage_DontDeleteNotOldEnoughImage(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + numNonECSContainersToDelete: 10, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + nonECSContainerCleanupWaitDuration: time.Hour * 3, + nonECSMinimumAgeBeforeDeletion: time.Hour * 100, + } + imageManager.SetSaver(statemanager.NewNoopStateManager()) + + listContainersResponse := dockerapi.ListContainersResponse{ + DockerIDs: []string{"1"}, + } + + inspectContainerResponse := &types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: "1", + State: &types.ContainerState{ + Status: "exited", + FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano), + }, + }, + } + + inspectImageResponse := &types.ImageInspect{ + Size: 4096, + RepoTags: []string{"tester"}, + Created: time.Now().AddDate(0, 0, -1).Format(time.RFC3339), + } + + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty1"}, + RepoTags: []string{"tester"}, + } + + client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() + client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1) + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() + client.EXPECT().InspectImage(listImagesResponse.ImageIDs[0]).Return(inspectImageResponse, nil).Times(1) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + imageManager.removeUnusedImages(ctx) +} + +func TestNonECSImageAndContainerCleanupRemoveImage_DeleteOldEnoughImage(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + numNonECSContainersToDelete: 10, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + nonECSContainerCleanupWaitDuration: time.Hour * 3, + nonECSMinimumAgeBeforeDeletion: time.Hour * 3, + } + imageManager.SetSaver(statemanager.NewNoopStateManager()) + + listContainersResponse := dockerapi.ListContainersResponse{ + DockerIDs: []string{"1"}, + } + + inspectContainerResponse := &types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: "1", + State: &types.ContainerState{ + Status: "exited", + FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano), + }, + }, + } + + inspectImageResponse := &types.ImageInspect{ + Size: 4096, + RepoTags: []string{"tester"}, + Created: time.Now().AddDate(0, 0, -1).Format(time.RFC3339), + } + + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty1"}, + RepoTags: []string{"tester"}, + } + + client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() + client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1) + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() + client.EXPECT().InspectImage(listImagesResponse.ImageIDs[0]).Return(inspectImageResponse, nil).Times(1) + client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.ImageIDs[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + imageManager.removeUnusedImages(ctx) +} + func TestNonECSImageAndContainersCleanupRemoveImage_DontDeleteECSImages(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish()