Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

image manager: cleanup 'dead' and 'created' containers #2015

Merged
merged 1 commit into from
May 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func (cfg *Config) validateAndOverrideBounds() error {
// If a value has been set for taskCleanupWaitDuration and the value is less than the minimum allowed cleanup duration,
// print a warning and override it
if cfg.TaskCleanupWaitDuration < minimumTaskCleanupWaitDuration {
seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration)
seelog.Warnf("Invalid value for ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration)
cfg.TaskCleanupWaitDuration = DefaultTaskCleanupWaitDuration
}

Expand All @@ -316,7 +316,7 @@ func (cfg *Config) validateAndOverrideBounds() error {
}

if cfg.ImageCleanupInterval < minimumImageCleanupInterval {
seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval)
seelog.Warnf("Invalid value for ECS_IMAGE_CLEANUP_INTERVAL, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval)
cfg.ImageCleanupInterval = DefaultImageCleanupTimeInterval
}

Expand Down
19 changes: 14 additions & 5 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context)

var numECSImagesDeleted int
imageManager.imageStatesConsideredForDeletion = imageManager.imagesConsiderForDeletion(imageManager.getAllImageStates())

for i := 0; i < imageManager.numImagesToDelete; i++ {
err := imageManager.removeLeastRecentlyUsedImage(ctx)
numECSImagesDeleted = i
Expand Down Expand Up @@ -342,9 +343,17 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte
continue
}

finishedTime, _ := time.Parse(time.Now().String(), response.State.FinishedAt)
seelog.Debugf("Inspecting Non-ECS Container ID [%s] for removal, Finished [%s] Status [%s]", id, response.State.FinishedAt, response.State.Status)
finishedTime, err := time.Parse(time.RFC3339Nano, response.State.FinishedAt)
if err != nil {
sparrc marked this conversation as resolved.
Show resolved Hide resolved
seelog.Errorf("Error parsing time string for container. id: %s, time: %s err: %s", id, response.State.FinishedAt, err)
continue
sparrc marked this conversation as resolved.
Show resolved Hide resolved
}

if response.State.Status == "exited" && time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration {
if (response.State.Status == "exited" ||
response.State.Status == "dead" ||
response.State.Status == "created") &&
time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration {
nonECSContainerRemoveAvailableIDs = append(nonECSContainerRemoveAvailableIDs, id)
}
}
Expand All @@ -353,13 +362,13 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte
if numNonECSContainerDeleted == imageManager.numNonECSContainersToDelete {
break
}
seelog.Infof("Removing non-ECS container id: %s", id)
seelog.Debugf("Removing non-ECS Container ID %s", id)
err := imageManager.client.RemoveContainer(ctx, id, dockerclient.RemoveContainerTimeout)
if err == nil {
seelog.Infof("Image removed: %s", id)
seelog.Infof("Removed Container ID: %s", id)
numNonECSContainerDeleted++
} else {
seelog.Errorf("Error removing Image %s - %v", id, err)
seelog.Errorf("Error Removing Container ID %s - %s", id, err)
continue
}
}
Expand Down
261 changes: 245 additions & 16 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/dockerclient"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks"
mock_dockerapi "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks"
"github.com/aws/amazon-ecs-agent/agent/ec2"
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
"github.com/aws/amazon-ecs-agent/agent/engine/image"
Expand Down Expand Up @@ -984,12 +984,14 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) {
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

Expand All @@ -1002,35 +1004,262 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) {
ID: "1",
State: &types.ContainerState{
Status: "exited",
FinishedAt: time.Now().AddDate(0, -2, 0).String(),
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.ImageIDs[0], dockerclient.RemoveImageTimeout).Return(nil).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

if len(listContainersResponse.DockerIDs) != 1 {
t.Error("Error removing containers ids")
assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// Dead images should be cleaned up.
func TestNonECSImageAndContainers_RemoveDeadContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
if len(inspectContainerResponse.ID) != 1 {
t.Error("Error inspecting containers ids")
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}
if len(imageManager.imageStates) != 0 {
t.Error("Error removing image state after the image is removed")

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "dead",
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// Old 'Created' Images should be cleaned up
func TestNonECSImageAndContainersCleanup_RemoveOldCreatedContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

func TestNonECSImageAndContainersCleanup_DontRemoveContainerWithInvalidFinishedTime(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: "Hello! I am an invalid timestamp!!",
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(0)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// New 'Created' Images should NOT be cleaned up.
func TestNonECSImageAndContainersCleanup_DoNotRemoveNewlyCreatedContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: time.Now().Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(0)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

func TestDeleteImage(t *testing.T) {
Expand Down