Skip to content

Commit

Permalink
image manager: cleanup 'dead' and 'created' containers
Browse files Browse the repository at this point in the history
also cleanup of 'dangling' images that have no tags or names associated
with them (ie, they show as <none> in 'docker images')

closes #1684

unit tests

dont touch dangling images -- for now

skip containers that don't have a finished time
  • Loading branch information
sparrc committed May 3, 2019
1 parent df83ed3 commit 55c24df
Show file tree
Hide file tree
Showing 3 changed files with 261 additions and 23 deletions.
4 changes: 2 additions & 2 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func (cfg *Config) validateAndOverrideBounds() error {
// If a value has been set for taskCleanupWaitDuration and the value is less than the minimum allowed cleanup duration,
// print a warning and override it
if cfg.TaskCleanupWaitDuration < minimumTaskCleanupWaitDuration {
seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration)
seelog.Warnf("Invalid value for ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration)
cfg.TaskCleanupWaitDuration = DefaultTaskCleanupWaitDuration
}

Expand All @@ -316,7 +316,7 @@ func (cfg *Config) validateAndOverrideBounds() error {
}

if cfg.ImageCleanupInterval < minimumImageCleanupInterval {
seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval)
seelog.Warnf("Invalid value for ECS_IMAGE_CLEANUP_INTERVAL, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval)
cfg.ImageCleanupInterval = DefaultImageCleanupTimeInterval
}

Expand Down
19 changes: 14 additions & 5 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context)

var numECSImagesDeleted int
imageManager.imageStatesConsideredForDeletion = imageManager.imagesConsiderForDeletion(imageManager.getAllImageStates())

for i := 0; i < imageManager.numImagesToDelete; i++ {
err := imageManager.removeLeastRecentlyUsedImage(ctx)
numECSImagesDeleted = i
Expand Down Expand Up @@ -342,9 +343,17 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte
continue
}

finishedTime, _ := time.Parse(time.Now().String(), response.State.FinishedAt)
seelog.Debugf("Inspecting Non-ECS Container ID [%s] for removal, Finished [%s] Status [%s]", id, response.State.FinishedAt, response.State.Status)
finishedTime, err := time.Parse(time.RFC3339Nano, response.State.FinishedAt)
if err != nil {
seelog.Errorf("Error parsing time string for container. id: %s, time: %s err: %s", id, response.State.FinishedAt, err)
continue
}

if response.State.Status == "exited" && time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration {
if (response.State.Status == "exited" ||
response.State.Status == "dead" ||
response.State.Status == "created") &&
time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration {
nonECSContainerRemoveAvailableIDs = append(nonECSContainerRemoveAvailableIDs, id)
}
}
Expand All @@ -353,13 +362,13 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte
if numNonECSContainerDeleted == imageManager.numNonECSContainersToDelete {
break
}
seelog.Infof("Removing non-ECS container id: %s", id)
seelog.Debugf("Removing non-ECS Container ID %s", id)
err := imageManager.client.RemoveContainer(ctx, id, dockerclient.RemoveContainerTimeout)
if err == nil {
seelog.Infof("Image removed: %s", id)
seelog.Infof("Removed Container ID: %s", id)
numNonECSContainerDeleted++
} else {
seelog.Errorf("Error removing Image %s - %v", id, err)
seelog.Errorf("Error Removing Container ID %s - %s", id, err)
continue
}
}
Expand Down
261 changes: 245 additions & 16 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/dockerclient"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks"
mock_dockerapi "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks"
"github.com/aws/amazon-ecs-agent/agent/ec2"
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
"github.com/aws/amazon-ecs-agent/agent/engine/image"
Expand Down Expand Up @@ -984,12 +984,14 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) {
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

Expand All @@ -1002,35 +1004,262 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) {
ID: "1",
State: &types.ContainerState{
Status: "exited",
FinishedAt: time.Now().AddDate(0, -2, 0).String(),
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.ImageIDs[0], dockerclient.RemoveImageTimeout).Return(nil).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

if len(listContainersResponse.DockerIDs) != 1 {
t.Error("Error removing containers ids")
assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// Dead images should be cleaned up.
func TestNonECSImageAndContainers_RemoveDeadContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
if len(inspectContainerResponse.ID) != 1 {
t.Error("Error inspecting containers ids")
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}
if len(imageManager.imageStates) != 0 {
t.Error("Error removing image state after the image is removed")

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "dead",
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// Old 'Created' Images should be cleaned up
func TestNonECSImageAndContainersCleanup_RemoveOldCreatedContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

func TestNonECSImageAndContainersCleanup_DontRemoveContainerWithInvalidFinishedTime(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: "Hello! I am an invalid timestamp!!",
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(0)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// New 'Created' Images should NOT be cleaned up.
func TestNonECSImageAndContainersCleanup_DoNotRemoveNewlyCreatedContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: time.Now().Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(0)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

func TestDeleteImage(t *testing.T) {
Expand Down

0 comments on commit 55c24df

Please sign in to comment.