Skip to content

Commit

Permalink
Fix concurrent writes during image cleanup
Browse files Browse the repository at this point in the history
This patch inspects the cleanup and resolves the inherent concurrent map write
issue reported. An unit test has been added to increase confidence in the fix.

Fixes aws#707

Signed-off-by: Vinothkumar Siddharth <sidvin@amazon.com>
  • Loading branch information
vsiddharth authored and petderek committed Jun 22, 2017
1 parent 7cbf70e commit 3603267
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 4 deletions.
8 changes: 4 additions & 4 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ func (imageManager *dockerImageManager) getImageState(containerImageID string) (

// removeImageState removes the imageState from the list of imageState objects in ImageManager
func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *image.ImageState) {
imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
for i, imageState := range imageManager.imageStates {
if imageState.Image.ImageID == imageStateToBeRemoved.Image.ImageID {
// Image State found; hence remove it
Expand Down Expand Up @@ -277,6 +275,9 @@ func (imageManager *dockerImageManager) performPeriodicImageCleanup(ctx context.
}

func (imageManager *dockerImageManager) removeUnusedImages() {
seelog.Infof("Begin building map of eligible unused images for deletion")
imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
imageManager.imageStatesConsideredForDeletion = make(map[string]*image.ImageState)
for _, imageState := range imageManager.getAllImageStates() {
imageManager.imageStatesConsideredForDeletion[imageState.Image.ImageID] = imageState
Expand Down Expand Up @@ -305,8 +306,6 @@ func (imageManager *dockerImageManager) removeLeastRecentlyUsedImage() error {
}

func (imageManager *dockerImageManager) getUnusedImageForDeletion() *image.ImageState {
imageManager.updateLock.RLock()
defer imageManager.updateLock.RUnlock()
candidateImageStatesForDeletion := imageManager.getCandidateImagesForDeletion()
if len(candidateImageStatesForDeletion) < 1 {
seelog.Infof("No eligible images for deletion for this cleanup cycle")
Expand Down Expand Up @@ -350,6 +349,7 @@ func (imageManager *dockerImageManager) deleteImage(imageID string, imageState *
seelog.Infof("Image removed: %v", imageID)
imageState.RemoveImageName(imageID)
if len(imageState.Image.Names) == 0 {
seelog.Infof("Cleaning up all tracking information for image %s as it has zero references", imageID)
delete(imageManager.imageStatesConsideredForDeletion, imageState.Image.ImageID)
imageManager.removeImageState(imageState)
imageManager.state.RemoveImageState(imageState)
Expand Down
70 changes: 70 additions & 0 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package engine
import (
"errors"
"reflect"
"sync"
"testing"
"time"

Expand All @@ -25,8 +26,11 @@ import (
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
"github.com/aws/amazon-ecs-agent/agent/engine/image"
"github.com/aws/amazon-ecs-agent/agent/statemanager"

docker "github.com/fsouza/go-dockerclient"
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/net/context"
)

Expand Down Expand Up @@ -951,3 +955,69 @@ func TestGetImageStateFromImageNameNoImageState(t *testing.T) {
t.Error("Incorrect image state retrieved by image name")
}
}

// TestConcurrentRemoveUnusedImages checks for concurrent map writes
// in the imageManager
func TestConcurrentRemoveUnusedImages(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
}

imageManager.SetSaver(statemanager.NewNoopStateManager())
container := &api.Container{
Name: "testContainer",
Image: "testContainerImage",
}
sourceImage := &image.Image{
ImageID: "sha256:qwerty",
}
sourceImage.Names = append(sourceImage.Names, container.Image)
imageInspected := &docker.Image{
ID: "sha256:qwerty",
}
client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes()
err := imageManager.RecordContainerReference(container)
if err != nil {
t.Error("Error in adding container to an existing image state")
}
require.Equal(t, 1, len(imageManager.imageStates))

// Remove container reference from image state to trigger cleanup
err = imageManager.RemoveContainerReferenceFromImageState(container)
assert.NoError(t, err)

imageState, _ := imageManager.getImageState(imageInspected.ID)
imageState.PulledAt = time.Now().AddDate(0, -2, 0)
imageState.LastUsedAt = time.Now().AddDate(0, -2, 0)

client.EXPECT().RemoveImage(container.Image, removeImageTimeout).Return(nil)
require.Equal(t, 1, len(imageManager.imageStates))

// We create 1000 goroutines and then perform a channel close
// to simulate the concurrent map write problem
numRoutines := 1000
var waitGroup sync.WaitGroup
waitGroup.Add(numRoutines)

ok := make(chan bool)

for i := 0; i < numRoutines; i++ {
go func() {
<-ok
imageManager.removeUnusedImages()
waitGroup.Done()
}()
}

close(ok)
waitGroup.Wait()
require.Equal(t, 0, len(imageManager.imageStates))
}

0 comments on commit 3603267

Please sign in to comment.