Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GC Docker containers and images after the container exits #1071

Merged
merged 4 commits into from
Apr 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 25 additions & 44 deletions client/driver/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -812,59 +812,18 @@ func (h *DockerHandle) Kill() error {
// Stop the container
err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds()))
if err != nil {
h.executor.Exit()
h.pluginClient.Kill()

// Container has already been removed.
if strings.Contains(err.Error(), NoSuchContainerError) {
h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID)
h.executor.Exit()
h.pluginClient.Kill()
return nil
}
h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err)
h.executor.Exit()
h.pluginClient.Kill()
return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
}
h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)

// Cleanup container
if h.cleanupContainer {
err = h.client.RemoveContainer(docker.RemoveContainerOptions{
ID: h.containerID,
RemoveVolumes: true,
})
if err != nil {
h.logger.Printf("[ERR] driver.docker: failed to remove container %s", h.containerID)
return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err)
}
h.logger.Printf("[INFO] driver.docker: removed container %s", h.containerID)
}

// Cleanup image. This operation may fail if the image is in use by another
// job. That is OK. Will we log a message but continue.
if h.cleanupImage {
err = h.client.RemoveImage(h.imageID)
if err != nil {
containers, err := h.client.ListContainers(docker.ListContainersOptions{
// The image might be in use by a stopped container, so check everything
All: true,
Filters: map[string][]string{
"image": []string{h.imageID},
},
})
if err != nil {
h.logger.Printf("[ERR] driver.docker: failed to query list of containers matching image:%s", h.imageID)
return fmt.Errorf("Failed to query list of containers: %s", err)
}
inUse := len(containers)
if inUse > 0 {
h.logger.Printf("[INFO] driver.docker: image %s is still in use by %d container(s)", h.imageID, inUse)
} else {
return fmt.Errorf("Failed to remove image %s", h.imageID)
}
} else {
h.logger.Printf("[INFO] driver.docker: removed image %s", h.imageID)
}
}
return nil
}

Expand Down Expand Up @@ -893,4 +852,26 @@ func (h *DockerHandle) run() {
h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err)
}
h.pluginClient.Kill()

// Stop the container just incase the docker daemon's wait returned
// incorrectly
if err := h.client.StopContainer(h.containerID, 0); err != nil {
_, noSuchContainer := err.(*docker.NoSuchContainer)
_, containerNotRunning := err.(*docker.ContainerNotRunning)
if !containerNotRunning && !noSuchContainer {
h.logger.Printf("[ERR] driver.docker: error stopping container: %v", err)
}
}

// Remove the container
if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, Force: true}); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wrap this in if h.cleanupContainer {

The user can configure Nomad to keep the container

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dadgar There is no point in keeping a container which has exits. The only reason I can think about keeping a container once it exits is to see the logs, which we copy in alloc dirs now.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like you didn't do this... Is there any way to get Nomad to keep a container that has exited? There is a reason to keep a container once it has exited besides logs: I want to run docker inspect on it so I can tell why it was killed. All nomad is telling me is Exit Code: 137, Exit Message: "Docker container exited with non-zero exit code: 137" which isn't OOMKilled, (that shows up in the exit message). The logs don't have any hints, so I'm trying to debug further but all the containers disappear as soon as they're mysteriously killed.

h.logger.Printf("[ERR] driver.docker: error removing container: %v", err)
}

// Cleanup the image
if h.cleanupImage {
if err := h.client.RemoveImage(h.imageID); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you get rid of the logic that was here before? It is not an error when the image is in use by other containers

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dadgar The extra logic's purpose was to only tell the user why an image couldn't be removed. The error from RemoveImage API conveys that message anyways and we are not returning this error anyways.

h.logger.Printf("[ERR] driver.docker: error removing image: %v", err)
}
}
}
48 changes: 48 additions & 0 deletions client/driver/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -757,3 +757,51 @@ func TestDockerUser(t *testing.T) {
t.Fatalf("Expecting '%v' in '%v'", msg, err)
}
}

func TestDockerDriver_CleanupContainer(t *testing.T) {
t.Parallel()
task := &structs.Task{
Name: "redis-demo",
Config: map[string]interface{}{
"image": "busybox",
"command": "/bin/echo",
"args": []string{"hello"},
},
Resources: &structs.Resources{
MemoryMB: 256,
CPU: 512,
},
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
}

_, handle, cleanup := dockerSetup(t, task)
defer cleanup()

// Update should be a no-op
err := handle.Update(task)
if err != nil {
t.Fatalf("err: %v", err)
}

select {
case res := <-handle.WaitCh():
if !res.Successful() {
t.Fatalf("err: %v", res)
}

time.Sleep(3 * time.Second)

// Ensure that the container isn't present
_, err := client.InspectContainer(handle.(*DockerHandle).containerID)
if err == nil {
t.Fatalf("expected to not get container")
}

case <-time.After(time.Duration(tu.TestMultiplier()*5) * time.Second):
t.Fatalf("timeout")
}

}