-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GC Docker containers and images after the container exits #1071
Changes from all commits
4d6a2dc
d71a070
5bc6d3f
3c973fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -812,59 +812,18 @@ func (h *DockerHandle) Kill() error { | |
// Stop the container | ||
err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds())) | ||
if err != nil { | ||
h.executor.Exit() | ||
h.pluginClient.Kill() | ||
|
||
// Container has already been removed. | ||
if strings.Contains(err.Error(), NoSuchContainerError) { | ||
h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID) | ||
h.executor.Exit() | ||
h.pluginClient.Kill() | ||
return nil | ||
} | ||
h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err) | ||
h.executor.Exit() | ||
h.pluginClient.Kill() | ||
return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) | ||
} | ||
h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID) | ||
|
||
// Cleanup container | ||
if h.cleanupContainer { | ||
err = h.client.RemoveContainer(docker.RemoveContainerOptions{ | ||
ID: h.containerID, | ||
RemoveVolumes: true, | ||
}) | ||
if err != nil { | ||
h.logger.Printf("[ERR] driver.docker: failed to remove container %s", h.containerID) | ||
return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err) | ||
} | ||
h.logger.Printf("[INFO] driver.docker: removed container %s", h.containerID) | ||
} | ||
|
||
// Cleanup image. This operation may fail if the image is in use by another | ||
// job. That is OK. Will we log a message but continue. | ||
if h.cleanupImage { | ||
err = h.client.RemoveImage(h.imageID) | ||
if err != nil { | ||
containers, err := h.client.ListContainers(docker.ListContainersOptions{ | ||
// The image might be in use by a stopped container, so check everything | ||
All: true, | ||
Filters: map[string][]string{ | ||
"image": []string{h.imageID}, | ||
}, | ||
}) | ||
if err != nil { | ||
h.logger.Printf("[ERR] driver.docker: failed to query list of containers matching image:%s", h.imageID) | ||
return fmt.Errorf("Failed to query list of containers: %s", err) | ||
} | ||
inUse := len(containers) | ||
if inUse > 0 { | ||
h.logger.Printf("[INFO] driver.docker: image %s is still in use by %d container(s)", h.imageID, inUse) | ||
} else { | ||
return fmt.Errorf("Failed to remove image %s", h.imageID) | ||
} | ||
} else { | ||
h.logger.Printf("[INFO] driver.docker: removed image %s", h.imageID) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
|
@@ -893,4 +852,26 @@ func (h *DockerHandle) run() { | |
h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err) | ||
} | ||
h.pluginClient.Kill() | ||
|
||
// Stop the container just incase the docker daemon's wait returned | ||
// incorrectly | ||
if err := h.client.StopContainer(h.containerID, 0); err != nil { | ||
_, noSuchContainer := err.(*docker.NoSuchContainer) | ||
_, containerNotRunning := err.(*docker.ContainerNotRunning) | ||
if !containerNotRunning && !noSuchContainer { | ||
h.logger.Printf("[ERR] driver.docker: error stopping container: %v", err) | ||
} | ||
} | ||
|
||
// Remove the container | ||
if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, Force: true}); err != nil { | ||
h.logger.Printf("[ERR] driver.docker: error removing container: %v", err) | ||
} | ||
|
||
// Cleanup the image | ||
if h.cleanupImage { | ||
if err := h.client.RemoveImage(h.imageID); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why did you get rid of the logic that was here before? It is not an error when the image is in use by other containers There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dadgar The extra logic's purpose was to only tell the user why an image couldn't be removed. The error from |
||
h.logger.Printf("[ERR] driver.docker: error removing image: %v", err) | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wrap this in
if h.cleanupContainer {
The user can configure Nomad to keep the container
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dadgar There is no point in keeping a container which has exits. The only reason I can think about keeping a container once it exits is to see the logs, which we copy in alloc dirs now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like you didn't do this... Is there any way to get Nomad to keep a container that has exited? There is a reason to keep a container once it has exited besides logs: I want to run
docker inspect
on it so I can tell why it was killed. All nomad is telling me isExit Code: 137, Exit Message: "Docker container exited with non-zero exit code: 137"
which isn't OOMKilled, (that shows up in the exit message). The logs don't have any hints, so I'm trying to debug further but all the containers disappear as soon as they're mysteriously killed.