diff --git a/client/driver/docker.go b/client/driver/docker.go index 18d83ed0469c..4f6871333296 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -804,7 +804,7 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (*StartRespon return nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", d.driverConfig.ImageName, d.imageID, err) } - container, err := d.createContainer(config) + container, err := d.createContainer(client, config) if err != nil { wrapped := fmt.Sprintf("Failed to create container: %v", err) d.logger.Printf("[ERR] driver.docker: %s", wrapped) @@ -1510,7 +1510,7 @@ func (d *DockerDriver) loadImage(driverConfig *DockerDriverConfig, client *docke // createContainer creates the container given the passed configuration. It // attempts to handle any transient Docker errors. -func (d *DockerDriver) createContainer(config docker.CreateContainerOptions) (*docker.Container, error) { +func (d *DockerDriver) createContainer(client createContainerClient, config docker.CreateContainerOptions) (*docker.Container, error) { // Create a container attempted := 0 CREATE: @@ -1521,6 +1521,16 @@ CREATE: d.logger.Printf("[DEBUG] driver.docker: failed to create container %q from image %q (ID: %q) (attempt %d): %v", config.Name, d.driverConfig.ImageName, d.imageID, attempted+1, createErr) + + // Volume management tools like Portworx may not have detached a volume + // from a previous node before Nomad started a task replacement task. + // Treat these errors as recoverable so we retry. + if strings.Contains(strings.ToLower(createErr.Error()), "volume is attached on another node") { + return nil, structs.NewRecoverableError(createErr, true) + } + + // If the container already exists determine whether it's already + // running or if it's dead and needs to be recreated. if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") { containers, err := client.ListContainers(docker.ListContainersOptions{ All: true, @@ -2082,3 +2092,12 @@ func authIsEmpty(auth *docker.AuthConfiguration) bool { auth.Email == "" && auth.ServerAddress == "" } + +// createContainerClient is the subset of Docker Client methods used by the +// createContainer method to ease testing subtle error conditions. +type createContainerClient interface { + CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) + InspectContainer(id string) (*docker.Container, error) + ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error) + RemoveContainer(opts docker.RemoveContainerOptions) error +} diff --git a/client/driver/docker_test.go b/client/driver/docker_test.go index d8abea032fb9..58e9bb813a06 100644 --- a/client/driver/docker_test.go +++ b/client/driver/docker_test.go @@ -2301,6 +2301,41 @@ func TestDockerDriver_ReadonlyRootfs(t *testing.T) { assert.True(t, container.HostConfig.ReadonlyRootfs, "ReadonlyRootfs option not set") } +// fakeDockerClient can be used in places that accept an interface for the +// docker client such as createContainer. +type fakeDockerClient struct{} + +func (fakeDockerClient) CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) { + return nil, fmt.Errorf("volume is attached on another node") +} +func (fakeDockerClient) InspectContainer(id string) (*docker.Container, error) { + panic("not implemented") +} +func (fakeDockerClient) ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error) { + panic("not implemented") +} +func (fakeDockerClient) RemoveContainer(opts docker.RemoveContainerOptions) error { + panic("not implemented") +} + +// TestDockerDriver_VolumeError asserts volume related errors when creating a +// container are recoverable. +func TestDockerDriver_VolumeError(t *testing.T) { + if !tu.IsTravis() { + t.Parallel() + } + + // setup + task, _, _ := dockerTask(t) + tctx := testDockerDriverContexts(t, task) + driver := NewDockerDriver(tctx.DriverCtx).(*DockerDriver) + driver.driverConfig = &DockerDriverConfig{ImageName: "test"} + + // assert volume error is recoverable + _, err := driver.createContainer(fakeDockerClient{}, docker.CreateContainerOptions{}) + require.True(t, structs.IsRecoverable(err)) +} + func TestDockerDriver_AdvertiseIPv6Address(t *testing.T) { if !tu.IsTravis() { t.Parallel()