Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

driver/docker: Don't pull InfraImage if it exists #13265

Merged
merged 5 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/13265.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
driver/docker: Eliminate excess Docker registry pulls for the `infra_image` when it already exists locally.
```
76 changes: 64 additions & 12 deletions drivers/docker/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,7 @@ func (d *Driver) CreateNetwork(allocID string, createSpec *drivers.NetworkCreate
return nil, false, fmt.Errorf("failed to connect to docker daemon: %s", err)
}

repo, _ := parseDockerImage(d.config.InfraImage)
authOptions, err := firstValidAuth(repo, []authBackend{
authFromDockerConfig(d.config.Auth.Config),
authFromHelper(d.config.Auth.Helper),
})
if err != nil {
d.logger.Debug("auth failed for infra container image pull", "image", d.config.InfraImage, "error", err)
}
_, err = d.coordinator.PullImage(d.config.InfraImage, authOptions, allocID, noopLogEventFn, d.config.infraImagePullTimeoutDuration, d.config.pullActivityTimeoutDuration)
if err != nil {
if err := d.pullInfraImage(allocID); err != nil {
return nil, false, err
}

Expand Down Expand Up @@ -101,10 +92,28 @@ func (d *Driver) DestroyNetwork(allocID string, spec *drivers.NetworkIsolationSp
return fmt.Errorf("failed to connect to docker daemon: %s", err)
}

return client.RemoveContainer(docker.RemoveContainerOptions{
if err := client.RemoveContainer(docker.RemoveContainerOptions{
Force: true,
ID: spec.Labels[dockerNetSpecLabelKey],
})
}); err != nil {
return err
}

if d.config.GC.Image {

// The Docker image ID is needed in order to correctly update the image
// reference count. Any error finding this, however, should not result
// in an error shutting down the allocrunner.
dockerImage, err := client.InspectImage(d.config.InfraImage)
if err != nil {
d.logger.Warn("InspectImage failed for infra_image container destroy",
"image", d.config.InfraImage, "error", err)
return nil
}
d.coordinator.RemoveImage(dockerImage.ID, allocID)
}

return nil
}

// createSandboxContainerConfig creates a docker container configuration which
Expand All @@ -124,3 +133,46 @@ func (d *Driver) createSandboxContainerConfig(allocID string, createSpec *driver
},
}, nil
}

// pullInfraImage conditionally pulls the `infra_image` from the Docker registry
// only if its name uses the "latest" tag or the image doesn't already exist locally.
func (d *Driver) pullInfraImage(allocID string) error {
repo, tag := parseDockerImage(d.config.InfraImage)

// There's a (narrow) time-of-check-time-of-use race here. If we call
// InspectImage and then a concurrent task shutdown happens before we call
// IncrementImageReference, we could end up removing the image, and it
// would no longer exist by the time we get to PullImage below.
d.coordinator.imageLock.Lock()

if tag != "latest" {
dockerImage, err := client.InspectImage(d.config.InfraImage)
if err != nil {
d.logger.Debug("InspectImage failed for infra_image container pull",
"image", d.config.InfraImage, "error", err)
} else if dockerImage != nil {
// Image exists, so no pull is attempted; just increment its reference
// count and unlock the image lock.
d.coordinator.incrementImageReferenceImpl(dockerImage.ID, d.config.InfraImage, allocID)
d.coordinator.imageLock.Unlock()
return nil
}
}

// At this point we have performed all the image work needed, so unlock. It
// is possible in environments with slow networks that the image pull may
// take a while, so while defer unlock would be best, this allows us to
// remove the lock sooner.
d.coordinator.imageLock.Unlock()

authOptions, err := firstValidAuth(repo, []authBackend{
authFromDockerConfig(d.config.Auth.Config),
authFromHelper(d.config.Auth.Helper),
})
if err != nil {
d.logger.Debug("auth failed for infra_image container pull", "image", d.config.InfraImage, "error", err)
}

_, err = d.coordinator.PullImage(d.config.InfraImage, authOptions, allocID, noopLogEventFn, d.config.infraImagePullTimeoutDuration, d.config.pullActivityTimeoutDuration)
return err
}
3 changes: 2 additions & 1 deletion website/content/docs/drivers/docker.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,8 @@ host system.

- `infra_image` - This is the Docker image to use when creating the parent
container necessary when sharing network namespaces between tasks. Defaults to
`gcr.io/google_containers/pause-<goarch>:3.1`.
`gcr.io/google_containers/pause-<goarch>:3.1`. The image will only be pulled from
the container registry if its tag is `latest` or the image doesn't yet exist locally.

- `infra_image_pull_timeout` - A time duration that controls how long Nomad will
wait before cancelling an in-progress pull of the Docker image as specified in
Expand Down