Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

driver/docker: allow configurable pull context timeout setting. #8589

Merged
merged 1 commit into from
Aug 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 32 additions & 13 deletions drivers/docker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,18 @@ var (
hclspec.NewAttr("infra_image", "string", false),
hclspec.NewLiteral(`"gcr.io/google_containers/pause-amd64:3.0"`),
),
// timeout to use when pulling the infra image.
"infra_image_pull_timeout": hclspec.NewDefault(
hclspec.NewAttr("infra_image_pull_timeout", "string", false),
hclspec.NewLiteral(`"5m"`),
),

// the duration that the driver will wait for activity from the Docker engine during an image pull
// before canceling the request
"pull_activity_timeout": hclspec.NewDefault(
hclspec.NewAttr("pull_activity_timeout", "string", false),
hclspec.NewLiteral(`"2m"`),
),

// disable_log_collection indicates whether docker driver should collect logs of docker
// task containers. If true, nomad doesn't start docker_logger/logmon processes
"disable_log_collection": hclspec.NewAttr("disable_log_collection", "bool", false),
Expand Down Expand Up @@ -349,6 +353,10 @@ var (
"pid_mode": hclspec.NewAttr("pid_mode", "string", false),
"port_map": hclspec.NewAttr("port_map", "list(map(number))", false),
"privileged": hclspec.NewAttr("privileged", "bool", false),
"image_pull_timeout": hclspec.NewDefault(
hclspec.NewAttr("image_pull_timeout", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"readonly_rootfs": hclspec.NewAttr("readonly_rootfs", "bool", false),
"security_opt": hclspec.NewAttr("security_opt", "list(string)", false),
"shm_size": hclspec.NewAttr("shm_size", "number", false),
Expand Down Expand Up @@ -415,6 +423,7 @@ type TaskConfig struct {
PidMode string `codec:"pid_mode"`
PortMap hclutils.MapStrInt `codec:"port_map"`
Privileged bool `codec:"privileged"`
ImagePullTimeout string `codec:"image_pull_timeout"`
ReadonlyRootfs bool `codec:"readonly_rootfs"`
SecurityOpt []string `codec:"security_opt"`
ShmSize int64 `codec:"shm_size"`
Expand Down Expand Up @@ -567,18 +576,20 @@ type ContainerGCConfig struct {
}

type DriverConfig struct {
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
TLS TLSConfig `codec:"tls"`
GC GCConfig `codec:"gc"`
Volumes VolumeConfig `codec:"volumes"`
AllowPrivileged bool `codec:"allow_privileged"`
AllowCaps []string `codec:"allow_caps"`
GPURuntimeName string `codec:"nvidia_runtime"`
InfraImage string `codec:"infra_image"`
DisableLogCollection bool `codec:"disable_log_collection"`
PullActivityTimeout string `codec:"pull_activity_timeout"`
pullActivityTimeoutDuration time.Duration `codec:"-"`
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
TLS TLSConfig `codec:"tls"`
GC GCConfig `codec:"gc"`
Volumes VolumeConfig `codec:"volumes"`
AllowPrivileged bool `codec:"allow_privileged"`
AllowCaps []string `codec:"allow_caps"`
GPURuntimeName string `codec:"nvidia_runtime"`
InfraImage string `codec:"infra_image"`
InfraImagePullTimeout string `codec:"infra_image_pull_timeout"`
infraImagePullTimeoutDuration time.Duration `codec:"-"`
DisableLogCollection bool `codec:"disable_log_collection"`
PullActivityTimeout string `codec:"pull_activity_timeout"`
pullActivityTimeoutDuration time.Duration `codec:"-"`

AllowRuntimesList []string `codec:"allow_runtimes"`
allowRuntimes map[string]struct{} `codec:"-"`
Expand Down Expand Up @@ -667,6 +678,14 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.config.pullActivityTimeoutDuration = dur
}

if d.config.InfraImagePullTimeout != "" {
dur, err := time.ParseDuration(d.config.InfraImagePullTimeout)
if err != nil {
return fmt.Errorf("failed to parse 'infra_image_pull_timeout' duaration: %v", err)
}
d.config.infraImagePullTimeoutDuration = dur
}

d.config.allowRuntimes = make(map[string]struct{}, len(d.config.AllowRuntimesList))
for _, r := range d.config.AllowRuntimesList {
d.config.allowRuntimes[r] = struct{}{}
Expand Down
74 changes: 54 additions & 20 deletions drivers/docker/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ func TestConfig_ParseHCL(t *testing.T) {
image = "redis:3.2"
}`,
&TaskConfig{
Image: "redis:3.2",
Devices: []DockerDevice{},
Mounts: []DockerMount{},
CPUCFSPeriod: 100000,
Image: "redis:3.2",
Devices: []DockerDevice{},
Mounts: []DockerMount{},
CPUCFSPeriod: 100000,
ImagePullTimeout: "5m",
},
},
}
Expand Down Expand Up @@ -53,40 +54,44 @@ func TestConfig_ParseJSON(t *testing.T) {
name: "nil values for blocks are safe",
input: `{"Config": {"image": "bash:3", "mounts": null}}`,
expected: TaskConfig{
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
ImagePullTimeout: "5m",
},
},
{
name: "nil values for 'volumes' field are safe",
input: `{"Config": {"image": "bash:3", "volumes": null}}`,
expected: TaskConfig{
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
ImagePullTimeout: "5m",
},
},
{
name: "nil values for 'args' field are safe",
input: `{"Config": {"image": "bash:3", "args": null}}`,
expected: TaskConfig{
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
ImagePullTimeout: "5m",
},
},
{
name: "nil values for string fields are safe",
input: `{"Config": {"image": "bash:3", "command": null}}`,
expected: TaskConfig{
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
Image: "bash:3",
Mounts: []DockerMount{},
Devices: []DockerDevice{},
CPUCFSPeriod: 100000,
ImagePullTimeout: "5m",
},
},
}
Expand Down Expand Up @@ -178,6 +183,7 @@ func TestConfig_ParseAllHCL(t *testing.T) {
cfgStr := `
config {
image = "redis:3.2"
image_pull_timeout = "15m"
advertise_ipv6_address = true
args = ["command_arg1", "command_arg2"]
auth {
Expand Down Expand Up @@ -301,6 +307,7 @@ config {

expected := &TaskConfig{
Image: "redis:3.2",
ImagePullTimeout: "15m",
AdvertiseIPv6Addr: true,
Args: []string{"command_arg1", "command_arg2"},
Auth: DockerAuth{
Expand Down Expand Up @@ -528,6 +535,33 @@ func TestConfig_InternalCapabilities(t *testing.T) {
}
}

func TestConfig_DriverConfig_InfraImagePullTimeout(t *testing.T) {
cases := []struct {
name string
config string
expected string
}{
{
name: "default",
config: `{}`,
expected: "5m",
},
{
name: "set explicitly",
config: `{ infra_image_pull_timeout = "1m" }`,
expected: "1m",
},
}

for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
var tc DriverConfig
hclutils.NewConfigParser(configSpec).ParseHCL(t, "config "+c.config, &tc)
require.Equal(t, c.expected, tc.InfraImagePullTimeout)
})
}
}

func TestConfig_DriverConfig_PullActivityTimeout(t *testing.T) {
cases := []struct {
name string
Expand Down
11 changes: 7 additions & 4 deletions drivers/docker/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ func newDockerCoordinator(config *dockerCoordinatorConfig) *dockerCoordinator {

// PullImage is used to pull an image. It returns the pulled imaged ID or an
// error that occurred during the pull
func (d *dockerCoordinator) PullImage(image string, authOptions *docker.AuthConfiguration, callerID string, emitFn LogEventFn, pullActivityTimeout time.Duration) (imageID string, err error) {
func (d *dockerCoordinator) PullImage(image string, authOptions *docker.AuthConfiguration, callerID string,
emitFn LogEventFn, pullTimeout, pullActivityTimeout time.Duration) (imageID string, err error) {
// Get the future
d.imageLock.Lock()
future, ok := d.pullFutures[image]
Expand All @@ -140,7 +141,7 @@ func (d *dockerCoordinator) PullImage(image string, authOptions *docker.AuthConf
// Make the future
future = newPullFuture()
d.pullFutures[image] = future
go d.pullImageImpl(image, authOptions, pullActivityTimeout, future)
go d.pullImageImpl(image, authOptions, pullTimeout, pullActivityTimeout, future)
}
d.imageLock.Unlock()

Expand All @@ -167,11 +168,13 @@ func (d *dockerCoordinator) PullImage(image string, authOptions *docker.AuthConf

// pullImageImpl is the implementation of pulling an image. The results are
// returned via the passed future
func (d *dockerCoordinator) pullImageImpl(image string, authOptions *docker.AuthConfiguration, pullActivityTimeout time.Duration, future *pullFuture) {
func (d *dockerCoordinator) pullImageImpl(image string, authOptions *docker.AuthConfiguration,
pullTimeout, pullActivityTimeout time.Duration, future *pullFuture) {

defer d.clearPullLogger(image)
// Parse the repo and tag
repo, tag := parseDockerImage(image)
ctx, cancel := context.WithCancel(context.Background())
ctx, cancel := context.WithTimeout(context.Background(), pullTimeout)
defer cancel()

pm := newImageProgressManager(image, cancel, pullActivityTimeout, d.handlePullInactivity,
Expand Down
16 changes: 8 additions & 8 deletions drivers/docker/coordinator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ func TestDockerCoordinator_ConcurrentPulls(t *testing.T) {
// Create a coordinator
coordinator := newDockerCoordinator(config)

id, _ := coordinator.PullImage(image, nil, uuid.Generate(), nil, 2*time.Minute)
id, _ := coordinator.PullImage(image, nil, uuid.Generate(), nil, 5*time.Minute, 2*time.Minute)
for i := 0; i < 9; i++ {
go func() {
coordinator.PullImage(image, nil, uuid.Generate(), nil, 2*time.Minute)
coordinator.PullImage(image, nil, uuid.Generate(), nil, 5*time.Minute, 2*time.Minute)
}()
}

Expand Down Expand Up @@ -129,7 +129,7 @@ func TestDockerCoordinator_Pull_Remove(t *testing.T) {
callerIDs := make([]string, 10, 10)
for i := 0; i < 10; i++ {
callerIDs[i] = uuid.Generate()
id, _ = coordinator.PullImage(image, nil, callerIDs[i], nil, 2*time.Minute)
id, _ = coordinator.PullImage(image, nil, callerIDs[i], nil, 5*time.Minute, 2*time.Minute)
}

// Check the reference count
Expand Down Expand Up @@ -195,7 +195,7 @@ func TestDockerCoordinator_Remove_Cancel(t *testing.T) {
callerID := uuid.Generate()

// Pull image
id, _ := coordinator.PullImage(image, nil, callerID, nil, 2*time.Minute)
id, _ := coordinator.PullImage(image, nil, callerID, nil, 5*time.Minute, 2*time.Minute)

// Check the reference count
if references := coordinator.imageRefCount[id]; len(references) != 1 {
Expand All @@ -211,7 +211,7 @@ func TestDockerCoordinator_Remove_Cancel(t *testing.T) {
}

// Pull image again within delay
id, _ = coordinator.PullImage(image, nil, callerID, nil, 2*time.Minute)
id, _ = coordinator.PullImage(image, nil, callerID, nil, 5*time.Minute, 2*time.Minute)

// Check the reference count
if references := coordinator.imageRefCount[id]; len(references) != 1 {
Expand Down Expand Up @@ -244,7 +244,7 @@ func TestDockerCoordinator_No_Cleanup(t *testing.T) {
callerID := uuid.Generate()

// Pull image
id, _ := coordinator.PullImage(image, nil, callerID, nil, 2*time.Minute)
id, _ := coordinator.PullImage(image, nil, callerID, nil, 5*time.Minute, 2*time.Minute)

// Check the reference count
if references := coordinator.imageRefCount[id]; len(references) != 0 {
Expand Down Expand Up @@ -283,10 +283,10 @@ func TestDockerCoordinator_Cleanup_HonorsCtx(t *testing.T) {
callerID := uuid.Generate()

// Pull image
id1, _ := coordinator.PullImage(image1ID, nil, callerID, nil, 2*time.Minute)
id1, _ := coordinator.PullImage(image1ID, nil, callerID, nil, 5*time.Minute, 2*time.Minute)
require.Len(t, coordinator.imageRefCount[id1], 1, "image reference count")

id2, _ := coordinator.PullImage(image2ID, nil, callerID, nil, 2*time.Minute)
id2, _ := coordinator.PullImage(image2ID, nil, callerID, nil, 5*time.Minute, 2*time.Minute)
require.Len(t, coordinator.imageRefCount[id2], 1, "image reference count")

// remove one image, cancel ctx, remove second, and assert only first image is cleanedup
Expand Down
7 changes: 6 additions & 1 deletion drivers/docker/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,12 @@ func (d *Driver) pullImage(task *drivers.TaskConfig, driverConfig *TaskConfig, c
},
})

return d.coordinator.PullImage(driverConfig.Image, authOptions, task.ID, d.emitEventFunc(task), d.config.pullActivityTimeoutDuration)
pullDur, err := time.ParseDuration(driverConfig.ImagePullTimeout)
if err != nil {
return "", fmt.Errorf("Failed to parse image_pull_timeout: %v", err)
}

return d.coordinator.PullImage(driverConfig.Image, authOptions, task.ID, d.emitEventFunc(task), pullDur, d.config.pullActivityTimeoutDuration)
}

func (d *Driver) emitEventFunc(task *drivers.TaskConfig) LogEventFn {
Expand Down
5 changes: 3 additions & 2 deletions drivers/docker/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,9 @@ func TestDockerDriver_Start_BadPull_Recoverable(t *testing.T) {
testutil.DockerCompatible(t)

taskCfg := TaskConfig{
Image: "127.0.0.1:32121/foo", // bad path
Command: "echo",
Image: "127.0.0.1:32121/foo", // bad path
ImagePullTimeout: "5m",
Command: "echo",
Args: []string{
"hello",
},
Expand Down
12 changes: 7 additions & 5 deletions drivers/docker/driver_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,8 @@ func TestDockerDriver_Start_Image_HTTPS(t *testing.T) {
testutil.DockerCompatible(t)

taskCfg := TaskConfig{
Image: "https://gcr.io/google_containers/pause:0.8.0",
Image: "https://gcr.io/google_containers/pause:0.8.0",
ImagePullTimeout: "5m",
}
task := &drivers.TaskConfig{
ID: uuid.Generate(),
Expand Down Expand Up @@ -746,10 +747,11 @@ func newTaskConfig(variant string, command []string) TaskConfig {
}

return TaskConfig{
Image: image,
LoadImage: loadImage,
Command: command[0],
Args: command[1:],
Image: image,
ImagePullTimeout: "5m",
LoadImage: loadImage,
Command: command[0],
Args: command[1:],
}
}

Expand Down
7 changes: 4 additions & 3 deletions drivers/docker/driver_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ func newTaskConfig(variant string, command []string) TaskConfig {
busyboxImageID := "hashicorpnomad/busybox-windows:server2016-0.1"

return TaskConfig{
Image: busyboxImageID,
Command: command[0],
Args: command[1:],
Image: busyboxImageID,
ImagePullTimeout: "5m",
Command: command[0],
Args: command[1:],
}
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/docker/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func (d *Driver) CreateNetwork(allocID string) (*drivers.NetworkIsolationSpec, b
if err != nil {
d.logger.Debug("auth failed for infra container image pull", "image", d.config.InfraImage, "error", err)
}
_, err = d.coordinator.PullImage(d.config.InfraImage, authOptions, allocID, noopLogEventFn, d.config.pullActivityTimeoutDuration)
_, err = d.coordinator.PullImage(d.config.InfraImage, authOptions, allocID, noopLogEventFn, d.config.infraImagePullTimeoutDuration, d.config.pullActivityTimeoutDuration)
if err != nil {
return nil, false, err
}
Expand Down
Loading