diff --git a/drivers/docker/config.go b/drivers/docker/config.go index d381fb6eeca7..a8c6b0981474 100644 --- a/drivers/docker/config.go +++ b/drivers/docker/config.go @@ -251,7 +251,11 @@ var ( hclspec.NewAttr("nvidia_runtime", "string", false), hclspec.NewLiteral(`"nvidia"`), ), - + // list of docker runtimes allowed to be used + "allow_runtimes": hclspec.NewDefault( + hclspec.NewAttr("allow_runtimes", "list(string)", false), + hclspec.NewLiteral(`["runc", "nvidia"]`), + ), // image to use when creating a network namespace parent container "infra_image": hclspec.NewDefault( hclspec.NewAttr("infra_image", "string", false), @@ -341,6 +345,7 @@ var ( })), "network_aliases": hclspec.NewAttr("network_aliases", "list(string)", false), "network_mode": hclspec.NewAttr("network_mode", "string", false), + "runtime": hclspec.NewAttr("runtime", "string", false), "pids_limit": hclspec.NewAttr("pids_limit", "number", false), "pid_mode": hclspec.NewAttr("pid_mode", "string", false), "port_map": hclspec.NewAttr("port_map", "list(map(number))", false), @@ -404,6 +409,7 @@ type TaskConfig struct { Mounts []DockerMount `codec:"mounts"` NetworkAliases []string `codec:"network_aliases"` NetworkMode string `codec:"network_mode"` + Runtime string `codec:"runtime"` PidsLimit int64 `codec:"pids_limit"` PidMode string `codec:"pid_mode"` PortMap hclutils.MapStrInt `codec:"port_map"` @@ -572,6 +578,9 @@ type DriverConfig struct { DisableLogCollection bool `codec:"disable_log_collection"` PullActivityTimeout string `codec:"pull_activity_timeout"` pullActivityTimeoutDuration time.Duration `codec:"-"` + + AllowRuntimesList []string `codec:"allow_runtimes"` + allowRuntimes map[string]struct{} `codec:"-"` } type AuthConfig struct { @@ -657,6 +666,11 @@ func (d *Driver) SetConfig(c *base.Config) error { d.config.pullActivityTimeoutDuration = dur } + d.config.allowRuntimes = make(map[string]struct{}, len(d.config.AllowRuntimesList)) + for _, r := range d.config.AllowRuntimesList { + 
d.config.allowRuntimes[r] = struct{}{} + } + if c.AgentConfig != nil { d.clientConfig = c.AgentConfig.Driver } diff --git a/drivers/docker/config_test.go b/drivers/docker/config_test.go index 98189ba08468..e1438fa9a353 100644 --- a/drivers/docker/config_test.go +++ b/drivers/docker/config_test.go @@ -270,6 +270,7 @@ config { } privileged = true readonly_rootfs = true + runtime = "runc" security_opt = [ "credentialspec=file://gmsaUser.json" ], @@ -398,6 +399,7 @@ config { }, Privileged: true, ReadonlyRootfs: true, + Runtime: "runc", SecurityOpt: []string{ "credentialspec=file://gmsaUser.json", }, @@ -551,3 +553,34 @@ func TestConfig_DriverConfig_PullActivityTimeout(t *testing.T) { }) } } + +func TestConfig_DriverConfig_AllowRuntimes(t *testing.T) { + cases := []struct { + name string + config string + expected map[string]struct{} + }{ + { + name: "pure default", + config: `{}`, + expected: map[string]struct{}{"runc": struct{}{}, "nvidia": struct{}{}}, + }, + { + name: "custom", + config: `{ allow_runtimes = ["runc", "firecracker"]}`, + expected: map[string]struct{}{"runc": struct{}{}, "firecracker": struct{}{}}, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + var tc map[string]interface{} + hclutils.NewConfigParser(configSpec).ParseHCL(t, "config "+c.config, &tc) + + dh := dockerDriverHarness(t, tc) + d := dh.Impl().(*Driver) + require.Equal(t, c.expected, d.config.allowRuntimes) + }) + } + +} diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go index 5880d976d2ec..c6818eedb619 100644 --- a/drivers/docker/driver.go +++ b/drivers/docker/driver.go @@ -739,6 +739,20 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T config.WorkingDir = driverConfig.WorkDir } + containerRuntime := driverConfig.Runtime + if _, ok := task.DeviceEnv[nvidiaVisibleDevices]; ok { + if !d.gpuRuntime { + return c, fmt.Errorf("requested docker runtime %q was not found", d.config.GPURuntimeName) + } + if containerRuntime 
!= "" && containerRuntime != d.config.GPURuntimeName { + return c, fmt.Errorf("conflicting runtime requests: gpu runtime %q conflicts with task runtime %q", d.config.GPURuntimeName, containerRuntime) + } + containerRuntime = d.config.GPURuntimeName + } + if _, ok := d.config.allowRuntimes[containerRuntime]; !ok && containerRuntime != "" { + return c, fmt.Errorf("requested runtime %q is not allowed", containerRuntime) + } + hostConfig := &docker.HostConfig{ Memory: task.Resources.LinuxResources.MemoryLimitBytes, CPUShares: task.Resources.LinuxResources.CPUShares, @@ -752,13 +766,8 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T VolumeDriver: driverConfig.VolumeDriver, PidsLimit: &driverConfig.PidsLimit, - } - if _, ok := task.DeviceEnv[nvidiaVisibleDevices]; ok { - if !d.gpuRuntime { - return c, fmt.Errorf("requested docker-runtime %q was not found", d.config.GPURuntimeName) - } - hostConfig.Runtime = d.config.GPURuntimeName + Runtime: containerRuntime, } // Calculate CPU Quota diff --git a/drivers/docker/driver_test.go b/drivers/docker/driver_test.go index ef0c413f918a..deb3d4b2aa94 100644 --- a/drivers/docker/driver_test.go +++ b/drivers/docker/driver_test.go @@ -1029,6 +1029,29 @@ func TestDockerDriver_SecurityOptFromFile(t *testing.T) { require.Contains(t, container.HostConfig.SecurityOpt[0], "reboot") } +func TestDockerDriver_Runtime(t *testing.T) { + if !tu.IsCI() { + t.Parallel() + } + testutil.DockerCompatible(t) + + task, cfg, ports := dockerTask(t) + defer freeport.Return(ports) + cfg.Runtime = "runc" + require.NoError(t, task.EncodeConcreteDriverConfig(cfg)) + + client, d, handle, cleanup := dockerSetup(t, task) + defer cleanup() + require.NoError(t, d.WaitUntilStarted(task.ID, 5*time.Second)) + + container, err := client.InspectContainer(handle.containerID) + if err != nil { + t.Fatalf("err: %v", err) + } + + require.Exactly(t, cfg.Runtime, container.HostConfig.Runtime) +} + func 
TestDockerDriver_CreateContainerConfig(t *testing.T) { t.Parallel() @@ -1053,6 +1076,70 @@ func TestDockerDriver_CreateContainerConfig(t *testing.T) { require.Equal(t, containerName, c.Name) } +func TestDockerDriver_CreateContainerConfig_RuntimeConflict(t *testing.T) { + t.Parallel() + + task, cfg, ports := dockerTask(t) + defer freeport.Return(ports) + task.DeviceEnv[nvidia.NvidiaVisibleDevices] = "GPU_UUID_1" + + require.NoError(t, task.EncodeConcreteDriverConfig(cfg)) + + dh := dockerDriverHarness(t, nil) + driver := dh.Impl().(*Driver) + driver.gpuRuntime = true + + // Should error if a runtime was explicitly set that doesn't match gpu runtime + cfg.Runtime = "nvidia" + c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1") + require.NoError(t, err) + require.Equal(t, "nvidia", c.HostConfig.Runtime) + + cfg.Runtime = "custom" + _, err = driver.createContainerConfig(task, cfg, "org/repo:0.1") + require.Error(t, err) + require.Contains(t, err.Error(), "conflicting runtime requests") +} + +func TestDockerDriver_CreateContainerConfig_ChecksAllowRuntimes(t *testing.T) { + t.Parallel() + + dh := dockerDriverHarness(t, nil) + driver := dh.Impl().(*Driver) + driver.gpuRuntime = true + driver.config.allowRuntimes = map[string]struct{}{ + "runc": struct{}{}, + "custom": struct{}{}, + } + + allowRuntime := []string{ + "", // default always works + "runc", + "custom", + } + + task, cfg, ports := dockerTask(t) + defer freeport.Return(ports) + require.NoError(t, task.EncodeConcreteDriverConfig(cfg)) + + for _, runtime := range allowRuntime { + t.Run(runtime, func(t *testing.T) { + cfg.Runtime = runtime + c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1") + require.NoError(t, err) + require.Equal(t, runtime, c.HostConfig.Runtime) + }) + } + + t.Run("not allowed: denied", func(t *testing.T) { + cfg.Runtime = "denied" + _, err := driver.createContainerConfig(task, cfg, "org/repo:0.1") + require.Error(t, err) + require.Contains(t, err.Error(), `runtime 
"denied" is not allowed`) + }) + +} + func TestDockerDriver_CreateContainerConfig_User(t *testing.T) { t.Parallel() @@ -1188,12 +1275,6 @@ func TestDockerDriver_CreateContainerConfigWithRuntimes(t *testing.T) { if !tu.IsCI() { t.Parallel() } - if !testutil.DockerIsConnected(t) { - t.Skip("Docker not connected") - } - if runtime.GOOS != "linux" { - t.Skip("nvidia plugin supports only linux") - } testCases := []struct { description string gpuRuntimeSet bool @@ -1235,7 +1316,9 @@ func TestDockerDriver_CreateContainerConfigWithRuntimes(t *testing.T) { task, cfg, ports := dockerTask(t) defer freeport.Return(ports) - dh := dockerDriverHarness(t, nil) + dh := dockerDriverHarness(t, map[string]interface{}{ + "allow_runtimes": []string{"runc", "nvidia", "nvidia-runtime-modified-name"}, + }) driver := dh.Impl().(*Driver) driver.gpuRuntime = testCase.gpuRuntimeSet diff --git a/website/pages/docs/drivers/docker.mdx b/website/pages/docs/drivers/docker.mdx index c863b96d0c4b..18d091f8e784 100644 --- a/website/pages/docs/drivers/docker.mdx +++ b/website/pages/docs/drivers/docker.mdx @@ -406,6 +406,17 @@ The `docker` driver supports the following configuration in the job spec. Only - `readonly_rootfs` - (Optional) `true` or `false` (default). Mount the container's filesystem as read only. +- `runtime` - (Optional) A string representing a configured runtime to pass to docker. + This is equivalent to the `--runtime` argument in the docker CLI. + For example, to use gVisor: + + ```hcl + config { + # gVisor runtime is runsc + runtime = "runsc" + } + ``` + - `pids_limit` - (Optional) An integer value that specifies the pid limit for the container. Defaults to unlimited. @@ -699,6 +710,9 @@ plugin "docker" { and cap_drop options. Supports the value "ALL" as a shortcut for whitelisting all capabilities. +- `allow_runtimes` - defaults to `["runc", "nvidia"]` - A list of the allowed + docker runtimes a task may use. + - `auth` stanza: - `config` - Allows an operator to specify a