diff --git a/README.md b/README.md index 6b27a9a2d09..a40daeb4ae4 100644 --- a/README.md +++ b/README.md @@ -430,6 +430,7 @@ Resource flags: - :whale: `--blkio-weight`: Block IO (relative weight), between 10 and 1000, or 0 to disable (default 0) - :whale: `--cgroupns=(host|private)`: Cgroup namespace to use - Default: "private" on cgroup v2 hosts, "host" on cgroup v1 hosts +- :whale: `--cgroup-parent`: Optional parent cgroup for the container - :whale: `--device`: Add a host device to the container Intel RDT flags: @@ -575,7 +576,7 @@ Verify flags: - :nerd_face: `--cosign-key`: Path to the public key file, KMS, URI or Kubernetes Secret for `--verify=cosign` Unimplemented `docker run` flags: - `--attach`, `--blkio-weight-device`, `--cgroup-parent`, `--cpu-rt-*`, `--detach-keys`, `--device-*`, + `--attach`, `--blkio-weight-device`, `--cpu-rt-*`, `--detach-keys`, `--device-*`, `--disable-content-trust`, `--domainname`, `--expose`, `--health-*`, `--ip6`, `--isolation`, `--no-healthcheck`, `--link*`, `--mac-address`, `--publish-all`, `--sig-proxy`, `--storage-opt`, `--userns`, `--volume-driver`, `--volumes-from` diff --git a/cmd/nerdctl/run.go b/cmd/nerdctl/run.go index 822fd348be8..2e137611ffd 100644 --- a/cmd/nerdctl/run.go +++ b/cmd/nerdctl/run.go @@ -168,6 +168,7 @@ func setCreateFlags(cmd *cobra.Command) { cmd.Flags().StringSlice("cgroup-conf", nil, "Configure cgroup v2 (key=value)") cmd.Flags().Uint16("blkio-weight", 0, "Block IO (relative weight), between 10 and 1000, or 0 to disable (default 0)") cmd.Flags().String("cgroupns", defaults.CgroupnsMode(), `Cgroup namespace to use, the default depends on the cgroup version ("host"|"private")`) + cmd.Flags().String("cgroup-parent", "", "Optional parent cgroup for the container") cmd.RegisterFlagCompletionFunc("cgroupns", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { return []string{"host", "private"}, cobra.ShellCompDirectiveNoFileComp }) diff --git 
a/cmd/nerdctl/run_cgroup_linux.go b/cmd/nerdctl/run_cgroup_linux.go index 2c964f46c0a..9bd10ef264f 100644 --- a/cmd/nerdctl/run_cgroup_linux.go +++ b/cmd/nerdctl/run_cgroup_linux.go @@ -86,28 +86,35 @@ func generateCgroupOpts(cmd *cobra.Command, id string) ([]oci.SpecOpts, error) { if err != nil { return nil, err } + + parent, err := cmd.Flags().GetString("cgroup-parent") + if err != nil { + return nil, err + } + if cgroupManager == "none" { if !rootlessutil.IsRootless() { - return nil, errors.New("cgroup-manager \"none\" is only supported for rootless") + return nil, errors.New(`cgroup-manager "none" is only supported for rootless`) } if cpus > 0.0 || memStr != "" || memSwap != "" || pidsLimit > 0 { - logrus.Warn("cgroup manager is set to \"none\", discarding resource limit requests. " + + logrus.Warn(`cgroup manager is set to "none", discarding resource limit requests. ` + "(Hint: enable cgroup v2 with systemd: https://rootlesscontaine.rs/getting-started/common/cgroup2/)") } + if parent != "" { + logrus.Warnf(`cgroup manager is set to "none", ignoring cgroup parent %q`+ + "(Hint: enable cgroup v2 with systemd: https://rootlesscontaine.rs/getting-started/common/cgroup2/)", parent) + } return []oci.SpecOpts{oci.WithCgroup("")}, nil } var opts []oci.SpecOpts // nolint: prealloc - - if cgroupManager == "systemd" { - slice := "system.slice" - if rootlessutil.IsRootlessChild() { - slice = "user.slice" - } - // "slice:prefix:name" - cg := slice + ":nerdctl:" + id - opts = append(opts, oci.WithCgroup(cg)) + path, err := generateCgroupPath(cmd, cgroupManager, parent, id) + if err != nil { + return nil, err + } + if path != "" { + opts = append(opts, oci.WithCgroup(path)) } // cpus: from https://github.com/containerd/containerd/blob/v1.4.3/cmd/ctr/commands/run/run_unix.go#L187-L193 @@ -155,6 +162,7 @@ func generateCgroupOpts(cmd *cobra.Command, id string) ([]oci.SpecOpts, error) { if cpusetMems != "" { opts = append(opts, oci.WithCPUsMems(cpusetMems)) } + var mem64 int64 
if memStr != "" { mem64, err = units.RAMInBytes(memStr) @@ -162,8 +170,8 @@ func generateCgroupOpts(cmd *cobra.Command, id string) ([]oci.SpecOpts, error) { return nil, fmt.Errorf("failed to parse memory bytes %q: %w", memStr, err) } opts = append(opts, oci.WithMemoryLimit(uint64(mem64))) - } + var memReserve64 int64 if memReserve != "" { memReserve64, err = units.RAMInBytes(memReserve) @@ -280,6 +288,44 @@ func generateCgroupOpts(cmd *cobra.Command, id string) ([]oci.SpecOpts, error) { return opts, nil } +func generateCgroupPath(cmd *cobra.Command, cgroupManager, parent, id string) (string, error) { + var ( + path string + usingSystemd = cgroupManager == "systemd" + slice = "system.slice" + scopePrefix = ":nerdctl:" + ) + if rootlessutil.IsRootlessChild() { + slice = "user.slice" + } + + if parent == "" { + if usingSystemd { + // "slice:prefix:name" + path = slice + scopePrefix + id + } + // Nothing to do for the non-systemd case if a parent wasn't supplied, + // containerd already sets a default cgroup path as /<namespace>/<containerID> + return path, nil + } + + // If the user asked for a cgroup parent and we're using systemd, + // Docker uses the following: + // parent + prefix (in our case, nerdctl) + containerID. 
+ // + // In the non systemd case, it's just /parent/containerID + if usingSystemd { + if len(parent) <= 6 || !strings.HasSuffix(parent, ".slice") { + return "", errors.New(`cgroup-parent for systemd cgroup should be a valid slice named as "xxx.slice"`) + } + path = parent + scopePrefix + id + } else { + path = filepath.Join(parent, id) + } + + return path, nil +} + func parseDevice(s string) (hostDevPath string, mode string, err error) { mode = "rwm" split := strings.Split(s, ":") diff --git a/cmd/nerdctl/run_cgroup_linux_test.go b/cmd/nerdctl/run_cgroup_linux_test.go index c05fea26c20..4fa6faa8374 100644 --- a/cmd/nerdctl/run_cgroup_linux_test.go +++ b/cmd/nerdctl/run_cgroup_linux_test.go @@ -20,6 +20,7 @@ import ( "bytes" "fmt" "os" + "path/filepath" "testing" "github.com/containerd/cgroups" @@ -261,7 +262,6 @@ func TestParseDevice(t *testing.T) { assert.ErrorContains(t, err, tc.err) } } - } func TestRunCgroupConf(t *testing.T) { @@ -283,6 +283,51 @@ func TestRunCgroupConf(t *testing.T) { "cat", "memory.high").AssertOutExactly("33554432\n") } +func TestRunCgroupParent(t *testing.T) { + t.Parallel() + base := testutil.NewBase(t) + info := base.Info() + containerName := testutil.Identifier(t) + defer base.Cmd("rm", "-f", containerName).Run() + + switch info.CgroupDriver { + case "none", "": + t.Skip("test requires cgroup driver") + } + + t.Logf("Using %q cgroup driver", info.CgroupDriver) + + parent := "/foobarbaz" + if info.CgroupDriver == "systemd" { + // Path separators aren't allowed in systemd path. runc + // explicitly checks for this. + // https://github.com/opencontainers/runc/blob/016a0d29d1750180b2a619fc70d6fe0d80111be0/libcontainer/cgroups/systemd/common.go#L65-L68 + parent = "foobarbaz.slice" + } + + // cgroup2 without host cgroup ns will just output 0::/ which doesn't help much to verify + // we got our expected path. 
This approach should work for both cgroup1 and 2, there will + // just be many more entries for cgroup1 as there'll be an entry per controller. + base.Cmd( + "run", + "-d", + "--name", + containerName, + "--cgroupns=host", + "--cgroup-parent", parent, + testutil.AlpineImage, + "sleep", + "infinity", + ).AssertOK() + + id := base.InspectContainer(containerName).ID + expected := filepath.Join(parent, id) + if info.CgroupDriver == "systemd" { + expected = filepath.Join(parent, fmt.Sprintf("nerdctl-%s", id)) + } + base.Cmd("exec", containerName, "cat", "/proc/self/cgroup").AssertOutContains(expected) +} + func TestRunBlkioWeightCgroupV2(t *testing.T) { t.Parallel() if cgroups.Mode() != cgroups.Unified {