diff --git a/contrib/completions/bash/runc b/contrib/completions/bash/runc index 782234e86d4..353c8ffdbbd 100644 --- a/contrib/completions/bash/runc +++ b/contrib/completions/bash/runc @@ -461,7 +461,6 @@ _runc_run() { --no-subreaper --no-pivot --no-new-keyring - --no-mount-fallback " local options_with_args=" @@ -568,7 +567,6 @@ _runc_create() { --help --no-pivot --no-new-keyring - --no-mount-fallback " local options_with_args=" @@ -629,7 +627,6 @@ _runc_restore() { --no-pivot --auto-dedup --lazy-pages - --no-mount-fallback " local options_with_args=" diff --git a/create.go b/create.go index 3788a532fce..97854b846cb 100644 --- a/create.go +++ b/create.go @@ -51,10 +51,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See Name: "preserve-fds", Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)", }, - cli.BoolFlag{ - Name: "no-mount-fallback", - Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)", - }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, exactArgs); err != nil { diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 1ece49c3732..68d2f8f2678 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -214,10 +214,6 @@ type Config struct { // When RootlessCgroups is set, cgroups errors are ignored. RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - // Do not try to remount a bind mount again after the first attempt failed on source - // filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set - NoMountFallback bool `json:"no_mount_fallback,omitempty"` - // TimeOffsets specifies the offset for supporting time namespaces. TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"` diff --git a/libcontainer/configs/mount_linux.go b/libcontainer/configs/mount_linux.go index 6d4106de0c6..3f489295d97 100644 --- a/libcontainer/configs/mount_linux.go +++ b/libcontainer/configs/mount_linux.go @@ -15,6 +15,10 @@ type Mount struct { // Mount flags. Flags int `json:"flags"` + // Mount flags that were explicitly cleared in the configuration (meaning + // the user explicitly requested that these flags *not* be set). + ClearedFlags int `json:"cleared_flags"` + // Propagation Flags PropagationFlags []int `json:"propagation_flags"` diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index eafd6c82d05..a2e41ea5638 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -34,7 +34,6 @@ type mountConfig struct { cgroup2Path string rootlessCgroups bool cgroupns bool - noMountFallback bool } // mountEntry contains mount data specific to a mount point. @@ -83,7 +82,6 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig, mountFds mountFds) (er cgroup2Path: iConfig.Cgroup2Path, rootlessCgroups: iConfig.RootlessCgroups, cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), - noMountFallback: config.NoMountFallback, } for i, m := range config.Mounts { entry := mountEntry{Mount: m} @@ -409,6 +407,51 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) { }) } +const ( + // The atime "enum" flags (which are mutually exclusive). + mntAtimeEnumFlags = unix.MS_NOATIME | unix.MS_RELATIME | unix.MS_STRICTATIME + // All atime-related flags. + mntAtimeFlags = mntAtimeEnumFlags | unix.MS_NODIRATIME + // Flags which can be locked when inheriting mounts in a different userns. + // In the kernel, these are the mounts that are locked using MNT_LOCK_*. + mntLockFlags = unix.MS_RDONLY | unix.MS_NODEV | unix.MS_NOEXEC | + unix.MS_NOSUID | mntAtimeFlags +) + +func statfsToMountFlags(st unix.Statfs_t) int { + // From . + const ST_NOSYMFOLLOW = 0x2000 //nolint:revive + + var flags int + for _, f := range []struct { + st, ms int + }{ + // See calculate_f_flags() in fs/statfs.c. + {unix.ST_RDONLY, unix.MS_RDONLY}, + {unix.ST_NOSUID, unix.MS_NOSUID}, + {unix.ST_NODEV, unix.MS_NODEV}, + {unix.ST_NOEXEC, unix.MS_NOEXEC}, + {unix.ST_MANDLOCK, unix.MS_MANDLOCK}, + {unix.ST_SYNCHRONOUS, unix.MS_SYNCHRONOUS}, + {unix.ST_NOATIME, unix.MS_NOATIME}, + {unix.ST_NODIRATIME, unix.MS_NODIRATIME}, + {unix.ST_RELATIME, unix.MS_RELATIME}, + {ST_NOSYMFOLLOW, unix.MS_NOSYMFOLLOW}, + // There is no ST_STRICTATIME -- see below. + } { + if int(st.Flags)&f.st == f.st { + flags |= f.ms + } + } + // MS_STRICTATIME is a "fake" MS_* flag. It isn't stored in mnt->mnt_flags, + // and so it doesn't show up in statfs(2). If none of the other flags in + // atime enum are present, the mount is MS_STRICTATIME. + if flags&mntAtimeEnumFlags == 0 { + flags |= unix.MS_STRICTATIME + } + return flags +} + func mountToRootfs(c *mountConfig, m mountEntry) error { rootfs := c.root @@ -509,11 +552,97 @@ func mountToRootfs(c *mountConfig, m mountEntry) error { return err } } - // bind mount won't change mount options, we need remount to make mount options effective. - // first check that we have non-default options required before attempting a remount - if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 { - // only remount if unique mount options are set - if err := remount(m, rootfs, c.noMountFallback); err != nil { + + // The initial MS_BIND won't change the mount options, we need to do a + // separate MS_BIND|MS_REMOUNT to apply the mount options. We skip + // doing this if the user has not specified any mount flags at all + // (including cleared flags) -- in which case we just keep the original + // mount flags. + // + // Note that the fact we check whether any clearing flags are set is in + // contrast to mount(8)'s current behaviour, but is what users probably + // expect. See . + if m.Flags & ^(unix.MS_BIND|unix.MS_REC|unix.MS_REMOUNT) != 0 || m.ClearedFlags != 0 { + if err := utils.WithProcfd(rootfs, m.Destination, func(dstFD string) error { + flags := m.Flags | unix.MS_BIND | unix.MS_REMOUNT + // The runtime-spec says we SHOULD map to the relevant mount(8) + // behaviour. However, it's not clear whether we want the + // "mount --bind -o ..." or "mount --bind -o remount,..." + // behaviour here -- both of which are somewhat broken[1]. + // + // So, if the user has passed "remount" as a mount option, we + // implement the "mount --bind -o remount" behaviour, otherwise + // we implement the spiritual intent of the "mount --bind -o" + // behaviour, which should match what users expect. Maybe + // mount(8) will eventually implement this behaviour too.. + // + // [1]: https://github.com/util-linux/util-linux/issues/2433 + + // Initially, we emulate "mount --bind -o ..." where we set + // only the requested flags (clearing any existing flags). The + // only difference from mount(8) is that we do this + // unconditionally, regardless of whether any set-me mount + // options have been requested. + // + // TODO: We are not doing any special handling of the atime + // flags here, which means that the mount will inherit the old + // atime flags if the user didn't explicitly request a + // different set of flags. This also has the mount(8) bug where + // "nodiratime,norelatime" will result in a + // "nodiratime,relatime" mount. + mountErr := mountViaFDs("", nil, m.Destination, dstFD, "", uintptr(flags), "") + if mountErr == nil { + return nil + } + + // If the mount failed, the mount may contain locked mount + // flags. In that case, we emulate "mount --bind -o + // remount,...", where we take the existing mount flags of the + // mount and apply the request flags (including clearing flags) + // on top. The main divergence we have from mount(8) here is + // that we handle atimes correctly to make sure we error out if + // we cannot fulfil the requested mount flags. + + var st unix.Statfs_t + if err := unix.Statfs(m.src(), &st); err != nil { + return &os.PathError{Op: "statfs", Path: m.src(), Err: err} + } + srcFlags := statfsToMountFlags(st) + // If the user explicitly request one of the locked flags *not* + // be set, we need to return an error to avoid producing mounts + // that don't match the user's request. + if srcFlags&m.ClearedFlags&mntLockFlags != 0 { + return mountErr + } + + // If an MS_*ATIME flag was requested, it must match the + // existing one. This handles two separate kernel bugs, and + // matches the logic of can_change_locked_flags() but without + // these bugs: + // + // * (2.6.30+) Since commit 613cbe3d4870 ("Don't set relatime + // when noatime is specified"), MS_RELATIME is ignored when + // MS_NOATIME is set. This means that us inheriting MS_NOATIME + // from a mount while requesting MS_RELATIME would *silently* + // produce an MS_NOATIME mount. + // + // * (2.6.30+) Since its introduction in commit d0adde574b84 + // ("Add a strictatime mount option"), MS_STRICTATIME has + // caused any passed MS_RELATIME and MS_NOATIME flags to be + // ignored which results in us *silently* producing + // MS_STRICTATIME mounts even if the user requested MS_RELATIME + // or MS_NOATIME. + if m.Flags&mntAtimeFlags != 0 && m.Flags&mntAtimeFlags != srcFlags&mntAtimeFlags { + return mountErr + } + + // Retry the mount with the existing lockable mount flags + // applied. + flags |= srcFlags & mntLockFlags + mountErr = mountViaFDs("", nil, m.Destination, dstFD, "", uintptr(flags), "") + logrus.Debugf("remount retry: srcFlags=0x%x flagsSet=0x%x flagsClr=0x%x: %v", srcFlags, m.Flags, m.ClearedFlags, mountErr) + return mountErr + }); err != nil { return err } } @@ -1103,37 +1232,6 @@ func writeSystemProperty(key, value string) error { return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644) } -func remount(m mountEntry, rootfs string, noMountFallback bool) error { - return utils.WithProcfd(rootfs, m.Destination, func(dstFD string) error { - flags := uintptr(m.Flags | unix.MS_REMOUNT) - err := mountViaFDs("", nil, m.Destination, dstFD, m.Device, flags, "") - if err == nil { - return nil - } - // Check if the source has flags set according to noMountFallback - src := m.src() - var s unix.Statfs_t - if err := unix.Statfs(src, &s); err != nil { - return &os.PathError{Op: "statfs", Path: src, Err: err} - } - var checkflags int - if noMountFallback { - // Check for ro only - checkflags = unix.MS_RDONLY - } else { - // Check for ro, nodev, noexec, nosuid, noatime, relatime, strictatime, - // nodiratime - checkflags = unix.MS_RDONLY | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NOATIME | unix.MS_RELATIME | unix.MS_STRICTATIME | unix.MS_NODIRATIME - } - if int(s.Flags)&checkflags == 0 { - return err - } - // ... and retry the mount with flags found above. - flags |= uintptr(int(s.Flags) & checkflags) - return mountViaFDs("", nil, m.Destination, dstFD, m.Device, flags, "") - }) -} - // Do the mount operation followed by additional mounts required to take care // of propagation flags. This will always be scoped inside the container rootfs. func mountPropagate(m mountEntry, rootfs string, mountLabel string) error { diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index c5553832776..45d8313f627 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -313,7 +313,6 @@ type CreateOpts struct { Spec *specs.Spec RootlessEUID bool RootlessCgroups bool - NoMountFallback bool } // getwd is a wrapper similar to os.Getwd, except it always gets @@ -359,7 +358,6 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { NoNewKeyring: opts.NoNewKeyring, RootlessEUID: opts.RootlessEUID, RootlessCgroups: opts.RootlessCgroups, - NoMountFallback: opts.NoMountFallback, } for _, m := range spec.Mounts { @@ -977,10 +975,15 @@ func parseMountOptions(options []string) *configs.Mount { // or the flag is not supported on the platform, // then it is a data value for a specific fs type. if f, exists := mountFlags[o]; exists && f.flag != 0 { + // FIXME: The *atime flags are special (they are more of an enum + // with quite hairy semantics) and thus arguably setting some of + // them should clear unrelated flags. if f.clear { m.Flags &= ^f.flag + m.ClearedFlags |= f.flag } else { m.Flags |= f.flag + m.ClearedFlags &= ^f.flag } } else if f, exists := mountPropagationMapping[o]; exists && f != 0 { m.PropagationFlags = append(m.PropagationFlags, f) diff --git a/restore.go b/restore.go index de5b48d54c2..d65afcfc788 100644 --- a/restore.go +++ b/restore.go @@ -98,10 +98,6 @@ using the runc checkpoint command.`, Value: "", Usage: "Specify an LSM mount context to be used during restore.", }, - cli.BoolFlag{ - Name: "no-mount-fallback", - Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)", - }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, exactArgs); err != nil { diff --git a/run.go b/run.go index 8b4f4d1fb23..82781669d10 100644 --- a/run.go +++ b/run.go @@ -64,10 +64,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See Name: "preserve-fds", Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)", }, - cli.BoolFlag{ - Name: "no-mount-fallback", - Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)", - }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, exactArgs); err != nil { diff --git a/tests/integration/mounts_sshfs.bats b/tests/integration/mounts_sshfs.bats index 540403e4051..0bef01784f3 100644 --- a/tests/integration/mounts_sshfs.bats +++ b/tests/integration/mounts_sshfs.bats @@ -4,107 +4,438 @@ load helpers function setup() { setup_busybox - update_config '.process.args = ["/bin/echo", "Hello World"]' } function teardown() { - # Some distros do not have fusermount installed - # as a dependency of fuse-sshfs, and good ol' umount works. - fusermount -u "$DIR" || umount "$DIR" + if [ -v DIR ]; then + # Some distros do not have fusermount installed + # as a dependency of fuse-sshfs, and good ol' umount works. + fusermount -u "$DIR" || umount "$DIR" + unset DIR + fi teardown_bundle } +function sshfs_has_flag() { + if [ -v DIR ]; then + awk '$2 == "'"$DIR"'" { print $4 }' &2 + awk '$2 == "'"$DIR"'"' &2 } -@test "runc run [rw bind mount of a ro fuse sshfs mount]" { - setup_sshfs "ro" - update_config ' .mounts += [{ - type: "bind", - source: "'"$DIR"'", - destination: "/mnt", - options: ["rw", "rprivate", "nosuid", "nodev", "rbind"] - }]' +function setup_sshfs_bind_flags() { + host_flags="$1" # ro,nodev,nosuid + bind_flags="$2" # ro,nosuid,bind - runc run --no-mount-fallback test_busybox - [ "$status" -eq 0 ] -} + setup_sshfs "$host_flags" -@test "runc run [dev,exec,suid,atime bind mount of a nodev,nosuid,noexec,noatime fuse sshfs mount]" { - setup_sshfs "nodev,nosuid,noexec,noatime" - # The "sync" option is used to trigger a remount with the below options. - # It serves no further purpose. Otherwise only a bind mount without - # applying the below options will be done. - update_config ' .mounts += [{ - type: "bind", - source: "'"$DIR"'", - destination: "/mnt", - options: ["dev", "suid", "exec", "atime", "rprivate", "rbind", "sync"] - }]' + cat >"rootfs/find-tmp.awk" <<-'EOF' + #!/bin/awk -f + $2 == "/mnt" { print $4 } + EOF + chmod +x "rootfs/find-tmp.awk" - runc run test_busybox - [ "$status" -eq 0 ] + update_config '.process.args = ["sh", "-c", "/find-tmp.awk . +@test "runc run [mount(8)-unlike behaviour: --bind with clearing flag]" { + requires root + + pass_sshfs_bind_flags "ro,noexec,nosymfollow,nodiratime" "bind,dev" + # Unspecified flags must be cleared as well. + run ! grep -wq ro <<<"$mnt_flags" + run -0 grep -wq rw <<<"$mnt_flags" + run ! grep -wq noexec <<<"$mnt_flags" + run ! grep -wq nosymfollow <<<"$mnt_flags" + # FIXME FIXME: As with mount(8), trying to clear an atime flag the "naive" + # way will be ignored! + run -0 grep -wq nodiratime <<<"$mnt_flags" + + # Now try with a user namespace. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + pass_sshfs_bind_flags "ro,noexec,nosymfollow,nodiratime" "bind,dev" + # Lockable flags must be kept, because we didn't request them explicitly. + run -0 grep -wq ro <<<"$mnt_flags" + run ! grep -wq rw <<<"$mnt_flags" + run -0 grep -wq noexec <<<"$mnt_flags" + run -0 grep -wq nodiratime <<<"$mnt_flags" + # nosymfollow is not lockable, so it must be cleared. + run ! grep -wq nosymfollow <<<"$mnt_flags" +} + +@test "runc run [implied-rw bind mount of a ro fuse sshfs mount]" { + requires root + + pass_sshfs_bind_flags "ro" "bind,nosuid,nodev,rprivate" + # Unspecified flags must be cleared (rw default). + run ! grep -wq ro <<<"$mnt_flags" + run -0 grep -wq rw <<<"$mnt_flags" + # The new flags must be applied. + run -0 grep -wq nosuid <<<"$mnt_flags" + run -0 grep -wq nodev <<<"$mnt_flags" + + # Now try with a user namespace. The results should be the same as above. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + pass_sshfs_bind_flags "ro" "bind,nosuid,nodev,rprivate" + # "ro" must still be set (inherited). + run -0 grep -wq ro <<<"$mnt_flags" + # The new flags must be applied. + run -0 grep -wq nosuid <<<"$mnt_flags" + run -0 grep -wq nodev <<<"$mnt_flags" +} + +@test "runc run [explicit-rw bind mount of a ro fuse sshfs mount]" { + requires root + + # Try to overwrite MS_RDONLY. As we are running in a userns-less container, + # we can overwrite MNT_LOCKED flags. + pass_sshfs_bind_flags "ro" "bind,rw,nosuid,nodev,rprivate" + # "ro" must be cleared and replaced with "rw". + run ! grep -wq ro <<<"$mnt_flags" + run -0 grep -wq rw <<<"$mnt_flags" + # The new flags must be applied. + run -0 grep -wq nosuid <<<"$mnt_flags" + run -0 grep -wq nodev <<<"$mnt_flags" + + # Now try with a user namespace. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + # This must fail because we explicitly requested a mount with a MNT_LOCKED + # mount option cleared (when the source mount has those mounts enabled), + # namely MS_RDONLY. + fail_sshfs_bind_flags "ro" "bind,rw,nosuid,nodev,rprivate" +} + +@test "runc run [dev,exec,suid,atime bind mount of a nodev,nosuid,noexec,noatime fuse sshfs mount]" { + requires root + + # When running without userns, overwriting host flags should work. + pass_sshfs_bind_flags "nosuid,nodev,noexec,noatime" "bind,dev,suid,exec,atime" + # Unspecified flags must be cleared (rw default). + run ! grep -wq ro <<<"$mnt_flags" + run -0 grep -wq rw <<<"$mnt_flags" + # Check that the flags were actually cleared by the mount. + run ! grep -wq nosuid <<<"$mnt_flags" + run ! grep -wq nodev <<<"$mnt_flags" + run ! grep -wq noexec <<<"$mnt_flags" + # FIXME FIXME: As with mount(8), trying to clear an atime flag the "naive" + # way will be ignored! + run -0 grep -wq noatime <<<"$mnt_flags" + + # Now try with a user namespace. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + # This must fail because we explicitly requested a mount with MNT_LOCKED + # mount options cleared (when the source mount has those mounts enabled). + fail_sshfs_bind_flags "nodev,nosuid,nosuid,noatime" "bind,dev,suid,exec,atime" +} + +# Test to ensure we don't regress bind-mounting /etc/resolv.conf with +# containerd . +@test "runc run [ro bind mount of a nodev,nosuid,noexec fuse sshfs mount]" { + requires root + + # Setting flags that are not locked should work. + pass_sshfs_bind_flags "rw,nodev,nosuid,nodev,noexec,noatime" "bind,ro" + # The flagset should be the union of the two. + run -0 grep -wq ro <<<"$mnt_flags" + # Unspecified flags must be cleared. + run ! grep -wq nosuid <<<"$mnt_flags" + run ! grep -wq nodev <<<"$mnt_flags" + run ! grep -wq noexec <<<"$mnt_flags" + + # Now try with a user namespace. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + # Setting flags that are not locked should work. + pass_sshfs_bind_flags "rw,nodev,nosuid,nodev,noexec,noatime" "bind,ro" + # The flagset should be the union of the two. + run -0 grep -wq ro <<<"$mnt_flags" + # (Unspecified MNT_LOCKED flags are inherited.) + run -0 grep -wq nosuid <<<"$mnt_flags" + run -0 grep -wq nodev <<<"$mnt_flags" + run -0 grep -wq noexec <<<"$mnt_flags" +} + +@test "runc run [ro,symfollow bind mount of a rw,nodev,nosymfollow fuse sshfs mount]" { + requires root + + pass_sshfs_bind_flags "rw,nodev,nosymfollow" "bind,ro,symfollow" + # Must switch to ro. + run -0 grep -wq ro <<<"$mnt_flags" + run ! grep -wq rw <<<"$mnt_flags" + # Unspecified flags must be cleared. + run ! grep -wq nodev <<<"$mnt_flags" + # nosymfollow must also be cleared. + run ! grep -wq nosymfollow <<<"$mnt_flags" + + # Now try with a user namespace. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + # Unsetting flags that are not lockable should work. + pass_sshfs_bind_flags "rw,nodev,nosymfollow" "bind,ro,symfollow" + # The flagset should be the union of the two. + run -0 grep -wq ro <<<"$mnt_flags" + run -0 grep -wq nodev <<<"$mnt_flags" + # nosymfollow is not lockable, so it must be cleared. + run ! grep -wq nosymfollow <<<"$mnt_flags" + + # Implied unsetting of non-lockable flags should also work. + pass_sshfs_bind_flags "rw,nodev,nosymfollow" "bind,rw" + # The flagset should be the union of the two. + run -0 grep -wq rw <<<"$mnt_flags" + run -0 grep -wq nodev <<<"$mnt_flags" + # nosymfollow is not lockable, so it must be cleared. + run ! grep -wq nosymfollow <<<"$mnt_flags" +} + +@test "runc run [ro,noexec bind mount of a nosuid,noatime fuse sshfs mount]" { + requires root + + # Setting flags that are not locked should work. + pass_sshfs_bind_flags "nodev,nosuid,noatime" "bind,ro,exec" + # The flagset must match the requested set. + run -0 grep -wq ro <<<"$mnt_flags" + run ! grep -wq noexec <<<"$mnt_flags" + # Unspecified flags must be cleared. + run ! grep -wq nosuid <<<"$mnt_flags" + run ! grep -wq nodev <<<"$mnt_flags" + # FIXME: As with mount(8), runc keeps the old atime setting by default. + run -0 grep -wq noatime <<<"$mnt_flags" + + # Now try with a user namespace. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + # Setting flags that are not locked should work. + pass_sshfs_bind_flags "nodev,nosuid,noatime" "bind,ro,exec" + # The flagset should be the union of the two. + run -0 grep -wq ro <<<"$mnt_flags" + run ! grep -wq noexec <<<"$mnt_flags" + # (Unspecified MNT_LOCKED flags are inherited.) + run -0 grep -wq nosuid <<<"$mnt_flags" + run -0 grep -wq nodev <<<"$mnt_flags" + run -0 grep -wq noatime <<<"$mnt_flags" +} + +@test "runc run [bind mount {no,rel,strict}atime semantics]" { + requires root + + function is_strictatime() { + # There is no "strictatime" in /proc/self/mounts. + run ! grep -wq noatime <<<"${1:-$mnt_flags}" + run ! grep -wq relatime <<<"${1:-$mnt_flags}" + run ! grep -wq nodiratime <<<"${1:-$mnt_flags}" + } + + # FIXME: As with mount(8), runc keeps the old atime setting by default. + pass_sshfs_bind_flags "noatime" "bind" + run -0 grep -wq noatime <<<"$mnt_flags" + run ! grep -wq relatime <<<"$mnt_flags" + + # FIXME: As with mount(8), runc keeps the old atime setting by default. + pass_sshfs_bind_flags "noatime" "bind,norelatime" + run -0 grep -wq noatime <<<"$mnt_flags" + run ! grep -wq relatime <<<"$mnt_flags" + + # FIXME FIXME: As with mount(8), trying to clear an atime flag the "naive" + # way will be ignored! + pass_sshfs_bind_flags "noatime" "bind,atime" + run -0 grep -wq noatime <<<"$mnt_flags" + run ! grep -wq relatime <<<"$mnt_flags" + + # ... but explicitly setting a different flag works. + pass_sshfs_bind_flags "noatime" "bind,relatime" + run ! grep -wq noatime <<<"$mnt_flags" + run -0 grep -wq relatime <<<"$mnt_flags" + + # Setting a flag that mount(8) would combine should result in only the + # requested flag being set. + pass_sshfs_bind_flags "noatime" "bind,nodiratime" + run ! grep -wq noatime <<<"$mnt_flags" + run -0 grep -wq nodiratime <<<"$mnt_flags" + # MS_DIRATIME implies MS_RELATIME by default. + run -0 grep -wq relatime <<<"$mnt_flags" + + # Clearing flags that mount(8) would not clear works. + pass_sshfs_bind_flags "nodiratime" "bind,strictatime" + is_strictatime "$mnt_flags" + + # nodiratime is a little weird -- it implies relatime unless you set + # another option (noatime or strictatime). But, runc also has norelatime -- + # so nodiratime,norelatime should _probably_ result in the same thing as + # nodiratime,strictatime. + pass_sshfs_bind_flags "noatime" "bind,nodiratime,strictatime" + run ! grep -wq noatime <<<"$mnt_flags" + run -0 grep -wq nodiratime <<<"$mnt_flags" + run ! grep -wq relatime <<<"$mnt_flags" + # FIXME FIXME: relatime should not be set in this case. + pass_sshfs_bind_flags "noatime" "bind,nodiratime,norelatime" + run ! grep -wq noatime <<<"$mnt_flags" + run -0 grep -wq nodiratime <<<"$mnt_flags" + run -0 grep -wq relatime <<<"$mnt_flags" + + # Now try with a user namespace. + update_config ' .linux.namespaces += [{"type": "user"}] + | .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] + | .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65534}] ' + + # Requesting a mount without specifying any preference for atime works, and + # inherits the original flags. + + pass_sshfs_bind_flags "strictatime" "bind" + is_strictatime "$mnt_flags" + + pass_sshfs_bind_flags "relatime" "bind" + run -0 grep -wq relatime <<<"$mnt_flags" + + pass_sshfs_bind_flags "nodiratime" "bind" + run -0 grep -wq nodiratime <<<"$mnt_flags" + # MS_DIRATIME implies MS_RELATIME by default. + run -0 grep -wq relatime <<<"$mnt_flags" + + pass_sshfs_bind_flags "noatime,nodiratime" "bind" + run -0 grep -wq noatime <<<"$mnt_flags" + run -0 grep -wq nodiratime <<<"$mnt_flags" + + # An unrelated clear flag has no effect. + pass_sshfs_bind_flags "noatime,nodiratime" "bind,norelatime" + run -0 grep -wq noatime <<<"$mnt_flags" + run -0 grep -wq nodiratime <<<"$mnt_flags" + + # Attempting to change most *atime flags will fail with user namespaces + # because *atime flags are all MNT_LOCKED. + fail_sshfs_bind_flags "nodiratime" "bind,strictatime" + fail_sshfs_bind_flags "relatime" "bind,strictatime" + fail_sshfs_bind_flags "noatime" "bind,strictatime" + fail_sshfs_bind_flags "nodiratime" "bind,noatime" + fail_sshfs_bind_flags "relatime" "bind,noatime" + fail_sshfs_bind_flags "relatime" "bind,nodiratime" + # Make sure strictatime sources are correctly handled by runc (the kernel + # ignores some other mount flags when passing MS_STRICTATIME). See + # remount() in rootfs_linux.go for details. + fail_sshfs_bind_flags "strictatime" "bind,relatime" + fail_sshfs_bind_flags "strictatime" "bind,noatime" + fail_sshfs_bind_flags "strictatime" "bind,nodiratime" + # Make sure that runc correctly handles the MS_NOATIME|MS_RELATIME kernel + # bug. See remount() in rootfs_linux.go for more details. + fail_sshfs_bind_flags "noatime" "bind,relatime" + + # Attempting to bind-mount a mount with a request to clear the atime + # setting that would normally inherited must not work. + # FIXME FIXME: All of these cases should fail. + pass_sshfs_bind_flags "strictatime" "bind,nostrictatime" + is_strictatime "$mnt_flags" + pass_sshfs_bind_flags "nodiratime" "bind,diratime" + run -0 grep -wq nodiratime <<<"$mnt_flags" + pass_sshfs_bind_flags "nodiratime" "bind,norelatime" # MS_DIRATIME implies MS_RELATIME + run -0 grep -wq nodiratime <<<"$mnt_flags" + pass_sshfs_bind_flags "relatime" "bind,norelatime" + run -0 grep -wq relatime <<<"$mnt_flags" + pass_sshfs_bind_flags "noatime" "bind,atime" + run -0 grep -wq noatime <<<"$mnt_flags" + pass_sshfs_bind_flags "noatime,nodiratime" "bind,atime" + run -0 grep -wq noatime <<<"$mnt_flags" + run -0 grep -wq nodiratime <<<"$mnt_flags" } diff --git a/utils_linux.go b/utils_linux.go index b5c855cdbb9..b88e886ffa0 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -180,7 +180,6 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (*libcon Spec: spec, RootlessEUID: os.Geteuid() != 0, RootlessCgroups: rootlessCg, - NoMountFallback: context.Bool("no-mount-fallback"), }) if err != nil { return nil, err