Skip to content

Commit

Permalink
Support recursive mount attrs ("rro", "rnosuid", "rnodev", ...)
Browse files Browse the repository at this point in the history
The new mount option "rro" makes the mount point recursively read-only,
by calling `mount_setattr(2)` with `MOUNT_ATTR_RDONLY` and `AT_RECURSIVE`.
https://man7.org/linux/man-pages/man2/mount_setattr.2.html

Requires kernel >= 5.12.

The "rro" option string conforms to the proposal in util-linux/util-linux Issue 1501.

Fix issue 2823

Similarly, this commit also adds the following mount options:
- rrw
- r[no]{suid,dev,exec,relatime,atime,strictatime,diratime,symfollow}
- [no]symfollow

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
  • Loading branch information
AkihiroSuda committed Nov 12, 2021
1 parent c1103d9 commit 1c030d2
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 9 deletions.
6 changes: 6 additions & 0 deletions libcontainer/configs/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ type Mount struct {
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`

// Mount properties to set recursively (AT_RECURSIVE), see mount_setattr(2)
RecAttrSet uint64 `json:"ret_attr_set"`

// Mount properties to clear recursively (AT_RECURSIVE), see mount_setattr(2)
RecAttrClr uint64 `json:"rec_attr_clr"`

// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`

Expand Down
18 changes: 17 additions & 1 deletion libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/userns"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runc/libcontainer/utils/syscallutil"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -474,7 +475,6 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
return err
}
}

if m.Relabel != "" {
if err := label.Validate(m.Relabel); err != nil {
return err
Expand All @@ -498,6 +498,9 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
}
return mountPropagate(m, rootfs, mountLabel, mountFd)
}
if err := setRecAttr(m, rootfs); err != nil {
return err
}
return nil
}

Expand Down Expand Up @@ -1113,3 +1116,16 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string, mountFd
}
return nil
}

// setRecAttr applies the recursive mount attributes recorded on m
// (RecAttrSet / RecAttrClr) to the mount's destination via mount_setattr(2)
// with AT_RECURSIVE.
//
// It is a no-op when both attribute masks are zero. Otherwise the destination
// is resolved through utils.WithProcfd and the path handed to the callback is
// what gets passed to the syscall wrapper. Any error from WithProcfd or from
// MountSetattr is returned unchanged.
func setRecAttr(m *configs.Mount, rootfs string) error {
	// Nothing to set and nothing to clear: skip the syscall entirely.
	if m.RecAttrSet|m.RecAttrClr == 0 {
		return nil
	}

	applyAttrs := func(procfd string) error {
		attr := &syscallutil.MountAttr{
			AttrSet: m.RecAttrSet,
			AttrClr: m.RecAttrClr,
		}
		// dirfd is -1, so only the path supplied by WithProcfd identifies the mount.
		return syscallutil.MountSetattr(-1, procfd, syscallutil.AT_RECURSIVE, attr)
	}
	return utils.WithProcfd(rootfs, m.Destination, applyAttrs)
}
61 changes: 53 additions & 8 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/seccomp"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runc/libcontainer/utils/syscallutil"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"

Expand Down Expand Up @@ -332,7 +333,7 @@ func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error)
// return nil, fmt.Errorf("mount destination %s is not absolute", m.Destination)
logrus.Warnf("mount destination %s is not absolute. Support for non-absolute mount destinations will be removed in a future release.", m.Destination)
}
flags, pgflags, data, ext := parseMountOptions(m.Options)
flags, pgflags, data, recAttrSet, recAttrClr, ext := parseMountOptions(m.Options)
source := m.Source
device := m.Type
if flags&unix.MS_BIND != 0 {
Expand All @@ -351,6 +352,8 @@ func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error)
Data: data,
Flags: flags,
PropagationFlags: pgflags,
RecAttrSet: recAttrSet,
RecAttrClr: recAttrClr,
Extensions: ext,
}, nil
}
Expand Down Expand Up @@ -760,13 +763,15 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
}

// parseMountOptions parses the string and returns the flags, propagation
// flags and any mount data that it contains.
func parseMountOptions(options []string) (int, []int, string, int) {
// flags, any mount data that it contains, and {recAttrSet, recAttrClr, extFlags}.
func parseMountOptions(options []string) (int, []int, string, uint64, uint64, int) {
var (
flag int
pgflag []int
data []string
extFlags int
flag int
pgflag []int
data []string
recAttrSet uint64
recAttrClr uint64
extFlags int
)
flags := map[string]struct {
clear bool
Expand Down Expand Up @@ -796,6 +801,7 @@ func parseMountOptions(options []string) (int, []int, string, int) {
"norelatime": {true, unix.MS_RELATIME},
"nostrictatime": {true, unix.MS_STRICTATIME},
"nosuid": {false, unix.MS_NOSUID},
"nosymfollow": {false, unix.MS_NOSYMFOLLOW}, // since kernel 5.10
"rbind": {false, unix.MS_BIND | unix.MS_REC},
"relatime": {false, unix.MS_RELATIME},
"remount": {false, unix.MS_REMOUNT},
Expand All @@ -805,6 +811,7 @@ func parseMountOptions(options []string) (int, []int, string, int) {
"strictatime": {false, unix.MS_STRICTATIME},
"suid": {true, unix.MS_NOSUID},
"sync": {false, unix.MS_SYNCHRONOUS},
"symfollow": {true, unix.MS_NOSYMFOLLOW}, // since kernel 5.10
}
propagationFlags := map[string]int{
"private": unix.MS_PRIVATE,
Expand All @@ -816,6 +823,33 @@ func parseMountOptions(options []string) (int, []int, string, int) {
"rslave": unix.MS_SLAVE | unix.MS_REC,
"runbindable": unix.MS_UNBINDABLE | unix.MS_REC,
}

// See https://man7.org/linux/man-pages/man2/mount_setattr.2.html
recAttrFlags := map[string]struct {
clear bool
flag uint64
}{
"rro": {false, syscallutil.MOUNT_ATTR_RDONLY},
"rrw": {true, syscallutil.MOUNT_ATTR_RDONLY},
"rnosuid": {false, syscallutil.MOUNT_ATTR_NOSUID},
"rsuid": {true, syscallutil.MOUNT_ATTR_NOSUID},
"rnodev": {false, syscallutil.MOUNT_ATTR_NODEV},
"rdev": {true, syscallutil.MOUNT_ATTR_NODEV},
"rnoexec": {false, syscallutil.MOUNT_ATTR_NOEXEC},
"rexec": {true, syscallutil.MOUNT_ATTR_NOEXEC},
"rnodiratime": {false, syscallutil.MOUNT_ATTR_NODIRATIME},
"rdiratime": {true, syscallutil.MOUNT_ATTR_NODIRATIME},
"rrelatime": {false, syscallutil.MOUNT_ATTR_RELATIME},
"rnorelatime": {true, syscallutil.MOUNT_ATTR_RELATIME},
"rnoatime": {false, syscallutil.MOUNT_ATTR_NOATIME},
"ratime": {true, syscallutil.MOUNT_ATTR_NOATIME},
"rstrictatime": {false, syscallutil.MOUNT_ATTR_STRICTATIME},
"rnostrictatime": {true, syscallutil.MOUNT_ATTR_STRICTATIME},
"rnosymfollow": {false, syscallutil.MOUNT_ATTR_NOSYMFOLLOW}, // since kernel 5.14
"rsymfollow": {true, syscallutil.MOUNT_ATTR_NOSYMFOLLOW}, // since kernel 5.14
// No support for MOUNT_ATTR_IDMAP yet (needs UserNS FD)
}

extensionFlags := map[string]struct {
clear bool
flag int
Expand All @@ -834,6 +868,17 @@ func parseMountOptions(options []string) (int, []int, string, int) {
}
} else if f, exists := propagationFlags[o]; exists && f != 0 {
pgflag = append(pgflag, f)
} else if f, exists := recAttrFlags[o]; exists {
if f.clear {
recAttrClr |= f.flag
} else {
recAttrSet |= f.flag
if f.flag&syscallutil.MOUNT_ATTR__ATIME == f.flag {
// https://man7.org/linux/man-pages/man2/mount_setattr.2.html
// "cannot simply specify the access-time setting in attr_set, but must also include MOUNT_ATTR__ATIME in the attr_clr field."
recAttrClr |= syscallutil.MOUNT_ATTR__ATIME
}
}
} else if f, exists := extensionFlags[o]; exists && f.flag != 0 {
if f.clear {
extFlags &= ^f.flag
Expand All @@ -844,7 +889,7 @@ func parseMountOptions(options []string) (int, []int, string, int) {
data = append(data, o)
}
}
return flag, pgflag, strings.Join(data, ","), extFlags
return flag, pgflag, strings.Join(data, ","), recAttrSet, recAttrClr, extFlags
}

func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) {
Expand Down
57 changes: 57 additions & 0 deletions libcontainer/utils/syscallutil/syscallutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Package syscallutil provides addenda to golang.org/x/sys/unix
package syscallutil

import (
"unsafe"

"golang.org/x/sys/unix"
)

// nolint
const (
// Flags and attribute bits used with mount_setattr(2). The names keep the
// kernel's ALL_CAPS spelling on purpose so they can be matched against the
// UAPI headers linked below.

// AT_* flags accepted in the "flags" argument of MountSetattr. All but
// AT_RECURSIVE are re-exported from golang.org/x/sys/unix; AT_RECURSIVE is
// defined here with the value from the linked kernel header.
AT_EMPTY_PATH = unix.AT_EMPTY_PATH
AT_RECURSIVE = 0x8000 // https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/fcntl.h#L112
AT_SYMLINK_NOFOLLOW = unix.AT_SYMLINK_NOFOLLOW
AT_NO_AUTOMOUNT = unix.AT_NO_AUTOMOUNT

// MOUNT_ATTR_* bits for MountAttr.AttrSet / MountAttr.AttrClr.
MOUNT_ATTR_RDONLY = 0x00000001 // since kernel 5.12, https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/mount.h#L113
MOUNT_ATTR_NOSUID = 0x00000002 // since kernel 5.12
MOUNT_ATTR_NODEV = 0x00000004 // since kernel 5.12
MOUNT_ATTR_NOEXEC = 0x00000008 // since kernel 5.12
// MOUNT_ATTR__ATIME is a mask (0x70) rather than a single attribute: it
// covers the bits of MOUNT_ATTR_NOATIME and MOUNT_ATTR_STRICTATIME, and
// MOUNT_ATTR_RELATIME is the zero value inside that mask. See
// mount_setattr(2) for how the access-time setting must be changed.
MOUNT_ATTR__ATIME = 0x00000070 // since kernel 5.12
MOUNT_ATTR_RELATIME = 0x00000000 // since kernel 5.12
MOUNT_ATTR_NOATIME = 0x00000010 // since kernel 5.12
MOUNT_ATTR_STRICTATIME = 0x00000020 // since kernel 5.12
MOUNT_ATTR_NODIRATIME = 0x00000080 // since kernel 5.12
MOUNT_ATTR_IDMAP = 0x00100000 // since kernel 5.12
MOUNT_ATTR_NOSYMFOLLOW = 0x00200000 // since kernel 5.14, https://github.com/torvalds/linux/blob/v5.14/include/uapi/linux/mount.h#L123

// MOUNT_ATTR_SIZE_VER0 is the byte size of version 0 of struct mount_attr;
// 32 equals the four uint64 fields of MountAttr.
MOUNT_ATTR_SIZE_VER0 = 32 // https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/mount.h#L135
)

// MountAttr corresponds to struct mount_attr, version 0, appeared in kernel 5.12.
// https://github.com/torvalds/linux/blob/v5.12/include/uapi/linux/mount.h#L124-L132
//
// The struct is four 64-bit fields (32 bytes, the value of MOUNT_ATTR_SIZE_VER0).
// MountSetattr hands a pointer to it, together with its size, straight to the
// kernel, so the field order and widths must stay in sync with the C definition.
type MountAttr struct {
// AttrSet holds the MOUNT_ATTR_* bits to set.
AttrSet uint64 // __u64 attr_set
// AttrClr holds the MOUNT_ATTR_* bits to clear.
AttrClr uint64 // __u64 attr_clr
// Propagation mirrors the C field of the same name; see mount_setattr(2)
// for its accepted values.
Propagation uint64 // __u64 propagation
// UsernsFd mirrors the C field userns_fd; presumably only meaningful
// together with MOUNT_ATTR_IDMAP — confirm against mount_setattr(2).
UsernsFd uint64 // __u64 userns_fd
}

// MountSetattr invokes the mount_setattr(2) system call.
//
// C prototype:
//
//	int syscall(SYS_mount_setattr, int dirfd, const char *pathname, unsigned int flags, struct mount_attr *attr, size_t size);
//
// dirfd, pathname and flags are forwarded as given; the size argument is
// always the size of the MountAttr struct. An error is returned if pathname
// cannot be converted to a C string, or if the kernel reports a non-zero
// errno (returned as-is).
//
// Requires kernel >= 5.12.
// https://man7.org/linux/man-pages/man2/mount_setattr.2.html
func MountSetattr(dirfd int, pathname string, flags uint, attr *MountAttr) error {
	cPath, err := unix.BytePtrFromString(pathname)
	if err != nil {
		return err
	}

	// The unsafe.Pointer -> uintptr conversions must stay inside the call
	// expression so the referenced memory is kept alive for the syscall.
	if _, _, e := unix.Syscall6(
		unix.SYS_MOUNT_SETATTR,
		uintptr(dirfd),
		uintptr(unsafe.Pointer(cPath)),
		uintptr(flags),
		uintptr(unsafe.Pointer(attr)),
		unsafe.Sizeof(*attr),
		0,
	); e != 0 {
		return e
	}
	return nil
}
78 changes: 78 additions & 0 deletions tests/integration/mounts_recursive.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env bats

load helpers

TESTVOLUME="${BATS_RUN_TMPDIR}/mounts_recursive"

# Build the host-side fixture: a tmpfs mounted at $TESTVOLUME containing the
# file "foo", with a second tmpfs nested at $TESTVOLUME/subvol containing the
# file "bar". The nested mount is what lets the tests tell recursive and
# non-recursive mount options apart.
setup_volume() {
	# requires root (in the current user namespace) to mount tmpfs outside runc
	requires root

	local top="$TESTVOLUME" sub="$TESTVOLUME/subvol"

	# Outer mount and its marker file.
	mkdir -p "$top"
	mount -t tmpfs none "$top"
	echo "foo" >"$top/foo"

	# Nested mount (a separate mount point below the outer one) and its marker file.
	mkdir "$sub"
	mount -t tmpfs none "$sub"
	echo "bar" >"$sub/bar"
}

# Undo setup_volume: recursively unmount everything at and below $TESTVOLUME.
teardown_volume() {
	umount -R "$TESTVOLUME"
}

# Per-test setup hook: create the nested tmpfs fixture first, then the busybox bundle.
setup() {
	setup_volume
	setup_busybox
}

# Per-test teardown hook: unmount the fixture volume, then clean up the bundle.
teardown() {
	teardown_volume
	teardown_bundle
}

@test "runc run [rbind,ro mount is read-only but not recursively]" {
# Baseline without the new options: bind the nested fixture at /mnt with
# plain "ro", which is expected to affect only the top-level mount.
update_config ".mounts += [{source: \"${TESTVOLUME}\" , destination: \"/mnt\", options: [\"rbind\",\"ro\"]}]"

runc run -d --console-socket "$CONSOLE_SOCKET" test_rbind_ro
[ "$status" -eq 0 ]

# Writing to the top-level mount must fail with a read-only error.
runc exec test_rbind_ro touch /mnt/foo
[ "$status" -eq 1 ]
[[ "${output}" == *"Read-only file system"* ]]

# The nested mount stays writable, so touch succeeds here.
runc exec test_rbind_ro touch /mnt/subvol/bar
[ "$status" -eq 0 ]
}

@test "runc run [rbind,rro mount is recursively read-only]" {
# "rro" relies on mount_setattr(2); requires_kernel presumably skips the
# test on kernels older than 5.12 — confirm in helpers.
requires_kernel 5.12
update_config ".mounts += [{source: \"${TESTVOLUME}\" , destination: \"/mnt\", options: [\"rbind\",\"rro\"]}]"

runc run -d --console-socket "$CONSOLE_SOCKET" test_rbind_rro
[ "$status" -eq 0 ]

# The top-level mount must be read-only.
runc exec test_rbind_rro touch /mnt/foo
[ "$status" -eq 1 ]
[[ "${output}" == *"Read-only file system"* ]]

# Unlike plain "ro", the nested mount must be read-only as well.
runc exec test_rbind_rro touch /mnt/subvol/bar
[ "$status" -eq 1 ]
[[ "${output}" == *"Read-only file system"* ]]
}

@test "runc run [rbind,ro,rro mount is recursively read-only too]" {
# Combining "ro" with "rro" must give the same result as "rro" alone.
# requires_kernel presumably skips the test on kernels older than 5.12 —
# confirm in helpers.
requires_kernel 5.12
update_config ".mounts += [{source: \"${TESTVOLUME}\" , destination: \"/mnt\", options: [\"rbind\",\"ro\",\"rro\"]}]"

runc run -d --console-socket "$CONSOLE_SOCKET" test_rbind_ro_rro
[ "$status" -eq 0 ]

# The top-level mount must be read-only.
runc exec test_rbind_ro_rro touch /mnt/foo
[ "$status" -eq 1 ]
[[ "${output}" == *"Read-only file system"* ]]

# The nested mount must be read-only as well.
runc exec test_rbind_ro_rro touch /mnt/subvol/bar
[ "$status" -eq 1 ]
[[ "${output}" == *"Read-only file system"* ]]
}

0 comments on commit 1c030d2

Please sign in to comment.