From 69b7c9a5436fb84cea6447a1a1d7e186e94b2fad Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Fri, 2 Jul 2021 19:16:58 +0000 Subject: [PATCH 1/2] Revert parts of 938cfeb0f27303721081223816d4f251ffeb1767 When reading and writing the UID/GID, we always want the value relative to the root user namespace; the kernel will take care of remapping this to the user namespace for us. Calling from_kuid(user_ns, uid) with an unmapped uid will return -1 as that uid is outside of the scope of that namespace, and will result in the files inside the namespace all being owned by 'nobody' and not being allowed to call chmod or chown on them. Signed-off-by: Allan Jude --- include/os/linux/kernel/linux/vfs_compat.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index 91e908598fbb..b5ff1559ece6 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -296,11 +296,7 @@ static inline struct dentry *file_dentry(const struct file *f) static inline uid_t zfs_uid_read_impl(struct inode *ip) { -#ifdef HAVE_SUPER_USER_NS - return (from_kuid(ip->i_sb->s_user_ns, ip->i_uid)); -#else return (from_kuid(kcred->user_ns, ip->i_uid)); -#endif } static inline uid_t zfs_uid_read(struct inode *ip) @@ -310,11 +306,7 @@ static inline uid_t zfs_uid_read(struct inode *ip) static inline gid_t zfs_gid_read_impl(struct inode *ip) { -#ifdef HAVE_SUPER_USER_NS - return (from_kgid(ip->i_sb->s_user_ns, ip->i_gid)); -#else return (from_kgid(kcred->user_ns, ip->i_gid)); -#endif } static inline gid_t zfs_gid_read(struct inode *ip) @@ -324,20 +316,12 @@ static inline gid_t zfs_gid_read(struct inode *ip) static inline void zfs_uid_write(struct inode *ip, uid_t uid) { -#ifdef HAVE_SUPER_USER_NS - ip->i_uid = make_kuid(ip->i_sb->s_user_ns, uid); -#else ip->i_uid = make_kuid(kcred->user_ns, uid); -#endif } static inline void zfs_gid_write(struct inode *ip, gid_t gid) { 
-#ifdef HAVE_SUPER_USER_NS - ip->i_gid = make_kgid(ip->i_sb->s_user_ns, gid); -#else ip->i_gid = make_kgid(kcred->user_ns, gid); -#endif } /* From 9081b6f09dc06493b59bcfc5e0d7dfbd07515f96 Mon Sep 17 00:00:00 2001 From: Will Andrews Date: Sun, 21 Feb 2021 10:19:43 -0600 Subject: [PATCH 2/2] Add Linux namespace delegation support This allows ZFS datasets to be delegated to a user/mount namespace. Within that namespace, only the delegated datasets are visible. Works very similarly to Zones/Jails on other ZFS OSes. As a user: ``` $ unshare -Um $ zfs list no datasets available $ echo $$ 1234 ``` As root: ``` # zfs list NAME ZONED MOUNTPOINT containers off /containers containers/host off /containers/host containers/host/child off /containers/host/child containers/host/child/gchild off /containers/host/child/gchild containers/unpriv on /unpriv containers/unpriv/child on /unpriv/child containers/unpriv/child/gchild on /unpriv/child/gchild # zfs zone /proc/1234/ns/user containers/unpriv ``` Back to the user namespace: ``` $ zfs list NAME USED AVAIL REFER MOUNTPOINT containers 129M 47.8G 24K /containers containers/unpriv 128M 47.8G 24K /unpriv containers/unpriv/child 128M 47.8G 128M /unpriv/child ``` Signed-off-by: Will Andrews Signed-off-by: Allan Jude Signed-off-by: Mateusz Piotrowski Co-authored-by: Allan Jude Co-authored-by: Mateusz Piotrowski Sponsored-by: Buddy --- cmd/zfs/zfs_main.c | 60 +++ config/kernel-user-ns-inum.m4 | 23 + config/kernel.m4 | 2 + contrib/pyzfs/libzfs_core/_constants.py | 1 + include/libzfs.h | 10 + include/os/linux/spl/sys/zone.h | 31 +- include/sys/fs/zfs.h | 3 + lib/libspl/include/sys/types.h | 2 +- lib/libspl/include/zone.h | 12 +- lib/libspl/os/linux/zone.c | 32 +- lib/libuutil/libuutil.abi | 2 +- lib/libzfs/libzfs.abi | 9 +- lib/libzfs/libzfs_util.c | 6 + lib/libzfs/os/linux/libzfs_util_os.c | 69 +++ lib/libzfs_core/libzfs_core.abi | 2 +- man/Makefile.am | 2 + man/man7/zfsprops.7 | 3 +- man/man8/zfs-unzone.8 | 1 + man/man8/zfs-zone.8 | 116 
+++++ module/Kbuild.in | 3 +- module/os/linux/spl/spl-generic.c | 6 + module/os/linux/spl/spl-zone.c | 424 ++++++++++++++++++ module/os/linux/zfs/policy.c | 2 +- module/os/linux/zfs/zfs_ioctl_os.c | 47 ++ module/os/linux/zfs/zfs_vfsops.c | 20 + module/os/linux/zfs/zpl_super.c | 1 + tests/runfiles/linux.run | 3 +- tests/zfs-tests/include/commands.cfg | 2 + tests/zfs-tests/tests/Makefile.am | 3 + .../user_namespace/user_namespace_001.ksh | 5 + .../user_namespace/user_namespace_002.ksh | 115 +++++ .../user_namespace/user_namespace_003.ksh | 97 ++++ .../user_namespace/user_namespace_004.ksh | 67 +++ 33 files changed, 1166 insertions(+), 15 deletions(-) create mode 100644 config/kernel-user-ns-inum.m4 create mode 120000 man/man8/zfs-unzone.8 create mode 100644 man/man8/zfs-zone.8 create mode 100644 module/os/linux/spl/spl-zone.c create mode 100755 tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh create mode 100755 tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh create mode 100755 tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 6282d894638e..30b2ae0c4b4f 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv); static int zfs_do_unjail(int argc, char **argv); #endif +#ifdef __linux__ +static int zfs_do_zone(int argc, char **argv); +static int zfs_do_unzone(int argc, char **argv); +#endif + /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. 
*/ @@ -184,6 +189,8 @@ typedef enum { HELP_JAIL, HELP_UNJAIL, HELP_WAIT, + HELP_ZONE, + HELP_UNZONE, } zfs_help_t; typedef struct zfs_command { @@ -254,6 +261,11 @@ static zfs_command_t command_table[] = { { "jail", zfs_do_jail, HELP_JAIL }, { "unjail", zfs_do_unjail, HELP_UNJAIL }, #endif + +#ifdef __linux__ + { "zone", zfs_do_zone, HELP_ZONE }, + { "unzone", zfs_do_unzone, HELP_UNZONE }, +#endif }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -415,6 +427,10 @@ get_usage(zfs_help_t idx) return (gettext("\tunjail \n")); case HELP_WAIT: return (gettext("\twait [-t ] \n")); + case HELP_ZONE: + return (gettext("\tzone \n")); + case HELP_UNZONE: + return (gettext("\tunzone \n")); default: __builtin_unreachable(); } @@ -8692,6 +8708,50 @@ main(int argc, char **argv) return (ret); } +/* + * zfs zone nsfile filesystem + * + * Add or delete the given dataset to/from the namespace. + */ +#ifdef __linux__ +static int +zfs_do_zone_impl(int argc, char **argv, boolean_t attach) +{ + zfs_handle_t *zhp; + int ret; + + if (argc < 3) { + (void) fprintf(stderr, gettext("missing argument(s)\n")); + usage(B_FALSE); + } + if (argc > 3) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM); + if (zhp == NULL) + return (1); + + ret = (zfs_userns(zhp, argv[1], attach) != 0); + + zfs_close(zhp); + return (ret); +} + +static int +zfs_do_zone(int argc, char **argv) +{ + return (zfs_do_zone_impl(argc, argv, B_TRUE)); +} + +static int +zfs_do_unzone(int argc, char **argv) +{ + return (zfs_do_zone_impl(argc, argv, B_FALSE)); +} +#endif + #ifdef __FreeBSD__ #include #include diff --git a/config/kernel-user-ns-inum.m4 b/config/kernel-user-ns-inum.m4 new file mode 100644 index 000000000000..2207a4aa6921 --- /dev/null +++ b/config/kernel-user-ns-inum.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # 3.18 API change +dnl # struct user_namespace inum moved from .proc_inum to .ns.inum. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [ + ZFS_LINUX_TEST_SRC([user_ns_common_inum], [ + #include + ], [ + struct user_namespace uns; + uns.ns.inum = 0; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [ + AC_MSG_CHECKING([whether user_namespace->ns.inum exists]) + ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1, + [user_namespace->ns.inum exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 9530367507d6..1f274cbe4f30 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_KTHREAD ZFS_AC_KERNEL_SRC_ZERO_PAGE ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC + ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM AC_MSG_CHECKING([for available kernel interfaces]) ZFS_LINUX_TEST_COMPILE_ALL([kabi]) @@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_KTHREAD ZFS_AC_KERNEL_ZERO_PAGE ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC + ZFS_AC_KERNEL_USER_NS_COMMON_INUM ]) dnl # diff --git a/contrib/pyzfs/libzfs_core/_constants.py b/contrib/pyzfs/libzfs_core/_constants.py index 3273652f758a..7ee2ef87df3e 100644 --- a/contrib/pyzfs/libzfs_core/_constants.py +++ b/contrib/pyzfs/libzfs_core/_constants.py @@ -100,6 +100,7 @@ def enum(*sequential, **named): 'ZFS_ERR_REBUILD_IN_PROGRESS', 'ZFS_ERR_BADPROP', 'ZFS_ERR_VDEV_NOTSUP', + 'ZFS_ERR_NOT_USER_NAMESPACE', ], {} ) diff --git a/include/libzfs.h b/include/libzfs.h index 2c2aa3faf14b..fe420de4d4de 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -150,6 +150,7 @@ typedef enum zfs_error { EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */ EZFS_REBUILDING, /* resilvering (sequential reconstrution) */ EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */ + EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */ EZFS_UNKNOWN } zfs_error_t; @@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, 
uint64_t, uint64_t, #endif /* __FreeBSD__ */ +#ifdef __linux__ + +/* + * Add or delete the given filesystem to/from the given user namespace. + */ +_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach); + +#endif + #ifdef __cplusplus } #endif diff --git a/include/os/linux/spl/sys/zone.h b/include/os/linux/spl/sys/zone.h index 00e30f690c38..5978a6285fa1 100644 --- a/include/os/linux/spl/sys/zone.h +++ b/include/os/linux/spl/sys/zone.h @@ -25,11 +25,34 @@ #define _SPL_ZONE_H #include +#include -#define GLOBAL_ZONEID 0 +#include +#include -#define zone_dataset_visible(x, y) (1) -#define crgetzoneid(x) (GLOBAL_ZONEID) -#define INGLOBALZONE(z) (1) +/* + * Attach the given dataset to the given user namespace. + */ +extern int zone_dataset_attach(cred_t *, const char *, int); + +/* + * Detach the given dataset from the given user namespace. + */ +extern int zone_dataset_detach(cred_t *, const char *, int); + +/* + * Returns true if the named pool/dataset is visible in the current zone. 
+ */ +extern int zone_dataset_visible(const char *dataset, int *write); + +int spl_zone_init(void); +void spl_zone_fini(void); + +extern unsigned int crgetzoneid(const cred_t *); +extern unsigned int global_zoneid(void); +extern boolean_t inglobalzone(proc_t *); + +#define INGLOBALZONE(x) inglobalzone(x) +#define GLOBAL_ZONEID global_zoneid() #endif /* SPL_ZONE_H */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 9cd1e32cd053..bc6666a2a1b7 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1450,7 +1450,9 @@ typedef enum zfs_ioc { ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */ ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */ ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */ + ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */ ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */ + ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */ ZFS_IOC_SET_BOOTENV, /* 0x87 */ ZFS_IOC_GET_BOOTENV, /* 0x88 */ ZFS_IOC_LAST @@ -1531,6 +1533,7 @@ typedef enum { ZFS_ERR_REBUILD_IN_PROGRESS, ZFS_ERR_BADPROP, ZFS_ERR_VDEV_NOTSUP, + ZFS_ERR_NOT_USER_NAMESPACE, } zfs_errno_t; /* diff --git a/lib/libspl/include/sys/types.h b/lib/libspl/include/sys/types.h index f32c2188a111..8dc38ae3394f 100644 --- a/lib/libspl/include/sys/types.h +++ b/lib/libspl/include/sys/types.h @@ -44,7 +44,7 @@ #include #endif /* HAVE_INTTYPES */ -typedef int zoneid_t; +typedef uint_t zoneid_t; typedef int projid_t; /* diff --git a/lib/libspl/include/zone.h b/lib/libspl/include/zone.h index b0ac2d9bc610..0af4e7a2fa49 100644 --- a/lib/libspl/include/zone.h +++ b/lib/libspl/include/zone.h @@ -33,7 +33,17 @@ extern "C" { #endif -#define GLOBAL_ZONEID 0 +#ifdef __FreeBSD__ +#define GLOBAL_ZONEID 0 +#else +/* + * Hardcoded in the kernel's root user namespace. A "better" way to get + * this would be by using ioctl_ns(2), but this would need to be performed + * recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only + * supported since Linux 4.9. 
+ */ +#define GLOBAL_ZONEID 4026531837U +#endif extern zoneid_t getzoneid(void); diff --git a/lib/libspl/os/linux/zone.c b/lib/libspl/os/linux/zone.c index 393a16ad5cdd..65c02dfe7aab 100644 --- a/lib/libspl/os/linux/zone.c +++ b/lib/libspl/os/linux/zone.c @@ -23,10 +23,40 @@ * Use is subject to license terms. */ +#include +#include +#include +#include +#include +#include + #include zoneid_t getzoneid(void) { - return (GLOBAL_ZONEID); + char path[PATH_MAX]; + char buf[128] = { '\0' }; + char *cp; + + int c = snprintf(path, sizeof (path), "/proc/self/ns/user"); + /* This API doesn't have any error checking... */ + if (c < 0) + return (0); + + ssize_t r = readlink(path, buf, sizeof (buf) - 1); + if (r < 0) + return (0); + + cp = strchr(buf, '['); + if (cp == NULL) + return (0); + cp++; + errno = 0; + unsigned long n = strtoul(cp, NULL, 10); + if (n == ULONG_MAX && errno == ERANGE) + return (0); + zoneid_t z = (zoneid_t)n; + + return (z); } diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi index 86220b44b229..766d8843000d 100644 --- a/lib/libuutil/libuutil.abi +++ b/lib/libuutil/libuutil.abi @@ -1081,7 +1081,7 @@ - + diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 9f9a2f9071d9..fb5e01b82c40 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -433,6 +433,7 @@ + @@ -1537,7 +1538,7 @@ - + @@ -4414,6 +4415,12 @@ + + + + + + diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 1c067e214800..1d40cbbfcaa7 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -299,6 +299,9 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_VDEV_NOTSUP: return (dgettext(TEXT_DOMAIN, "operation not supported " "on this type of vdev")); + case EZFS_NOT_USER_NAMESPACE: + return (dgettext(TEXT_DOMAIN, "the provided file " + "was not a user namespace file")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: @@ -485,6 +488,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const 
char *fmt, ...) case ZFS_ERR_BADPROP: zfs_verror(hdl, EZFS_BADPROP, fmt, ap); break; + case ZFS_ERR_NOT_USER_NAMESPACE: + zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap); + break; default: zfs_error_aux(hdl, "%s", strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); diff --git a/lib/libzfs/os/linux/libzfs_util_os.c b/lib/libzfs/os/linux/libzfs_util_os.c index 9d6f574a5546..7bd26ea98131 100644 --- a/lib/libzfs/os/linux/libzfs_util_os.c +++ b/lib/libzfs/os/linux/libzfs_util_os.c @@ -19,6 +19,9 @@ * CDDL HEADER END */ +/* + * Copyright (c) 2021 Klara, Inc. + */ #include #include @@ -207,3 +210,69 @@ zfs_version_kernel(void) ret[read - 1] = '\0'; return (ret); } + +/* + * Add or delete the given filesystem to/from the given user namespace. + */ +int +zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach) +{ + libzfs_handle_t *hdl = zhp->zfs_hdl; + zfs_cmd_t zc = {"\0"}; + char errbuf[1024]; + unsigned long cmd; + int ret; + + if (attach) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"), + zhp->zfs_name); + } else { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"), + zhp->zfs_name); + } + + switch (zhp->zfs_type) { + case ZFS_TYPE_VOLUME: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "volumes can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_SNAPSHOT: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "snapshots can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_BOOKMARK: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "bookmarks can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_VDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "vdevs can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_INVALID: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid zfs_type_t: ZFS_TYPE_INVALID")); + return 
(zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_POOL: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pools can not be namespaced")); + return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); + case ZFS_TYPE_FILESYSTEM: + zfs_fallthrough; + } + assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM); + + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + zc.zc_objset_type = DMU_OST_ZFS; + zc.zc_cleanup_fd = open(nspath, O_RDONLY); + if (zc.zc_cleanup_fd < 0) { + return (zfs_error(hdl, EZFS_NOT_USER_NAMESPACE, errbuf)); + } + + cmd = attach ? ZFS_IOC_USERNS_ATTACH : ZFS_IOC_USERNS_DETACH; + if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0) + zfs_standard_error(hdl, errno, errbuf); + (void) close(zc.zc_cleanup_fd); + return (ret); +} diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 266007e4dcad..fae98469a04f 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -939,7 +939,7 @@ - + diff --git a/man/Makefile.am b/man/Makefile.am index 8fa21d2fd23e..12f818372f37 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -59,9 +59,11 @@ dist_man_MANS = \ %D%/man8/zfs-unjail.8 \ %D%/man8/zfs-unload-key.8 \ %D%/man8/zfs-unmount.8 \ + %D%/man8/zfs-unzone.8 \ %D%/man8/zfs-upgrade.8 \ %D%/man8/zfs-userspace.8 \ %D%/man8/zfs-wait.8 \ + %D%/man8/zfs-zone.8 \ %D%/man8/zfs_ids_to_path.8 \ %D%/man8/zgenhostid.8 \ %D%/man8/zinject.8 \ diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index b1e1ce377fe2..4d6fc613c851 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -1885,8 +1885,7 @@ feature and are not relevant on other platforms. The default value is .Sy off . .It Sy zoned Ns = Ns Sy on Ns | Ns Sy off -Controls whether the dataset is managed from a non-global zone. -Zones are a Solaris feature and are not relevant on other platforms. +Controls whether the dataset is managed from a non-global zone or namespace. The default value is .Sy off . 
.El diff --git a/man/man8/zfs-unzone.8 b/man/man8/zfs-unzone.8 new file mode 120000 index 000000000000..9052b28aa880 --- /dev/null +++ b/man/man8/zfs-unzone.8 @@ -0,0 +1 @@ +zfs-zone.8 \ No newline at end of file diff --git a/man/man8/zfs-zone.8 b/man/man8/zfs-zone.8 new file mode 100644 index 000000000000..2f975dde6799 --- /dev/null +++ b/man/man8/zfs-zone.8 @@ -0,0 +1,116 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2011 Joshua M. Clulow +.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved. +.\" Copyright (c) 2011, Pawel Jakub Dawidek +.\" Copyright (c) 2012, Glen Barber +.\" Copyright (c) 2012, Bryan Drewery +.\" Copyright (c) 2013, Steven Hartland +.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. +.\" Copyright (c) 2014, Joyent, Inc. All rights reserved. +.\" Copyright (c) 2014 by Adam Stevko. All rights reserved. +.\" Copyright (c) 2014 Integros [integros.com] +.\" Copyright (c) 2014, Xin LI +.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved. +.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved. 
+.\" Copyright 2019 Richard Laager. All rights reserved. +.\" Copyright 2018 Nexenta Systems, Inc. +.\" Copyright 2019 Joyent, Inc. +.\" Copyright 2021 Klara, Inc. +.\" +.Dd June 3, 2022 +.Dt ZFS-ZONE 8 +.Os +. +.Sh NAME +.Nm zfs-zone , +.Nm zfs-unzone +.Nd attach and detach ZFS filesystems to user namespaces +.Sh SYNOPSIS +.Nm zfs Cm zone +.Ar nsfile +.Ar filesystem +.Nm zfs Cm unzone +.Ar nsfile +.Ar filesystem +. +.Sh DESCRIPTION +.Bl -tag -width "" +.It Xo +.Nm zfs +.Cm zone +.Ar nsfile +.Ar filesystem +.Xc +Attach the specified +.Ar filesystem +to the user namespace identified by +.Ar nsfile . +From now on this file system tree can be managed from within a user namespace +if the +.Sy zoned +property has been set. +.Pp +You cannot attach a zoned dataset's children to another user namespace. +You can also not attach the root file system +of the user namespace or any dataset +which needs to be mounted before the zfs service +is run inside the user namespace, +as it would be attached unmounted until it is +mounted from the service inside the user namespace. +.Pp +To allow management of the dataset from within a user namespace, the +.Sy zoned +property has to be set and the user namespaces needs access to the +.Pa /dev/zfs +device. +The +.Sy quota +property cannot be changed from within a user namespace. +.Pp +After a dataset is attached to a user namespace and the +.Sy zoned +property is set, +a zoned file system cannot be mounted outside the user namespace, +since the user namespace administrator might have set the mount point +to an unacceptable value. +.It Xo +.Nm zfs +.Cm unzone +.Ar nsfile +.Ar filesystem +.Xc +Detach the specified +.Ar filesystem +from the user namespace identified by +.Ar nsfile . +.El +.Sh EXAMPLES +.Ss Example 1 : No Delegating a Dataset to a User Namespace +The following example delegates the +.Ar tank/users +dataset to a user namespace identified by user namespace file +.Pa /proc/1234/ns/user . 
+.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users +. +.Sh SEE ALSO +.Xr zfsprops 7 diff --git a/module/Kbuild.in b/module/Kbuild.in index ed8dc23a90d3..14f2362810aa 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -65,7 +65,8 @@ SPL_OBJS := \ spl-tsd.o \ spl-vmem.o \ spl-xdr.o \ - spl-zlib.o + spl-zlib.o \ + spl-zone.o spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS)) diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index f99a2f966660..5179100d1665 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -780,8 +780,13 @@ spl_init(void) if ((rc = spl_zlib_init())) goto out7; + if ((rc = spl_zone_init())) + goto out8; + return (rc); +out8: + spl_zlib_fini(); out7: spl_kstat_fini(); out6: @@ -801,6 +806,7 @@ spl_init(void) static void __exit spl_fini(void) { + spl_zone_fini(); spl_zlib_fini(); spl_kstat_fini(); spl_proc_fini(); diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c new file mode 100644 index 000000000000..804c8010ccef --- /dev/null +++ b/module/os/linux/spl/spl-zone.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2021 Klara Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_USER_NS) +#include +#include +#endif + +static kmutex_t zone_datasets_lock; +static struct list_head zone_datasets; + +typedef struct zone_datasets { + struct list_head zds_list; /* zone_datasets linkage */ + struct user_namespace *zds_userns; /* namespace reference */ + struct list_head zds_datasets; /* datasets for the namespace */ +} zone_datasets_t; + +typedef struct zone_dataset { + struct list_head zd_list; /* zone_dataset linkage */ + size_t zd_dsnamelen; /* length of name */ + char zd_dsname[0]; /* name of the member dataset */ +} zone_dataset_t; + +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +/* + * Returns: + * - 0 on success + * - EBADF if it cannot open the provided file descriptor + * - ENOTTY if the file itself is not a user namespace file. We want to + * intercept this error in the ZFS layer. We cannot just return one of the + * ZFS_ERR_* errors here as we want to preserve the separation of the ZFS + * and the SPL layers. 
+ */ +static int +user_ns_get(int fd, struct user_namespace **userns) +{ + struct kstatfs st; + struct file *nsfile; + struct ns_common *ns; + int error; + + if ((nsfile = fget(fd)) == NULL) + return (EBADF); + if (vfs_statfs(&nsfile->f_path, &st) != 0) { + error = ENOTTY; + goto done; + } + if (st.f_type != NSFS_MAGIC) { + error = ENOTTY; + goto done; + } + ns = get_proc_ns(file_inode(nsfile)); + if (ns->ops->type != CLONE_NEWUSER) { + error = ENOTTY; + goto done; + } + *userns = container_of(ns, struct user_namespace, ns); + + error = 0; +done: + fput(nsfile); + + return (error); +} +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ + +static unsigned int +user_ns_zoneid(struct user_namespace *user_ns) +{ + unsigned int r; + +#if defined(HAVE_USER_NS_COMMON_INUM) + r = user_ns->ns.inum; +#else + r = user_ns->proc_inum; +#endif + + return (r); +} + +static struct zone_datasets * +zone_datasets_lookup(unsigned int nsinum) +{ + zone_datasets_t *zds; + + list_for_each_entry(zds, &zone_datasets, zds_list) { + if (user_ns_zoneid(zds->zds_userns) == nsinum) + return (zds); + } + return (NULL); +} + +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) +static struct zone_dataset * +zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) +{ + zone_dataset_t *zd; + + list_for_each_entry(zd, &zds->zds_datasets, zd_list) { + if (zd->zd_dsnamelen != dsnamelen) + continue; + if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) + return (zd); + } + + return (NULL); +} + +static int +zone_dataset_cred_check(cred_t *cred) +{ + + if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) + return (EPERM); + + return (0); +} +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ + +static int +zone_dataset_name_check(const char *dataset, size_t *dsnamelen) +{ + + if (dataset[0] == '\0' || dataset[0] == '/') + return (ENOENT); + + *dsnamelen = strlen(dataset); + /* Ignore trailing slash, if supplied. 
*/ + if (dataset[*dsnamelen - 1] == '/') + (*dsnamelen)--; + + return (0); +} + +int +zone_dataset_attach(cred_t *cred, const char *dataset, int cleanup_fd) +{ +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) + struct user_namespace *userns; + zone_datasets_t *zds; + zone_dataset_t *zd; + int error; + size_t dsnamelen; + + if ((error = zone_dataset_cred_check(cred)) != 0) + return (error); + if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) + return (error); + if ((error = user_ns_get(cleanup_fd, &userns)) != 0) + return (error); + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(user_ns_zoneid(userns)); + if (zds == NULL) { + zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); + INIT_LIST_HEAD(&zds->zds_list); + INIT_LIST_HEAD(&zds->zds_datasets); + zds->zds_userns = userns; + /* + * Lock the namespace by increasing its refcount to prevent + * the namespace ID from being reused. + */ + get_user_ns(userns); + list_add_tail(&zds->zds_list, &zone_datasets); + } else { + zd = zone_dataset_lookup(zds, dataset, dsnamelen); + if (zd != NULL) { + mutex_exit(&zone_datasets_lock); + return (EEXIST); + } + } + + zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); + zd->zd_dsnamelen = dsnamelen; + strncpy(zd->zd_dsname, dataset, dsnamelen); + zd->zd_dsname[dsnamelen] = '\0'; + INIT_LIST_HEAD(&zd->zd_list); + list_add_tail(&zd->zd_list, &zds->zds_datasets); + + mutex_exit(&zone_datasets_lock); + return (0); +#else + return (ENXIO); +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +} +EXPORT_SYMBOL(zone_dataset_attach); + +int +zone_dataset_detach(cred_t *cred, const char *dataset, int cleanup_fd) +{ +#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) + struct user_namespace *userns; + zone_datasets_t *zds; + zone_dataset_t *zd; + int error; + size_t dsnamelen; + + if ((error = zone_dataset_cred_check(cred)) != 0) + return (error); + if ((error = 
zone_dataset_name_check(dataset, &dsnamelen)) != 0) + return (error); + if ((error = user_ns_get(cleanup_fd, &userns)) != 0) + return (error); + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(user_ns_zoneid(userns)); + if (zds != NULL) + zd = zone_dataset_lookup(zds, dataset, dsnamelen); + if (zds == NULL || zd == NULL) { + mutex_exit(&zone_datasets_lock); + return (ENOENT); + } + + list_del(&zd->zd_list); + kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); + + /* Prune the namespace entry if it has no more delegations. */ + if (list_empty(&zds->zds_datasets)) { + /* + * Decrease the refcount now that the namespace is no longer + * used. It is no longer necessary to prevent the namespace ID + * from being reused. + */ + put_user_ns(userns); + list_del(&zds->zds_list); + kmem_free(zds, sizeof (*zds)); + } + + mutex_exit(&zone_datasets_lock); + return (0); +#else + return (ENXIO); +#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ +} +EXPORT_SYMBOL(zone_dataset_detach); + +/* + * A dataset is visible if: + * - It is a parent of a namespace entry. + * - It is one of the namespace entries. + * - It is a child of a namespace entry. + * + * A dataset is writable if: + * - It is one of the namespace entries. + * - It is a child of a namespace entry. + * + * The parent datasets of namespace entries are visible and + * read-only to provide a path back to the root of the pool. + */ +int +zone_dataset_visible(const char *dataset, int *write) +{ + zone_datasets_t *zds; + zone_dataset_t *zd; + size_t dsnamelen, zd_len; + int visible; + + /* Default to read-only, in case visible is returned. 
*/ + if (write != NULL) + *write = 0; + if (zone_dataset_name_check(dataset, &dsnamelen) != 0) + return (0); + if (INGLOBALZONE(curproc)) { + if (write != NULL) + *write = 1; + return (1); + } + + mutex_enter(&zone_datasets_lock); + zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); + if (zds == NULL) { + mutex_exit(&zone_datasets_lock); + return (0); + } + + visible = 0; + list_for_each_entry(zd, &zds->zds_datasets, zd_list) { + zd_len = strlen(zd->zd_dsname); + if (zd_len > dsnamelen) { + /* + * The name of the namespace entry is longer than that + * of the dataset, so it could be that the dataset is a + * parent of the namespace entry. + */ + visible = memcmp(zd->zd_dsname, dataset, + dsnamelen) == 0 && + zd->zd_dsname[dsnamelen] == '/'; + if (visible) + break; + } else if (zd_len == dsnamelen) { + /* + * The name of the namespace entry is as long as that + * of the dataset, so perhaps the dataset itself is the + * namespace entry. + */ + visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; + if (visible) { + if (write != NULL) + *write = 1; + break; + } + } else { + /* + * The name of the namespace entry is shorter than that + * of the dataset, so perhaps the dataset is a child of + * the namespace entry. 
+ */ + visible = memcmp(zd->zd_dsname, dataset, + zd_len) == 0 && dataset[zd_len] == '/'; + if (visible) { + if (write != NULL) + *write = 1; + break; + } + } + } + + mutex_exit(&zone_datasets_lock); + return (visible); +} +EXPORT_SYMBOL(zone_dataset_visible); + +unsigned int +global_zoneid(void) +{ + unsigned int z = 0; + +#if defined(CONFIG_USER_NS) + z = user_ns_zoneid(&init_user_ns); +#endif + + return (z); +} +EXPORT_SYMBOL(global_zoneid); + +unsigned int +crgetzoneid(const cred_t *cr) +{ + unsigned int r = 0; + +#if defined(CONFIG_USER_NS) + r = user_ns_zoneid(cr->user_ns); +#endif + + return (r); +} +EXPORT_SYMBOL(crgetzoneid); + +boolean_t +inglobalzone(proc_t *proc) +{ +#if defined(CONFIG_USER_NS) + return (proc->cred->user_ns == &init_user_ns); +#else + return (B_TRUE); +#endif +} +EXPORT_SYMBOL(inglobalzone); + +int +spl_zone_init(void) +{ + mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); + INIT_LIST_HEAD(&zone_datasets); + return (0); +} + +void +spl_zone_fini(void) +{ + zone_datasets_t *zds; + zone_dataset_t *zd; + + /* + * It would be better to assert an empty zone_datasets, but since + * there's no automatic mechanism for cleaning them up if the user + * namespace is destroyed, just do it here, since spl is about to go + * out of context. 
+ */ + while (!list_empty(&zone_datasets)) { + zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); + while (!list_empty(&zds->zds_datasets)) { + zd = list_entry(zds->zds_datasets.next, + zone_dataset_t, zd_list); + list_del(&zd->zd_list); + kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); + put_user_ns(zds->zds_userns); + } + list_del(&zds->zds_list); + kmem_free(zds, sizeof (*zds)); + } + mutex_destroy(&zone_datasets_lock); +} diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index 5a52092bb90a..ab00d2ae14d2 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err, static int priv_policy(const cred_t *cr, int capability, int err) { - return (priv_policy_ns(cr, capability, err, NULL)); + return (priv_policy_ns(cr, capability, err, cr->user_ns)); } static int diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c index c65702e1a053..67b864aa77a9 100644 --- a/module/os/linux/zfs/zfs_ioctl_os.c +++ b/module/os/linux/zfs/zfs_ioctl_os.c @@ -37,6 +37,7 @@ * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. + * Copyright (c) 2021 Klara, Inc. */ #include @@ -150,6 +151,48 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) } +static int +zfs_ioc_userns_attach(zfs_cmd_t *zc) +{ + int error; + + if (zc == NULL) + return (SET_ERROR(EINVAL)); + + error = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd); + + /* + * Translate ENOTTY to ZFS_ERR_NOT_USER_NAMESPACE as we just arrived + * back from the SPL layer, which does not know about ZFS_ERR_* errors. + * See the comment at the user_ns_get() function in spl-zone.c for + * details. 
+ */ + if (error == ENOTTY) + error = ZFS_ERR_NOT_USER_NAMESPACE; + + return (error); +} + +static int +zfs_ioc_userns_detach(zfs_cmd_t *zc) +{ + int error; + + if (zc == NULL) + return (SET_ERROR(EINVAL)); + + error = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd); + + /* + * See the comment in zfs_ioc_userns_attach() for details on what is + * going on here. + */ + if (error == ENOTTY) + error = ZFS_ERR_NOT_USER_NAMESPACE; + + return (error); +} + uint64_t zfs_max_nvlist_src_size_os(void) { @@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname) void zfs_ioctl_init_os(void) { + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH, + zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH, + zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE); } #ifdef CONFIG_COMPAT diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 81a059651e8a..a67ba821d06f 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) int error = 0; zfsvfs_t *zfsvfs = NULL; vfs_t *vfs = NULL; + int canwrite; + int dataset_visible_zone; ASSERT(zm); ASSERT(osname); + dataset_visible_zone = zone_dataset_visible(osname, &canwrite); + + /* + * Refuse to mount a filesystem if we are in a namespace and the + * dataset is not visible or writable in that namespace. + */ + if (!INGLOBALZONE(curproc) && + (!dataset_visible_zone || !canwrite)) { + return (SET_ERROR(EPERM)); + } + error = zfsvfs_parse_options(zm->mnt_data, &vfs); if (error) return (error); + /* + * If a non-writable filesystem is being mounted without the + * read-only flag, pretend it was set, as done for snapshots. 
+ */ + if (!canwrite) + vfs->vfs_readonly = true; + error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs); if (error) { zfsvfs_vfs_free(vfs); diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index c2fd3fee1401..b18efde9b18a 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = { struct file_system_type zpl_fs_type = { .owner = THIS_MODULE, .name = ZFS_DRIVER, + .fs_flags = FS_USERNS_MOUNT, .mount = zpl_mount, .kill_sb = zpl_kill_sb, }; diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index fa71f412ba6c..9b32e73afb1e 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -177,7 +177,8 @@ tests = ['upgrade_projectquota_001_pos'] tags = ['functional', 'upgrade'] [tests/functional/user_namespace:Linux] -tests = ['user_namespace_001'] +tests = ['user_namespace_001', 'user_namespace_002', 'user_namespace_003', + 'user_namespace_004'] tags = ['functional', 'user_namespace'] [tests/functional/userquota:Linux] diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 1ee786d131d7..47357dca57fb 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -146,11 +146,13 @@ export SYSTEM_FILES_LINUX='attr mkswap modprobe mpstat + nsenter parted perf setfattr sha256sum udevadm + unshare useradd userdel usermod diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index d759e51968cd..e65a8bba2c2c 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1895,6 +1895,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/user_namespace/cleanup.ksh \ functional/user_namespace/setup.ksh \ functional/user_namespace/user_namespace_001.ksh \ + functional/user_namespace/user_namespace_002.ksh \ + functional/user_namespace/user_namespace_003.ksh \ + 
functional/user_namespace/user_namespace_004.ksh \ functional/userquota/cleanup.ksh \ functional/userquota/groupspace_001_pos.ksh \ functional/userquota/groupspace_002_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh index 3d19c4273e24..39aad91d0c61 100755 --- a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_001.ksh @@ -47,6 +47,11 @@ function cleanup done } +unshare -Urm echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi + log_onexit cleanup log_assert "Check root in user namespaces" diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh new file mode 100755 index 000000000000..a5f76014ab85 --- /dev/null +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_002.ksh @@ -0,0 +1,115 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. 
$STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib + +# +# DESCRIPTION: +# Regression test for delegation of datasets to user namespaces. +# +# STRATEGY: +# 1. Delegate a dataset to a user namespace. +# 2. Check that 'zfs list' is only able to see inside the delegation. +# 3. Check that 'zfs create' is able to create only inside the delegation. +# 4. Check that the filesystems can be mounted inside the delegation, +# and that file permissions are appropriate. +# 5. Check that 'zfs destroy' is able to destroy only inside the delegation. +# 6. Check that 'zfs unzone' has a desirable effect. +# + +verify_runnable "both" + +user_ns_cleanup() { + if [ -n "$proc_ns_added" ]; then + log_must zfs unzone $proc_ns_added $TESTPOOL/userns + fi + if [ -n "$unshared_pid" ]; then + kill -9 $unshared_pid + # Give it a sec to make the global cleanup more reliable. + sleep 1 + fi + log_must zfs destroy -r $TESTPOOL/userns +} + +log_onexit user_ns_cleanup + +log_assert "Check zfs/zpool command delegation in user namespaces" + +# Create the baseline datasets. +log_must zfs create -o zoned=on $TESTPOOL/userns +log_must zfs create -o zoned=on $TESTPOOL/userns/testds +# Partial match should be denied; hence we also set this to be 'zoned'. +log_must zfs create -o zoned=on $TESTPOOL/user + +# 1. Create a user namespace with a cloned mount namespace, then delegate. +unshare -Urm echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +unshare -Urm /usr/bin/sleep 1h & +unshared_pid=$! +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +proc_ns=/proc/$unshared_pid/ns/user +sleep 2 # Wait for unshare to acquire user namespace +log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}" + +NSENTER="nsenter -t $unshared_pid --all" + +$NSENTER echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to enter user namespace" +fi + +# 1b. Pre-test by checking that 'zone' does something new. 
+list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')" +log_must test -z "$list" +log_must zfs zone $proc_ns $TESTPOOL/userns +proc_ns_added="$proc_ns" # Recorded so cleanup can unzone + +# 2. 'zfs list' +list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')" +log_must test "$list" = "$TESTPOOL $TESTPOOL/userns $TESTPOOL/userns/testds " + +# 3. 'zfs create' +log_must $NSENTER zfs create $TESTPOOL/userns/created +log_mustnot $NSENTER zfs create $TESTPOOL/user/created + +# 4. Check file permissions (create mounts the filesystem). The 'permissions' +# check is simply, does it get mapped to user namespace's root/root? +log_must $NSENTER df -h /$TESTPOOL/userns/created +log_must $NSENTER mkfile 8192 /$TESTPOOL/userns/created/testfile +uidgid=$($NSENTER stat -c '%u %g' /$TESTPOOL/userns/created/testfile) +log_must test "${uidgid}" = "0 0" + +# 5. 'zfs destroy' +log_must $NSENTER zfs destroy $TESTPOOL/userns/created +log_mustnot $NSENTER zfs destroy $TESTPOOL/user + +# 6. 'zfs unzone' should have an effect +log_must zfs unzone $proc_ns $TESTPOOL/userns +proc_ns_added="" +list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')" +log_must test -z "$list" + +log_pass "Check zfs/zpool command delegation in user namespaces" diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh new file mode 100755 index 000000000000..20a7f6677d20 --- /dev/null +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_003.ksh @@ -0,0 +1,97 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib + +# +# DESCRIPTION: +# Regression test for delegation of datasets to user namespaces. +# +# STRATEGY: +# 1. Delegate two datasets with distinctive names to a user namespace. +# 2. Check that 'zfs list' is not able to see datasets outside of the +# delegation, which have a prefix matching one of the delegated sets. +# Also, check that all the delegated sets are visible. +# + +verify_runnable "both" + +user_ns_cleanup() { + if [ -n "$proc_ns_added" ]; then + log_must zfs unzone $proc_ns_added $TESTPOOL/userns + log_must zfs unzone $proc_ns_added $TESTPOOL/otheruserns + fi + if [ -n "$unshared_pid" ]; then + kill -9 $unshared_pid + # Give it a sec to make the global cleanup more reliable. + sleep 1 + fi + log_must zfs destroy -r $TESTPOOL/userns + log_must zfs destroy -r $TESTPOOL/usernsisitnot + log_must zfs destroy -r $TESTPOOL/otheruserns +} + +log_onexit user_ns_cleanup + +log_assert "Check zfs list command handling of dataset visibility in user namespaces" + +# Create the baseline dataset. +log_must zfs create -o zoned=on $TESTPOOL/userns +# Datasets with a prefix matching the delegated dataset should not be +# automatically considered visible. +log_must zfs create -o zoned=on $TESTPOOL/usernsisitnot +# All delegated datasets should be visible. +log_must zfs create -o zoned=on $TESTPOOL/otheruserns + +# 1. Create a user namespace with a cloned mount namespace, then delegate. +unshare -Urm echo test +if [ "$?" 
-ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +unshare -Urm /usr/bin/sleep 1h & +unshared_pid=$! +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi +proc_ns=/proc/$unshared_pid/ns/user +sleep 2 # Wait for unshare to acquire user namespace +log_note "unshare: child=${unshared_pid} proc_ns=${proc_ns}" + +NSENTER="nsenter -t $unshared_pid --all" + +$NSENTER echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to enter user namespace" +fi + +# 1b. Pre-test by checking that 'zone' does something new. +list="$($NSENTER zfs list -r -H -o name | tr '\n' ' ')" +log_must test -z "$list" +log_must zfs zone $proc_ns $TESTPOOL/userns +log_must zfs zone $proc_ns $TESTPOOL/otheruserns +proc_ns_added="$proc_ns" # Recorded so cleanup can unzone + +# 2. 'zfs list' +list="$($NSENTER zfs list -r -H -o name $TESTPOOL | tr '\n' ' ')" +log_must test "$list" = "$TESTPOOL $TESTPOOL/otheruserns $TESTPOOL/userns " + +log_pass "Check zfs list command handling of dataset visibility in user namespaces" diff --git a/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh new file mode 100755 index 000000000000..6edb0413c98a --- /dev/null +++ b/tests/zfs-tests/tests/functional/user_namespace/user_namespace_004.ksh @@ -0,0 +1,67 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/tests/functional/user_namespace/user_namespace_common.kshlib + +# +# DESCRIPTION: +# Regression test for safeguards around the delegation of datasets to +# user namespaces. +# +# STRATEGY: +# 1. Check that 'zfs zone' correctly handles the case of the first +# argument being a non-namespace file. +# 2. Check that 'zfs zone' correctly handles the case of the first +# argument being a non-namespace and non-existent file. +# + +verify_runnable "both" + +user_ns_cleanup() { + if [ -n "$temp_file" ]; then + log_must rm -f "$temp_file" + fi + + log_must zfs destroy -r "$TESTPOOL/userns" +} + +log_onexit user_ns_cleanup + +log_assert "Check zfs zone command handling of non-namespace files" + +# Report unsupported if user namespaces cannot be created. +unshare -Urm echo test +if [ "$?" -ne "0" ]; then + log_unsupported "Failed to create user namespace" +fi + +# Create the baseline datasets. +log_must zfs create -o zoned=on "$TESTPOOL/userns" + +# 1. Try to pass a non-namespace file to zfs zone. +temp_file="$(TMPDIR=$TEST_BASE_DIR mktemp)" +log_mustnot zfs zone "$temp_file" "$TESTPOOL/userns" + +# 2. Try to pass a non-namespace and non-existent file to zfs zone. +log_mustnot zfs zone "$TEST_BASE_DIR/nonexistent" "$TESTPOOL/userns" + +log_pass "Check zfs zone command handling of non-namespace files"