From 5f4fff447aa7e57301397490889edcc6600c30b8 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 22 Jun 2019 10:35:11 +1000 Subject: [PATCH] zfs_rename: support RENAME_* flags Implement support for Linux's RENAME_* flags (for renameat2). Aside from being quite useful for userspace (providing race-free ways to exchange paths and implement mv --no-clobber), they are used by overlayfs and are thus required in order to use overlayfs-on-ZFS. In order for us to represent the new renameat2(2) flags in the ZIL, we create two new transaction types for the two flags which need transactional-level support (RENAME_EXCHANGE and RENAME_WHITEOUT). RENAME_NOREPLACE does not need any ZIL support because we know that if the operation succeeded before creating the ZIL entry, there was no file to be clobbered and thus it can be treated as a regular TX_RENAME. Reviewed-by: Ryan Moeller Reviewed-by: Alexander Motin Reviewed-by: Brian Behlendorf Reviewed-by: Pavel Snajdr Signed-off-by: Aleksa Sarai Closes #12209 Closes #14070 --- AUTHORS | 2 + cmd/zdb/zdb_il.c | 10 + cmd/ztest.c | 2 + config/kernel-rename.m4 | 71 +++++- include/os/freebsd/zfs/sys/zfs_vnops_os.h | 3 +- include/os/linux/kernel/linux/vfs_compat.h | 13 ++ include/os/linux/spl/sys/sysmacros.h | 10 + include/os/linux/zfs/sys/zfs_vnops_os.h | 3 +- include/os/linux/zfs/sys/zpl.h | 4 + include/sys/zfs_znode.h | 6 + include/sys/zil.h | 17 +- module/os/freebsd/zfs/zfs_vnops_os.c | 5 +- module/os/linux/zfs/zfs_dir.c | 3 +- module/os/linux/zfs/zfs_vnops_os.c | 205 +++++++++++++++--- module/os/linux/zfs/zfs_znode.c | 5 + module/os/linux/zfs/zpl_inode.c | 37 +++- module/zfs/zfs_log.c | 84 ++++++- module/zfs/zfs_replay.c | 106 ++++++++- module/zfs/zil.c | 6 +- module/zfs/zvol.c | 2 + tests/runfiles/linux.run | 4 + tests/test-runner/bin/zts-report.py.in | 6 + tests/zfs-tests/cmd/.gitignore | 1 + tests/zfs-tests/cmd/Makefile.am | 3 +- tests/zfs-tests/cmd/renameat2.c | 128 +++++++++++ tests/zfs-tests/include/commands.cfg | 1 + 
.../tests/functional/renameat2/Makefile.am | 7 + .../tests/functional/renameat2/cleanup.ksh | 34 +++ .../renameat2/renameat2_exchange.ksh | 61 ++++++ .../renameat2/renameat2_noreplace.ksh | 51 +++++ .../renameat2/renameat2_whiteout.ksh | 50 +++++ .../tests/functional/renameat2/setup.ksh | 37 ++++ .../functional/slog/slog_replay_fs_001.ksh | 23 ++ 33 files changed, 929 insertions(+), 71 deletions(-) create mode 100644 tests/zfs-tests/cmd/renameat2.c create mode 100644 tests/zfs-tests/tests/functional/renameat2/Makefile.am create mode 100755 tests/zfs-tests/tests/functional/renameat2/cleanup.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/setup.ksh diff --git a/AUTHORS b/AUTHORS index 86083ba87715..c2af58d75085 100644 --- a/AUTHORS +++ b/AUTHORS @@ -20,6 +20,7 @@ CONTRIBUTORS: Alec Salazar Alejandro R. 
Sedeño Alek Pinchuk + Aleksa Sarai Alex Braunegg Alex McWhirter Alex Reece @@ -236,6 +237,7 @@ CONTRIBUTORS: Paul Dagnelie Paul Zuchowski Pavel Boldin + Pavel Snajdr Pavel Zakharov Pawel Jakub Dawidek Pedro Giffuni diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index 02cc10fb7817..55df1f559f6e 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -128,6 +128,14 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, const void *arg) (void) printf("%ssdoid %llu, tdoid %llu\n", tab_prefix, (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid); (void) printf("%ssrc %s tgt %s\n", tab_prefix, snm, tnm); + switch (txtype) { + case TX_RENAME_EXCHANGE: + (void) printf("%sflags RENAME_EXCHANGE\n", tab_prefix); + break; + case TX_RENAME_WHITEOUT: + (void) printf("%sflags RENAME_WHITEOUT\n", tab_prefix); + break; + } } static int @@ -330,6 +338,8 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { {.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "}, {.zri_print = zil_prt_rec_setsaxattr, .zri_name = "TX_SETSAXATTR "}, + {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_EXCHANGE "}, + {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_WHITEOUT "}, }; static int diff --git a/cmd/ztest.c b/cmd/ztest.c index a8f9e6b8760a..19edab4eb7a2 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -2368,6 +2368,8 @@ static zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { NULL, /* TX_MKDIR_ACL_ATTR */ NULL, /* TX_WRITE2 */ NULL, /* TX_SETSAXATTR */ + NULL, /* TX_RENAME_EXCHANGE */ + NULL, /* TX_RENAME_WHITEOUT */ }; /* diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index 302db43f5748..a2b0800ab4d2 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -1,8 +1,28 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ + dnl # + dnl # 3.9 (to 4.9) API change, + dnl # + dnl # A new version of iops->rename() was added (rename2) that takes a flag + dnl # argument (to support renameat2). 
However this separate function was + dnl # merged back into iops->rename() in Linux 4.9. + dnl # + ZFS_LINUX_TEST_SRC([inode_operations_rename2], [ + #include + int rename2_fn(struct inode *sip, struct dentry *sdp, + struct inode *tip, struct dentry *tdp, + unsigned int flags) { return 0; } + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .rename2 = rename2_fn, + }; + ],[]) + dnl # dnl # 4.9 API change, - dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants - dnl # flags. + dnl # + dnl # iops->rename2() merged into iops->rename(), and iops->rename() now + dnl # wants flags. dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [ #include @@ -16,11 +36,29 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ }; ],[]) + dnl # + dnl # EL7 compatibility + dnl # + dnl # EL7 has backported renameat2 support, but it's done by defining a + dnl # separate iops wrapper structure that takes the .renameat2 function. + dnl # + ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [ + #include + int rename2_fn(struct inode *sip, struct dentry *sdp, + struct inode *tip, struct dentry *tdp, + unsigned int flags) { return 0; } + + static const struct inode_operations_wrapper + iops __attribute__ ((unused)) = { + .rename2 = rename2_fn, + }; + ],[]) + dnl # dnl # 5.12 API change, dnl # - dnl # Linux 5.12 introduced passing struct user_namespace* as the first argument - dnl # of the rename() and other inode_operations members. + dnl # Linux 5.12 introduced passing struct user_namespace* as the first + dnl # argument of the rename() and other inode_operations members. 
dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [ #include @@ -44,13 +82,30 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether iop->rename() wants flags]) - ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ + AC_MSG_CHECKING([whether iops->rename2() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename2], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, - [iops->rename() wants flags]) + AC_DEFINE(HAVE_RENAME2, 1, [iops->rename2() exists]) ],[ AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether iops->rename() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, + [iops->rename() wants flags]) + ],[ + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether struct inode_operations_wrapper takes .rename2()]) + ZFS_LINUX_TEST_RESULT([dir_inode_operations_wrapper_rename2], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RENAME2_OPERATIONS_WRAPPER, 1, + [struct inode_operations_wrapper takes .rename2()]) + ],[ + AC_MSG_RESULT(no) + ]) + ]) ]) ]) ]) diff --git a/include/os/freebsd/zfs/sys/zfs_vnops_os.h b/include/os/freebsd/zfs/sys/zfs_vnops_os.h index 460aecd2e708..839ee629a5ab 100644 --- a/include/os/freebsd/zfs/sys/zfs_vnops_os.h +++ b/include/os/freebsd/zfs/sys/zfs_vnops_os.h @@ -41,7 +41,8 @@ extern int zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr, zuserns_t *mnt_ns); extern int zfs_rename(znode_t *sdzp, const char *snm, znode_t *tdzp, - const char *tnm, cred_t *cr, int flags, zuserns_t *mnt_ns); + const char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, + zuserns_t *mnt_ns); extern int zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, const char *link, znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns); extern int zfs_link(znode_t *tdzp, znode_t *sp, diff --git a/include/os/linux/kernel/linux/vfs_compat.h 
b/include/os/linux/kernel/linux/vfs_compat.h index eeed0a388ce4..fd0b9e8e1068 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -324,6 +324,19 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid) ip->i_gid = make_kgid(kcred->user_ns, gid); } +/* + * 3.15 API change + */ +#ifndef RENAME_NOREPLACE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#endif +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#endif +#ifndef RENAME_WHITEOUT +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +#endif + /* * 4.9 API change */ diff --git a/include/os/linux/spl/sys/sysmacros.h b/include/os/linux/spl/sys/sysmacros.h index be1f77e43bda..99e3a6fb41c6 100644 --- a/include/os/linux/spl/sys/sysmacros.h +++ b/include/os/linux/spl/sys/sysmacros.h @@ -120,6 +120,16 @@ extern uint32_t zone_get_hostid(void *zone); extern void spl_setup(void); extern void spl_cleanup(void); +/* + * Only handles the first 4096 majors and first 256 minors. We don't have a + * libc for the kernel module so we define this inline. 
+ */ +static inline dev_t +makedev(unsigned int major, unsigned int minor) +{ + return ((major & 0xFFF) << 8) | (minor & 0xFF); +} + #define highbit(x) __fls(x) #define lowbit(x) __ffs(x) diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h index 787d258e1388..197ea9bec500 100644 --- a/include/os/linux/zfs/sys/zfs_vnops_os.h +++ b/include/os/linux/zfs/sys/zfs_vnops_os.h @@ -61,7 +61,8 @@ extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip, extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr, zuserns_t *mnt_ns); extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, - char *tnm, cred_t *cr, int flags, zuserns_t *mnt_ns); + char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, + zuserns_t *mnt_ns); extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link, znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns); extern int zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr); diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 83416d64744c..c3ee0ae4a600 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -42,7 +42,11 @@ extern void zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr, zuserns_t *mnt_ns); extern const struct inode_operations zpl_inode_operations; +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER +extern const struct inode_operations_wrapper zpl_dir_inode_operations; +#else extern const struct inode_operations zpl_dir_inode_operations; +#endif extern const struct inode_operations zpl_symlink_inode_operations; extern const struct inode_operations zpl_special_inode_operations; diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index c8656b3f6162..88d642350691 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -299,6 +299,12 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern void 
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp); +extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, + uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, + const char *dname, znode_t *szp); +extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, + uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, + const char *dname, znode_t *szp, znode_t *wzp); extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t len, int ioflag, zil_callback_t callback, void *callback_data); diff --git a/include/sys/zil.h b/include/sys/zil.h index cec04f120ce3..9591fb4f6440 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -164,7 +164,9 @@ typedef enum zil_create { #define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */ #define TX_WRITE2 20 /* dmu_sync EALREADY write */ #define TX_SETSAXATTR 21 /* Set sa xattrs on file */ -#define TX_MAX_TYPE 22 /* Max transaction type */ +#define TX_RENAME_EXCHANGE 22 /* Atomic swap via renameat2 */ +#define TX_RENAME_WHITEOUT 23 /* Atomic whiteout via renameat2 */ +#define TX_MAX_TYPE 24 /* Max transaction type */ /* * The transactions for mkdir, symlink, remove, rmdir, link, and rename @@ -317,6 +319,19 @@ typedef struct { /* 2 strings: names of source and destination follow this */ } lr_rename_t; +typedef struct { + lr_rename_t lr_rename; /* common rename portion */ + /* members related to the whiteout file (based on lr_create_t) */ + uint64_t lr_wfoid; /* obj id of the new whiteout file */ + uint64_t lr_wmode; /* mode of object */ + uint64_t lr_wuid; /* uid of whiteout */ + uint64_t lr_wgid; /* gid of whiteout */ + uint64_t lr_wgen; /* generation (txg of creation) */ + uint64_t lr_wcrtime[2]; /* creation time */ + uint64_t lr_wrdev; /* always makedev(0, 0) */ + /* 2 strings: names of source and destination follow this */ +} lr_rename_whiteout_t; + 
typedef struct { lr_t lr_common; /* common portion of log record */ uint64_t lr_foid; /* file object to write */ diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 362e02751ee4..bcf4e2f18d83 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -3420,7 +3420,7 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, int zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, - cred_t *cr, int flags, zuserns_t *mnt_ns) + cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zuserns_t *mnt_ns) { struct componentname scn, tcn; vnode_t *sdvp, *tdvp; @@ -3428,6 +3428,9 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, int error; svp = tvp = NULL; + if (rflags != 0 || wo_vap != NULL) + return (SET_ERROR(EINVAL)); + sdvp = ZTOV(sdzp); tdvp = ZTOV(tdzp); error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE); diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index fb6c28f95c3b..b4e4146b09e9 100644 --- a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -1035,7 +1035,8 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, } /* The only error is !zfs_dirempty() and we checked earlier. */ - ASSERT3U(zfs_drop_nlink_locked(zp, tx, &unlinked), ==, 0); + error = zfs_drop_nlink_locked(zp, tx, &unlinked); + ASSERT3U(error, ==, 0); mutex_exit(&zp->z_lock); } else { error = zfs_dropname(dl, zp, dzp, tx, flag); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index f02cefea222b..545d8ad8d79c 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -2655,6 +2655,8 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) * tnm - New entry name. * cr - credentials of caller. 
* flags - case flags + * rflags - RENAME_* flags + * wo_vap - attributes for RENAME_WHITEOUT (must be a char 0:0). * mnt_ns - user namespace of the mount * * RETURN: 0 on success, error code on failure. @@ -2664,7 +2666,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) */ int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, - cred_t *cr, int flags, zuserns_t *mnt_ns) + cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zuserns_t *mnt_ns) { znode_t *szp, *tzp; zfsvfs_t *zfsvfs = ZTOZSB(sdzp); @@ -2676,10 +2678,33 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, int error = 0; int zflg = 0; boolean_t waited = B_FALSE; + /* Needed for whiteout inode creation. */ + boolean_t fuid_dirtied; + zfs_acl_ids_t acl_ids; + boolean_t have_acl = B_FALSE; + znode_t *wzp = NULL; + if (snm == NULL || tnm == NULL) return (SET_ERROR(EINVAL)); + if (rflags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) + return (SET_ERROR(EINVAL)); + + /* Already checked by Linux VFS, but just to make sure. */ + if (rflags & RENAME_EXCHANGE && + (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT))) + return (SET_ERROR(EINVAL)); + + /* + * Make sure we only get wo_vap iff. RENAME_WHITEOUT and that it's the + * right kind of vattr_t for the whiteout file. These are set + * internally by ZFS so should never be incorrect. + */ + VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL); + VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR); + VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0)); + if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0) return (error); zilog = zfsvfs->z_log; @@ -2856,7 +2881,6 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Note that if target and source are the same, this can be * done in a single check. 
*/ - if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, mnt_ns))) goto out; @@ -2873,15 +2897,21 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Does target exist? */ if (tzp) { + if (rflags & RENAME_NOREPLACE) { + error = SET_ERROR(EEXIST); + goto out; + } /* - * Source and target must be the same type. + * Source and target must be the same type (unless exchanging). */ - boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0; - boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0; + if (!(rflags & RENAME_EXCHANGE)) { + boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0; + boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0; - if (s_is_dir != t_is_dir) { - error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR); - goto out; + if (s_is_dir != t_is_dir) { + error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR); + goto out; + } } /* * POSIX dictates that when the source and target @@ -2892,12 +2922,43 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, error = 0; goto out; } + } else if (rflags & RENAME_EXCHANGE) { + /* Target must exist for RENAME_EXCHANGE. */ + error = SET_ERROR(ENOENT); + goto out; + } + + /* Set up inode creation for RENAME_WHITEOUT. */ + if (rflags & RENAME_WHITEOUT) { + /* + * Whiteout files are not regular files or directories, so to + * match zfs_create() we do not inherit the project id. + */ + uint64_t wo_projid = ZFS_DEFAULT_PROJID; + + error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns); + if (error) + goto out; + + if (!have_acl) { + error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL, + &acl_ids, mnt_ns); + if (error) + goto out; + have_acl = B_TRUE; + } + + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) { + error = SET_ERROR(EDQUOT); + goto out; + } } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); + dmu_tx_hold_zap(tx, sdzp->z_id, + (rflags & RENAME_EXCHANGE) ? 
TRUE : FALSE, snm); dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); if (sdzp != tdzp) { dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); @@ -2907,7 +2968,21 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tzp); } + if (rflags & RENAME_WHITEOUT) { + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm); + dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); + if (!zfsvfs->z_use_sa && + acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } + } + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); zfs_sa_upgrade_txholds(tx, szp); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); @@ -2946,7 +3021,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), (void *)&szp->z_pflags, sizeof (uint64_t), tx); - ASSERT0(error); + VERIFY0(error); error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); if (error) @@ -2956,13 +3031,30 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Unlink the target. */ if (tzp) { - error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); + int tzflg = zflg; + + if (rflags & RENAME_EXCHANGE) { + /* This inode will be re-linked soon. */ + tzflg |= ZRENAMING; + + tzp->z_pflags |= ZFS_AV_MODIFIED; + if (sdzp->z_pflags & ZFS_PROJINHERIT) + tzp->z_pflags |= ZFS_PROJINHERIT; + + error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + (void *)&tzp->z_pflags, sizeof (uint64_t), tx); + ASSERT0(error); + } + error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL); if (error) goto commit_link_szp; } /* - * Create a new link at the target. + * Create the new target links: + * * We always link the target. 
+ * * RENAME_EXCHANGE: Link the old target to the source. + * * RENAME_WHITEOUT: Create a whiteout inode in-place of the source. */ error = zfs_link_create(tdl, szp, tx, ZRENAMING); if (error) { @@ -2975,18 +3067,55 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, goto commit_link_tzp; } - zfs_log_rename(zilog, tx, TX_RENAME | - (flags & FIGNORECASE ? TX_CI : 0), sdzp, - sdl->dl_name, tdzp, tdl->dl_name, szp); + switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) { + case RENAME_EXCHANGE: + error = zfs_link_create(sdl, tzp, tx, ZRENAMING); + /* + * The same argument as zfs_link_create() failing for + * szp applies here, since the source directory must + * have had an entry we are replacing. + */ + ASSERT0(error); + if (error) + goto commit_unlink_td_szp; + break; + case RENAME_WHITEOUT: + zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids); + error = zfs_link_create(sdl, wzp, tx, ZNEW); + if (error) { + zfs_znode_delete(wzp, tx); + remove_inode_hash(ZTOI(wzp)); + goto commit_unlink_td_szp; + } + break; + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) { + case RENAME_EXCHANGE: + zfs_log_rename_exchange(zilog, tx, + (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, + tdzp, tdl->dl_name, szp); + break; + case RENAME_WHITEOUT: + zfs_log_rename_whiteout(zilog, tx, + (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, + tdzp, tdl->dl_name, szp, wzp); + break; + default: + ASSERT0(rflags & ~RENAME_NOREPLACE); + zfs_log_rename(zilog, tx, (flags & FIGNORECASE ? 
TX_CI : 0), + sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); + break; + } commit: dmu_tx_commit(tx); out: - if (zl != NULL) - zfs_rename_unlock(&zl); - - zfs_dirent_unlock(sdl); - zfs_dirent_unlock(tdl); + if (have_acl) + zfs_acl_ids_free(&acl_ids); zfs_znode_update_vfs(sdzp); if (sdzp == tdzp) @@ -2997,11 +3126,21 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, zfs_znode_update_vfs(szp); zrele(szp); + if (wzp) { + zfs_znode_update_vfs(wzp); + zrele(wzp); + } if (tzp) { zfs_znode_update_vfs(tzp); zrele(tzp); } + if (zl != NULL) + zfs_rename_unlock(&zl); + + zfs_dirent_unlock(sdl); + zfs_dirent_unlock(tdl); + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); @@ -3012,23 +3151,31 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Clean-up path for broken link state. * * At this point we are in a (very) bad state, so we need to do our - * best to correct the state. In particular, the nlink of szp is wrong - * because we were destroying and creating links with ZRENAMING. + * best to correct the state. In particular, all of the nlinks are + * wrong because we were destroying and creating links with ZRENAMING. + * + * In some form, all of these operations have to resolve the state: + * + * * link_destroy() *must* succeed. Fortunately, this is very likely + * since we only just created it. * - * link_create()s are allowed to fail (though they shouldn't because we - * only just unlinked them and are putting the entries back during - * clean-up). But if they fail, we can just forcefully drop the nlink - * value to (at the very least) avoid broken nlink values -- though in - * the case of non-empty directories we will have to panic. + * * link_create()s are allowed to fail (though they shouldn't because + * we only just unlinked them and are putting the entries back + * during clean-up). 
But if they fail, we can just forcefully drop + * the nlink value to (at the very least) avoid broken nlink values + * -- though in the case of non-empty directories we will have to + * panic (otherwise we'd have a leaked directory with a broken ..). */ +commit_unlink_td_szp: + VERIFY0(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL)); commit_link_tzp: if (tzp) { if (zfs_link_create(tdl, tzp, tx, ZRENAMING)) - VERIFY3U(zfs_drop_nlink(tzp, tx, NULL), ==, 0); + VERIFY0(zfs_drop_nlink(tzp, tx, NULL)); } commit_link_szp: if (zfs_link_create(sdl, szp, tx, ZRENAMING)) - VERIFY3U(zfs_drop_nlink(szp, tx, NULL), ==, 0); + VERIFY0(zfs_drop_nlink(szp, tx, NULL)); goto commit; } diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index 3ded79a30a6f..c8f6e02bd224 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -422,7 +422,12 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) break; case S_IFDIR: +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER + ip->i_flags |= S_IOPS_WRAPPER; + ip->i_op = &zpl_dir_inode_operations.ops; +#else ip->i_op = &zpl_dir_inode_operations; +#endif ip->i_fop = &zpl_dir_file_operations; ITOZ(ip)->z_zn_prefetch = B_TRUE; break; diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index 9b702c535ea7..64016f9ac1de 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -24,6 +24,7 @@ */ +#include #include #include #include @@ -498,35 +499,42 @@ static int #ifdef HAVE_IOPS_RENAME_USERNS zpl_rename2(struct user_namespace *user_ns, struct inode *sdip, struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry, - unsigned int flags) + unsigned int rflags) #else zpl_rename2(struct inode *sdip, struct dentry *sdentry, - struct inode *tdip, struct dentry *tdentry, unsigned int flags) + struct inode *tdip, struct dentry *tdentry, unsigned int rflags) #endif { cred_t *cr = CRED(); + vattr_t *wo_vap = NULL; int error; fstrans_cookie_t cookie; 
#ifndef HAVE_IOPS_RENAME_USERNS zuserns_t *user_ns = NULL; #endif - /* We don't have renameat2(2) support */ - if (flags) - return (-EINVAL); - crhold(cr); + if (rflags & RENAME_WHITEOUT) { + wo_vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); + zpl_vap_init(wo_vap, sdip, S_IFCHR, cr, user_ns); + wo_vap->va_rdev = makedevice(0, 0); + } + cookie = spl_fstrans_mark(); error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip), - dname(tdentry), cr, 0, user_ns); + dname(tdentry), cr, 0, rflags, wo_vap, user_ns); spl_fstrans_unmark(cookie); + if (wo_vap) + kmem_free(wo_vap, sizeof (vattr_t)); crfree(cr); ASSERT3S(error, <=, 0); return (error); } -#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS) +#if !defined(HAVE_IOPS_RENAME_USERNS) && \ + !defined(HAVE_RENAME_WANTS_FLAGS) && \ + !defined(HAVE_RENAME2) static int zpl_rename(struct inode *sdip, struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry) @@ -745,7 +753,12 @@ const struct inode_operations zpl_inode_operations = { #endif /* CONFIG_FS_POSIX_ACL */ }; +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER +const struct inode_operations_wrapper zpl_dir_inode_operations = { + .ops = { +#else const struct inode_operations zpl_dir_inode_operations = { +#endif .create = zpl_create, .lookup = zpl_lookup, .link = zpl_link, @@ -754,7 +767,9 @@ const struct inode_operations zpl_dir_inode_operations = { .mkdir = zpl_mkdir, .rmdir = zpl_rmdir, .mknod = zpl_mknod, -#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) +#ifdef HAVE_RENAME2 + .rename2 = zpl_rename2, +#elif defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) .rename = zpl_rename2, #else .rename = zpl_rename, @@ -776,6 +791,10 @@ const struct inode_operations zpl_dir_inode_operations = { #endif /* HAVE_SET_ACL */ .get_acl = zpl_get_acl, #endif /* CONFIG_FS_POSIX_ACL */ +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER + }, + .rename2 = zpl_rename2, +#endif }; const struct inode_operations 
zpl_symlink_inode_operations = { diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 245699882aa9..77bf9140d52d 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -494,25 +494,101 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } +static void +do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, + const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) +{ + itx_t *itx; + lr_rename_t *lr; + size_t snamesize = strlen(sname) + 1; + size_t dnamesize = strlen(dname) + 1; + + if (zil_replaying(zilog, tx)) + return; + + itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); + lr = (lr_rename_t *)&itx->itx_lr; + lr->lr_sdoid = sdzp->z_id; + lr->lr_tdoid = tdzp->z_id; + memcpy((char *)(lr + 1), sname, snamesize); + memcpy((char *)(lr + 1) + snamesize, dname, dnamesize); + itx->itx_oid = szp->z_id; + + zil_itx_assign(zilog, itx, tx); +} + /* * Handles TX_RENAME transactions. */ void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) +{ + txtype |= TX_RENAME; + do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); +} + +/* + * Handles TX_RENAME_EXCHANGE transactions. + */ +void +zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, + znode_t *szp) +{ + txtype |= TX_RENAME_EXCHANGE; + do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); +} + +/* + * Handles TX_RENAME_WHITEOUT transactions. + * + * Unfortunately we cannot reuse do_zfs_log_rename because we need to call + * zfs_mknode() on replay which requires stashing bits as with TX_CREATE. 
+ */ +void +zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, + znode_t *szp, znode_t *wzp) { itx_t *itx; - lr_rename_t *lr; + lr_rename_whiteout_t *lr; size_t snamesize = strlen(sname) + 1; size_t dnamesize = strlen(dname) + 1; if (zil_replaying(zilog, tx)) return; + txtype |= TX_RENAME_WHITEOUT; itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); - lr = (lr_rename_t *)&itx->itx_lr; - lr->lr_sdoid = sdzp->z_id; - lr->lr_tdoid = tdzp->z_id; + lr = (lr_rename_whiteout_t *)&itx->itx_lr; + lr->lr_rename.lr_sdoid = sdzp->z_id; + lr->lr_rename.lr_tdoid = tdzp->z_id; + + /* + * RENAME_WHITEOUT will create an entry at the source znode, so we need + * to store the same data that the equivalent call to zfs_log_create() + * would. + */ + lr->lr_wfoid = wzp->z_id; + LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT); + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen, + sizeof (uint64_t)); + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)), + lr->lr_wcrtime, sizeof (uint64_t) * 2); + lr->lr_wmode = wzp->z_mode; + lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp)); + lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp)); + + /* + * This rdev will always be makedevice(0, 0) but because the ZIL log and + * replay code needs to be platform independent (and there is no + * platform independent makedev()) we need to copy the one created + * during the rename operation. 
+ */ + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev, + sizeof (lr->lr_wrdev)); + memcpy((char *)(lr + 1), sname, snamesize); memcpy((char *)(lr + 1) + snamesize, dname, dnamesize); itx->itx_oid = szp->z_id; diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 45c2fa3720cf..5e20ce3319b4 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -643,18 +643,21 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) } static int -zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) +do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname, + char *tname, uint64_t rflags, vattr_t *wo_vap) { - zfsvfs_t *zfsvfs = arg1; - lr_rename_t *lr = arg2; - char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ - char *tname = sname + strlen(sname) + 1; znode_t *sdzp, *tdzp; - int error; - int vflg = 0; + int error, vflg = 0; - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); + /* Only Linux currently supports RENAME_* flags. */ +#ifdef __linux__ + VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT)); + + /* wo_vap must be non-NULL iff. 
we're doing RENAME_WHITEOUT */ + VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL); +#else + VERIFY0(rflags); +#endif if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) return (error); @@ -667,13 +670,94 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, NULL); + error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags, + wo_vap, NULL); zrele(tdzp); zrele(sdzp); return (error); } +static int +zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) +{ + zfsvfs_t *zfsvfs = arg1; + lr_rename_t *lr = arg2; + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL)); +} + +static int +zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) +{ +#ifdef __linux__ + zfsvfs_t *zfsvfs = arg1; + lr_rename_t *lr = arg2; + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE, + NULL)); +#else + return (SET_ERROR(ENOTSUP)); +#endif +} + +static int +zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) +{ +#ifdef __linux__ + zfsvfs_t *zfsvfs = arg1; + lr_rename_whiteout_t *lr = arg2; + int error; + /* sname and tname follow lr_rename_whiteout_t */ + char *sname = (char *)(lr + 1); + char *tname = sname + strlen(sname) + 1; + /* For the whiteout file. 
*/ + xvattr_t xva; + uint64_t objid; + uint64_t dnodesize; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + objid = LR_FOID_GET_OBJ(lr->lr_wfoid); + dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT; + + xva_init(&xva); + zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, + lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid); + + /* + * As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which + * assigns the object's creation time, generation number, and dnode + * slot count. The generic zfs_rename() has no concept of these + * attributes, so we smuggle the values inside the vattr's otherwise + * unused va_ctime, va_nblocks, and va_fsid fields. + */ + ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime); + xva.xva_vattr.va_nblocks = lr->lr_wgen; + xva.xva_vattr.va_fsid = dnodesize; + + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) + return (error); + + return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname, + RENAME_WHITEOUT, &xva.xva_vattr)); +#else + return (SET_ERROR(ENOTSUP)); +#endif +} + static int zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap) { @@ -1069,4 +1153,6 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = { zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ zfs_replay_write2, /* TX_WRITE2 */ zfs_replay_setsaxattr, /* TX_SETSAXATTR */ + zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */ + zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */ }; diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 6bb99c4b1cdf..23afc8a40bb4 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -759,11 +759,9 @@ zil_commit_activate_saxattr_feature(zilog_t *zilog) uint64_t txg = 0; dmu_tx_t *tx = NULL; - if (spa_feature_is_enabled(zilog->zl_spa, - SPA_FEATURE_ZILSAXATTR) && + if (spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) && dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL && - !dsl_dataset_feature_is_active(ds, - 
SPA_FEATURE_ZILSAXATTR)) { + !dsl_dataset_feature_is_active(ds, SPA_FEATURE_ZILSAXATTR)) { tx = dmu_tx_create(zilog->zl_os); VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); dsl_dataset_dirty(ds, tx); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index be8ee34f27ae..20578a8223b2 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -514,6 +514,8 @@ zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ zvol_replay_err, /* TX_WRITE2 */ zvol_replay_err, /* TX_SETSAXATTR */ + zvol_replay_err, /* TX_RENAME_EXCHANGE */ + zvol_replay_err, /* TX_RENAME_WHITEOUT */ }; /* diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 21e0f882dc40..13f7efd96bd3 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -157,6 +157,10 @@ tags = ['functional', 'projectquota'] tests = ['read_dos_attrs_001', 'write_dos_attrs_001'] tags = ['functional', 'dos_attributes'] +[tests/functional/renameat2:Linux] +tests = ['renameat2_noreplace', 'renameat2_exchange', 'renameat2_whiteout'] +tags = ['functional', 'renameat2'] + [tests/functional/rsend:Linux] tests = ['send_realloc_dnode_size', 'send_encrypted_files'] tags = ['functional', 'rsend'] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index e7d338fcf8a9..1cebf50827b9 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -69,6 +69,11 @@ exec_reason = 'Test user execute permissions required for utilities' # python_deps_reason = 'Python modules missing: python3-cffi' +# +# Some tests require that the kernel supports renameat2 syscall. +# +renameat2_reason = 'Kernel renameat2 support required' + # # Some tests require the O_TMPFILE flag which was first introduced in the # 3.11 kernel. 
@@ -231,6 +236,7 @@ maybe = { 'pool_checkpoint/checkpoint_discard_busy': ['FAIL', 11946], 'projectquota/setup': ['SKIP', exec_reason], 'removal/removal_condense_export': ['FAIL', known_reason], + 'renameat2/setup': ['SKIP', renameat2_reason], 'reservation/reservation_008_pos': ['FAIL', 7741], 'reservation/reservation_018_pos': ['FAIL', 5642], 'snapshot/clone_001_pos': ['FAIL', known_reason], diff --git a/tests/zfs-tests/cmd/.gitignore b/tests/zfs-tests/cmd/.gitignore index 0ec450e248db..f68f58072818 100644 --- a/tests/zfs-tests/cmd/.gitignore +++ b/tests/zfs-tests/cmd/.gitignore @@ -27,6 +27,7 @@ /randwritecomp /read_dos_attributes /readmmap +/renameat2 /rename_dir /rm_lnkcnt_zero_file /send_doall diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 673a18b4c083..066abb6ce3b5 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -112,10 +112,10 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/edonr_test %D%/skein_test \ %C%_edonr_test_LDADD = $(%C%_skein_test_LDADD) %C%_blake3_test_LDADD = $(%C%_skein_test_LDADD) - if BUILD_LINUX scripts_zfs_tests_bin_PROGRAMS += %D%/getversion scripts_zfs_tests_bin_PROGRAMS += %D%/user_ns_exec +scripts_zfs_tests_bin_PROGRAMS += %D%/renameat2 scripts_zfs_tests_bin_PROGRAMS += %D%/xattrtest scripts_zfs_tests_bin_PROGRAMS += %D%/zed_fd_spill-zedlet scripts_zfs_tests_bin_PROGRAMS += %D%/idmap_util @@ -127,7 +127,6 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/read_dos_attributes %D%/write_dos_attribu %C%_read_dos_attributes_SOURCES = %D%/linux_dos_attributes/read_dos_attributes.c %C%_write_dos_attributes_SOURCES = %D%/linux_dos_attributes/write_dos_attributes.c - scripts_zfs_tests_bin_PROGRAMS += %D%/randfree_file %C%_randfree_file_SOURCES = %D%/file/randfree_file.c diff --git a/tests/zfs-tests/cmd/renameat2.c b/tests/zfs-tests/cmd/renameat2.c new file mode 100644 index 000000000000..a9d0a8b20adf --- /dev/null +++ b/tests/zfs-tests/cmd/renameat2.c @@ -0,0 +1,128 @@ +/* 
SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 */ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (C) 2019 Aleksa Sarai + * Copyright (C) 2019 SUSE LLC + */ + +/* + * mv(1) doesn't currently support RENAME_{EXCHANGE,WHITEOUT} so this is a very + * simple renameat2(2) wrapper for the OpenZFS self-tests. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifndef SYS_renameat2 +#ifdef __NR_renameat2 +#define SYS_renameat2 __NR_renameat2 +#elif defined(__x86_64__) +#define SYS_renameat2 316 +#elif defined(__i386__) +#define SYS_renameat2 353 +#elif defined(__arm__) || defined(__aarch64__) +#define SYS_renameat2 382 +#else +#error "SYS_renameat2 not known for this architecture." 
+#endif +#endif + +#ifndef RENAME_NOREPLACE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#endif +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#endif +#ifndef RENAME_WHITEOUT +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +#endif + +/* + * glibc doesn't provide a renameat2 wrapper, so use our own. Returns the + * syscall's result on success and -errno on failure. + */ +static int +sys_renameat2(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, unsigned int flags) +{ + int ret = syscall(SYS_renameat2, olddirfd, oldpath, newdirfd, newpath, + flags); + return ((ret < 0) ? -errno : ret); +} + +/* Print the usage message to stderr and exit with status 1. */ +static void +usage(void) +{ + fprintf(stderr, "usage: renameat2 [-Cnwx] src dst\n"); + exit(1); +} + +/* + * Probe for renameat2(2): exits with status 1 if the syscall fails with + * ENOSYS and with status 0 otherwise. Never returns. + */ +static void +check(void) +{ + int err = sys_renameat2(AT_FDCWD, ".", AT_FDCWD, ".", RENAME_EXCHANGE); + exit(err == -ENOSYS); +} + +/* + * Parse the -C/-n/-w/-x options, then rename src to dst with the selected + * flags. Returns 0 on success and 1 if the rename failed. + */ +int +main(int argc, char **argv) +{ + char *src, *dst; + int ch, err; + unsigned int flags = 0; + + while ((ch = getopt(argc, argv, "Cnwx")) >= 0) { + switch (ch) { + case 'C': + check(); + break; + case 'n': + flags |= RENAME_NOREPLACE; + break; + case 'w': + flags |= RENAME_WHITEOUT; + break; + case 'x': + flags |= RENAME_EXCHANGE; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (argc != 2) + usage(); + src = argv[0]; + dst = argv[1]; + + err = sys_renameat2(AT_FDCWD, src, AT_FDCWD, dst, flags); + /* Newline-terminate the diagnostic so it doesn't run into later output. */ + if (err < 0) + fprintf(stderr, "renameat2: %s\n", strerror(-err)); + return (err != 0); +} diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 30514361ad57..b3cfe149ffa7 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -208,6 +208,7 @@ export ZFSTEST_FILES='badsend randwritecomp readmmap read_dos_attributes + renameat2 rename_dir rm_lnkcnt_zero_file send_doall diff --git a/tests/zfs-tests/tests/functional/renameat2/Makefile.am b/tests/zfs-tests/tests/functional/renameat2/Makefile.am new file mode 100644 
index 000000000000..bd8d6c9d68bf --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/Makefile.am @@ -0,0 +1,7 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/renameat2 +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + renameat2_noreplace.ksh \ + renameat2_exchange.ksh \ + renameat2_whiteout.ksh diff --git a/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh b/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh new file mode 100755 index 000000000000..3166bd6ec16e --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. 
$STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh new file mode 100755 index 000000000000..94e56231feb1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh @@ -0,0 +1,61 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_EXCHANGE." +log_onexit cleanup + +cd $TESTDIR +echo "foo" > foo +echo "bar" > bar + +# Self-exchange is a no-op. +log_must renameat2 -x foo foo +log_must grep '^foo$' foo + +# Basic exchange. +log_must renameat2 -x foo bar +log_must grep '^bar$' foo +log_must grep '^foo$' bar + +# And exchange back. +log_must renameat2 -x foo bar +log_must grep '^foo$' foo +log_must grep '^bar$' bar + +# Exchange with a bad path should fail. +log_mustnot renameat2 -x bar baz + +log_pass "ZFS supports RENAME_EXCHANGE as expected." 
diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh new file mode 100755 index 000000000000..d75b94fab465 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh @@ -0,0 +1,51 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_NOREPLACE." +log_onexit cleanup + +cd $TESTDIR +touch foo bar + +# Clobbers should always fail. +log_mustnot renameat2 -n foo foo +log_mustnot renameat2 -n foo bar +log_mustnot renameat2 -n bar foo + +# Regular renames should succeed. +log_must renameat2 -n bar baz + +log_pass "ZFS supports RENAME_NOREPLACE as expected." 
diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh new file mode 100755 index 000000000000..8ecb074dbbdb --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh @@ -0,0 +1,50 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_WHITEOUT." +log_onexit cleanup + +cd $TESTDIR +echo "whiteout" > whiteout + +# Straight-forward rename-with-whiteout. +log_must renameat2 -w whiteout new +# Check new file. +log_must grep '^whiteout$' new +# Check that the whiteout is actually a {0,0} char device. +log_must grep '^character special file:0:0$' <<<"$(stat -c '%F:%t:%T' whiteout)" + +log_pass "ZFS supports RENAME_WHITEOUT as expected." 
diff --git a/tests/zfs-tests/tests/functional/renameat2/setup.ksh b/tests/zfs-tests/tests/functional/renameat2/setup.ksh new file mode 100755 index 000000000000..b8c26d5ba062 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/setup.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +if ! is_linux ; then + log_unsupported "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_unsupported "renameat2 not supported on this (pre-3.15) linux kernel" +fi + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index eddecbc2db7e..8f3585a5997f 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -175,6 +175,29 @@ log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \ /$TESTPOOL/$TESTFS/link_and_unlink.link log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link +# We can't test RENAME_* flags without renameat2(2) support. +if ! 
is_linux ; then + log_note "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_note "renameat2 not supported on this (pre-3.15) linux kernel" +else + # TX_RENAME_EXCHANGE + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-a bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-b bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-c bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-d bs=1k count=1 + # rotate the files around + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{a,b} + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{b,c} + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{c,a} + # exchange same path + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{d,d} + + # TX_RENAME_WHITEOUT + log_must mkfile 1k /$TESTPOOL/$TESTFS/whiteout + log_must renameat2 -w /$TESTPOOL/$TESTFS/whiteout{,-moved} +fi + # # 4. Copy TESTFS to temporary location (TESTDIR/copy) #