diff --git a/AUTHORS b/AUTHORS index 8314a1c21472..e5afe8e52f75 100644 --- a/AUTHORS +++ b/AUTHORS @@ -19,6 +19,7 @@ CONTRIBUTORS: Albert Lee Alec Salazar Alejandro R. SedeƱo + Aleksa Sarai Alek Pinchuk Alex Braunegg Alex McWhirter @@ -233,6 +234,7 @@ CONTRIBUTORS: Paul Dagnelie Paul Zuchowski Pavel Boldin + Pavel Snajdr Pavel Zakharov Pawel Jakub Dawidek Pedro Giffuni diff --git a/config/kernel-dentry-alias.m4 b/config/kernel-dentry-alias.m4 new file mode 100644 index 000000000000..f0ddb8d010b0 --- /dev/null +++ b/config/kernel-dentry-alias.m4 @@ -0,0 +1,30 @@ +dnl # +dnl # 3.18 API change +dnl # Dentry aliases are in d_u struct dentry member +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [ + ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [ + #include + #include + #include + ], [ + struct inode *inode __attribute__ ((unused)) = NULL; + struct dentry *dentry __attribute__ ((unused)) = NULL; + hlist_for_each_entry(dentry, &inode->i_dentry, + d_u.d_alias) { + d_drop(dentry); + } + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [ + AC_MSG_CHECKING([whether dentry aliases are in d_u member]) + ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1, + [dentry aliases are in d_u member]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index f707391539d8..b97d95b46bd6 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -3,8 +3,20 @@ dnl # 4.9 API change, dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants dnl # flags. dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [ - ZFS_LINUX_TEST_SRC([inode_operations_rename], [ +AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ + ZFS_LINUX_TEST_SRC([inode_operations_rename2], [ + #include + int rename2_fn(struct inode *sip, struct dentry *sdp, + struct inode *tip, struct dentry *tdp, + unsigned int flags) { return 0; } + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .rename2 = rename2_fn, + }; + ],[]) + + ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [ #include int rename_fn(struct inode *sip, struct dentry *sdp, struct inode *tip, struct dentry *tdp, @@ -15,15 +27,45 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [ .rename = rename_fn, }; ],[]) + + ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [ + #include + int rename2_fn(struct inode *sip, struct dentry *sdp, + struct inode *tip, struct dentry *tdp, + unsigned int flags) { return 0; } + + static const struct inode_operations_wrapper + iops __attribute__ ((unused)) = { + .rename2 = rename2_fn, + }; + ],[]) ]) -AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [ - AC_MSG_CHECKING([whether iops->rename() wants flags]) - ZFS_LINUX_TEST_RESULT([inode_operations_rename], [ +AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ + AC_MSG_CHECKING([whether iops->rename2() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename2], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, - [iops->rename() wants flags]) + AC_DEFINE(HAVE_RENAME2, 1, [iops->rename2() exists]) ],[ AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether iops->rename() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, + [iops->rename() wants flags]) + ],[ + AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether struct inode_operations_wrapper takes .rename2()]) + ZFS_LINUX_TEST_RESULT([dir_inode_operations_wrapper_rename2], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RENAME2_OPERATIONS_WRAPPER, 1, + [struct inode_operations_wrapper takes .rename2()]) + ],[ + AC_MSG_RESULT(no) + ]) + ]) ]) ]) + diff --git a/config/kernel.m4 b/config/kernel.m4 index 4309d5456e2c..67670c021e9d 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -87,6 +87,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED ZFS_AC_KERNEL_SRC_DENTRY + ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT @@ -110,7 +111,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_KUIDGID_T ZFS_AC_KERNEL_SRC_KUID_HELPERS ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST - ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS + ZFS_AC_KERNEL_SRC_RENAME ZFS_AC_KERNEL_SRC_CURRENT_TIME ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL @@ -181,6 +182,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_DENTRY + ZFS_AC_KERNEL_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT @@ -204,7 +206,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_KUIDGID_T ZFS_AC_KERNEL_KUID_HELPERS ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST - ZFS_AC_KERNEL_RENAME_WANTS_FLAGS + ZFS_AC_KERNEL_RENAME ZFS_AC_KERNEL_CURRENT_TIME ZFS_AC_KERNEL_USERNS_CAPABILITIES ZFS_AC_KERNEL_IN_COMPAT_SYSCALL diff --git a/configure.ac b/configure.ac index 723407558f80..34ca445652a6 100644 --- a/configure.ac +++ b/configure.ac @@ -207,6 +207,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/cmd/randfree_file/Makefile tests/zfs-tests/cmd/randwritecomp/Makefile tests/zfs-tests/cmd/readmmap/Makefile + tests/zfs-tests/cmd/renameat2/Makefile tests/zfs-tests/cmd/rename_dir/Makefile tests/zfs-tests/cmd/rm_lnkcnt_zero_file/Makefile tests/zfs-tests/cmd/stride_dd/Makefile @@ -332,6 +333,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/no_space/Makefile tests/zfs-tests/tests/functional/nopwrite/Makefile tests/zfs-tests/tests/functional/online_offline/Makefile + tests/zfs-tests/tests/functional/overlayfs/Makefile tests/zfs-tests/tests/functional/pool_checkpoint/Makefile tests/zfs-tests/tests/functional/pool_names/Makefile tests/zfs-tests/tests/functional/poolversion/Makefile @@ -346,6 +348,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/refquota/Makefile tests/zfs-tests/tests/functional/refreserv/Makefile tests/zfs-tests/tests/functional/removal/Makefile + tests/zfs-tests/tests/functional/renameat2/Makefile tests/zfs-tests/tests/functional/rename_dirs/Makefile tests/zfs-tests/tests/functional/replacement/Makefile tests/zfs-tests/tests/functional/reservation/Makefile diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h index d0588a82e9ad..e2aca1629735 100644 --- a/include/os/linux/kernel/linux/dcache_compat.h +++ b/include/os/linux/kernel/linux/dcache_compat.h @@ -61,4 +61,25 @@ d_clear_d_op(struct dentry *dentry) DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE); } +/* + * Walk and invalidate all dentry aliases of an inode + * unless it's a mountpoint + */ +static inline void +zpl_d_drop_aliases(struct inode *inode) +{ + struct dentry *dentry; + spin_lock(&inode->i_lock); +#ifdef HAVE_DENTRY_D_U_ALIASES + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { +#else + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { +#endif + if (!IS_ROOT(dentry) && !d_mountpoint(dentry) && + (dentry->d_inode == inode)) { + d_drop(dentry); + } + } + spin_unlock(&inode->i_lock); +} #endif /* _ZFS_DCACHE_H */ diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index c35e80d31cd7..852db6dfad49 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -340,6 +340,19 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid) #endif } +/* + * 3.15 API change + */ +#ifndef RENAME_NOREPLACE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#endif +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#endif +#ifndef RENAME_WHITEOUT +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +#endif + /* * 4.9 API change */ diff --git a/include/os/linux/zfs/sys/zfs_dir.h b/include/os/linux/zfs/sys/zfs_dir.h index bcd4ec2c1de5..2fd06c4a170e 100644 --- a/include/os/linux/zfs/sys/zfs_dir.h +++ b/include/os/linux/zfs/sys/zfs_dir.h @@ -52,6 +52,7 @@ extern "C" { extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **, int, int *, pathname_t *); extern void zfs_dirent_unlock(zfs_dirlock_t *); +extern int zfs_drop_nlink(znode_t *, dmu_tx_t *, boolean_t *); extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int); extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int, boolean_t *); diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 20a3dc674988..e2d11ca9a5c8 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -42,10 +42,16 @@ extern void zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr); extern const struct inode_operations zpl_inode_operations; +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER +extern const struct inode_operations_wrapper zpl_dir_inode_operations; +#else extern const struct inode_operations zpl_dir_inode_operations; +#endif extern const struct inode_operations zpl_symlink_inode_operations; extern const struct inode_operations zpl_special_inode_operations; +#if 0 extern dentry_operations_t zpl_dentry_operations; +#endif /* zpl_file.c */ extern ssize_t zpl_read_common(struct inode *ip, const char *buf, diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 0e8d44637d13..f71204208bf6 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -273,6 +273,13 @@ extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name); extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *dzp, znode_t *zp, char *name, char *link); +extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, + uint64_t txtype, znode_t *sdzp, char *sname, znode_t *tdzp, + char *dname, znode_t *szp); +extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, + uint64_t txtype, znode_t *sdzp, char *sname, znode_t *tdzp, + char *dname, znode_t *szp, znode_t *wzp, vsecattr_t *vsecp, + zfs_fuid_info_t *fuidp, vattr_t *vap); extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp); extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index 89704d0e4b88..fff924b17523 100644 --- a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -905,6 +905,74 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, return (error); } +static int +zfs_drop_nlink_locked(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); + boolean_t unlinked = B_FALSE; + sa_bulk_attr_t bulk[3]; + uint64_t mtime[2], ctime[2]; + uint64_t links; + int count = 0; + int error; + + if (zp_is_dir && !zfs_dirempty(zp)) + return (SET_ERROR(ENOTEMPTY)); + + if (ZTOI(zp)->i_nlink <= zp_is_dir) { + zfs_panic_recover("zfs: link count on %lu is %u, " + "should be at least %u", zp->z_id, + (int)ZTOI(zp)->i_nlink, zp_is_dir + 1); + set_nlink(ZTOI(zp), zp_is_dir + 1); + } + drop_nlink(ZTOI(zp)); + if (ZTOI(zp)->i_nlink == zp_is_dir) { + zp->z_unlinked = B_TRUE; + clear_nlink(ZTOI(zp)); + unlinked = B_TRUE; + } else { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, sizeof (zp->z_pflags)); + zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, + ctime); + } + links = ZTOI(zp)->i_nlink; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + NULL, &links, sizeof (links)); + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + ASSERT3U(error, ==, 0); + + if (unlinkedp != NULL) + *unlinkedp = unlinked; + else if (unlinked) + zfs_unlinked_add(zp, tx); + + return (0); +} + +/* + * Forcefully drop an nlink reference from (zp) and mark it for deletion if it + * was the last link. This *must* only be done to znodes which have already + * been zfs_link_destroy()'d with ZRENAMING. This is explicitly only used in + * the error path of zfs_rename(), where we have to correct the nlink count if + * we failed to link the target as well as failing to re-link the original + * znodes. + */ +int +zfs_drop_nlink(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp) +{ + int error; + + mutex_enter(&zp->z_lock); + error = zfs_drop_nlink_locked(zp, tx, unlinkedp); + mutex_exit(&zp->z_lock); + + return (error); +} + /* * Unlink zp from dl, and mark zp for deletion if this was the last link. Can * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY). @@ -945,31 +1013,9 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, return (error); } - if (ZTOI(zp)->i_nlink <= zp_is_dir) { - zfs_panic_recover("zfs: link count on %lu is %u, " - "should be at least %u", zp->z_id, - (int)ZTOI(zp)->i_nlink, zp_is_dir + 1); - set_nlink(ZTOI(zp), zp_is_dir + 1); - } - drop_nlink(ZTOI(zp)); - if (ZTOI(zp)->i_nlink == zp_is_dir) { - zp->z_unlinked = B_TRUE; - clear_nlink(ZTOI(zp)); - unlinked = B_TRUE; - } else { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), - NULL, &ctime, sizeof (ctime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), - NULL, &zp->z_pflags, sizeof (zp->z_pflags)); - zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, - ctime); - } - links = ZTOI(zp)->i_nlink; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), - NULL, &links, sizeof (links)); - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - count = 0; - ASSERT(error == 0); + /* The only error is !zfs_dirempty() and we checked earlier. */ + error = zfs_drop_nlink_locked(zp, tx, &unlinked); + ASSERT3U(error, ==, 0); mutex_exit(&zp->z_lock); } else { error = zfs_dropname(dl, zp, dzp, tx, flag); diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 154933a4cc0a..7c3331f3d677 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1926,7 +1926,9 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; +#if 0 sb->s_d_op = &zpl_dentry_operations; +#endif /* Set features for file system. */ zfs_set_fuid_feature(zfsvfs); @@ -2275,6 +2277,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) zp = list_next(&zfsvfs->z_all_znodes, zp)) { err2 = zfs_rezget(zp); if (err2) { + zpl_d_drop_aliases(ZTOI(zp)); remove_inode_hash(ZTOI(zp)); zp->z_is_stale = B_TRUE; } diff --git a/module/os/linux/zfs/zfs_vnops.c b/module/os/linux/zfs/zfs_vnops.c index e7d0e8933f2f..113bf45c95db 100644 --- a/module/os/linux/zfs/zfs_vnops.c +++ b/module/os/linux/zfs/zfs_vnops.c @@ -3659,8 +3659,7 @@ int zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, cred_t *cr, int flags) { - znode_t *tdzp, *szp, *tzp; - znode_t *sdzp = ITOZ(sdip); + znode_t *sdzp, *tdzp, *szp, *tzp; zfsvfs_t *zfsvfs = ITOZSB(sdip); zilog_t *zilog; zfs_dirlock_t *sdl, *tdl; @@ -3670,14 +3669,23 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, int error = 0; int zflg = 0; boolean_t waited = B_FALSE; + /* Needed for whiteout inode creation. */ + vattr_t wo_vap; + uint64_t wo_projid; + boolean_t fuid_dirtied; + zfs_acl_ids_t acl_ids; + boolean_t have_acl = B_FALSE; + znode_t *wzp = NULL; + if (snm == NULL || tnm == NULL) return (SET_ERROR(EINVAL)); ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(sdzp); zilog = zfsvfs->z_log; + sdzp = ITOZ(sdip); + ZFS_VERIFY_ZP(sdzp); tdzp = ITOZ(tdip); ZFS_VERIFY_ZP(tdzp); @@ -3840,6 +3848,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, error = SET_ERROR(EXDEV); goto out; } + wo_projid = szp->z_projid; /* * Must have write access at the source to remove the old entry @@ -3847,7 +3856,6 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, * Note that if target and source are the same, this can be * done in a single check. */ - if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) goto out; @@ -3864,17 +3872,19 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, * Does target exist? */ if (tzp) { + if (flags & RENAME_NOREPLACE) { + error = SET_ERROR(EEXIST); + goto out; + } /* - * Source and target must be the same type. + * Source and target must be the same type (unless exchanging). */ - if (S_ISDIR(ZTOI(szp)->i_mode)) { - if (!S_ISDIR(ZTOI(tzp)->i_mode)) { - error = SET_ERROR(ENOTDIR); - goto out; - } - } else { - if (S_ISDIR(ZTOI(tzp)->i_mode)) { - error = SET_ERROR(EISDIR); + if (!(flags & RENAME_EXCHANGE)) { + boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0; + boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0; + + if (s_is_dir != t_is_dir) { + error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR); goto out; } } @@ -3887,12 +3897,38 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, error = 0; goto out; } + } else if (flags & RENAME_EXCHANGE) { + /* Target must exist for RENAME_EXCHANGE. */ + error = SET_ERROR(ENOENT); + goto out; + } + + /* Set up inode creation for RENAME_WHITEOUT. */ + if (flags & RENAME_WHITEOUT) { + error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr); + if (error) + goto out; + + zpl_vap_init(&wo_vap, ZTOI(sdzp), S_IFCHR, cr); + /* Can't use of makedevice() here, so hard-code it. */ + wo_vap.va_rdev = 0; + + error = zfs_acl_ids_create(sdzp, 0, &wo_vap, cr, NULL, + &acl_ids); + if (error) + goto out; + have_acl = B_TRUE; + + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) { + error = SET_ERROR(EDQUOT); + goto out; + } } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); + dmu_tx_hold_zap(tx, sdzp->z_id, !!(flags & RENAME_EXCHANGE), snm); dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); if (sdzp != tdzp) { dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); @@ -3902,7 +3938,21 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tzp); } + if (flags & RENAME_WHITEOUT) { + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm); + dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); + if (!zfsvfs->z_use_sa && + acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } + } + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); zfs_sa_upgrade_txholds(tx, szp); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); @@ -3932,58 +3982,111 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, return (error); } - if (tzp) /* Attempt to remove the existing target */ - error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); + /* + * Unlink the source. + */ + szp->z_pflags |= ZFS_AV_MODIFIED; + if (tdzp->z_pflags & ZFS_PROJINHERIT) + szp->z_pflags |= ZFS_PROJINHERIT; - if (error == 0) { - error = zfs_link_create(tdl, szp, tx, ZRENAMING); - if (error == 0) { - szp->z_pflags |= ZFS_AV_MODIFIED; - if (tdzp->z_pflags & ZFS_PROJINHERIT) - szp->z_pflags |= ZFS_PROJINHERIT; + error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + (void *)&szp->z_pflags, sizeof (uint64_t), tx); + VERIFY0(error); + + error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); + if (error) + goto commit; + + /* + * Unlink the target. + */ + if (tzp) { + int tzflg = zflg; + + if (flags & RENAME_EXCHANGE) { + /* This inode will be re-linked soon. */ + tzflg |= ZRENAMING; - error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), - (void *)&szp->z_pflags, sizeof (uint64_t), tx); + tzp->z_pflags |= ZFS_AV_MODIFIED; + if (sdzp->z_pflags & ZFS_PROJINHERIT) + tzp->z_pflags |= ZFS_PROJINHERIT; + + error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + (void *)&tzp->z_pflags, sizeof (uint64_t), tx); ASSERT0(error); + } + error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL); + if (error) + goto commit_link_szp; + } - error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); - if (error == 0) { - zfs_log_rename(zilog, tx, TX_RENAME | - (flags & FIGNORECASE ? TX_CI : 0), sdzp, - sdl->dl_name, tdzp, tdl->dl_name, szp); - } else { - /* - * At this point, we have successfully created - * the target name, but have failed to remove - * the source name. Since the create was done - * with the ZRENAMING flag, there are - * complications; for one, the link count is - * wrong. The easiest way to deal with this - * is to remove the newly created target, and - * return the original error. This must - * succeed; fortunately, it is very unlikely to - * fail, since we just created it. - */ - VERIFY3U(zfs_link_destroy(tdl, szp, tx, - ZRENAMING, NULL), ==, 0); - } - } else { - /* - * If we had removed the existing target, subsequent - * call to zfs_link_create() to add back the same entry - * but, the new dnode (szp) should not fail. - */ - ASSERT(tzp == NULL); + /* + * Create the new target links: + * * We always link the target. + * * RENAME_WHITEOUT: Create a whiteout inode in-place of the source. + * * RENAME_EXCHANGE: Link the old target to the source. + */ + error = zfs_link_create(tdl, szp, tx, ZRENAMING); + if (error) { + /* + * If we have removed the existing target, a subsequent call to + * zfs_link_create() to add back the same entry, but with a new + * dnode (szp), should not fail. + */ + ASSERT3P(tzp, ==, NULL); + goto commit_link_tzp; + } + + if (flags & RENAME_EXCHANGE) { + error = zfs_link_create(sdl, tzp, tx, ZRENAMING); + /* + * The same argument as zfs_link_create() failing for + * szp applies here, since the source directory must + * have had an entry we are replacing. + */ + ASSERT3U(error, ==, 0); + if (error) + goto commit_unlink_td_szp; + } else if (flags & RENAME_WHITEOUT) { + zfs_mknode(sdzp, &wo_vap, tx, cr, 0, &wzp, &acl_ids); + error = zfs_link_create(sdl, wzp, tx, ZNEW); + if (error) { + zfs_znode_delete(wzp, tx); + remove_inode_hash(ZTOI(wzp)); + goto commit_unlink_td_szp; } + /* No need to zfs_log_create_txtype here. */ + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + if (flags & RENAME_EXCHANGE) { + zfs_log_rename_exchange(zilog, tx, + (flags & FIGNORECASE ? TX_CI : 0), sdzp, + sdl->dl_name, tdzp, tdl->dl_name, szp); + } else if (flags & RENAME_WHITEOUT) { + vsecattr_t vsecp; + + vsecp.vsa_mask |= VSA_ACE_ALLTYPES; + error = zfs_getacl(szp, &vsecp, B_TRUE, cr); + VERIFY0(error); + + zfs_log_rename_whiteout(zilog, tx, + (flags & FIGNORECASE ? TX_CI : 0), sdzp, + sdl->dl_name, tdzp, tdl->dl_name, szp, wzp, + &vsecp, acl_ids.z_fuidp, &wo_vap); + } else { + zfs_log_rename(zilog, tx, + (flags & FIGNORECASE ? TX_CI : 0), sdzp, + sdl->dl_name, tdzp, tdl->dl_name, szp); } +commit: dmu_tx_commit(tx); out: - if (zl != NULL) - zfs_rename_unlock(&zl); - - zfs_dirent_unlock(sdl); - zfs_dirent_unlock(tdl); + if (have_acl) + zfs_acl_ids_free(&acl_ids); zfs_inode_update(sdzp); if (sdzp == tdzp) @@ -3994,16 +4097,57 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, zfs_inode_update(szp); iput(ZTOI(szp)); + if (wzp) { + zfs_inode_update(wzp); + iput(ZTOI(wzp)); + } if (tzp) { zfs_inode_update(tzp); iput(ZTOI(tzp)); } + if (zl != NULL) + zfs_rename_unlock(&zl); + + zfs_dirent_unlock(sdl); + zfs_dirent_unlock(tdl); + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); + + /* + * Clean-up path for broken link state. + * + * At this point we are in a (very) bad state, so we need to do our + * best to correct the state. In particular, all of the nlinks are + * wrong because we were destroying and creating links with ZRENAMING. + * + * In some form, all of thee operations have to resolve the state: + * + * * link_destroy() *must* succeed. Fortunately, this is very likely + * since we only just created it. + * + * * link_create()s are allowed to fail (though they shouldn't because + * we only just unlinked them and are putting the entries back + * during clean-up). But if they fail, we can just forcefully drop + * the nlink value to (at the very least) avoid broken nlink values + * -- though in the case of non-empty directories we will have to + * panic (otherwise we'd have a leaked directory with a broken ..). + */ +commit_unlink_td_szp: + VERIFY3U(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL), ==, 0); +commit_link_tzp: + if (tzp) { + if (zfs_link_create(tdl, tzp, tx, ZRENAMING)) + VERIFY3U(zfs_drop_nlink(tzp, tx, NULL), ==, 0); + } +commit_link_szp: + if (zfs_link_create(sdl, szp, tx, ZRENAMING)) + VERIFY3U(zfs_drop_nlink(szp, tx, NULL), ==, 0); + goto commit; } /* diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index a476e58dd819..36453aa07ab1 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -409,7 +409,12 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) break; case S_IFDIR: +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER + ip->i_flags |= S_IOPS_WRAPPER; + ip->i_op = &zpl_dir_inode_operations.ops; +#else ip->i_op = &zpl_dir_inode_operations; +#endif ip->i_fop = &zpl_dir_file_operations; ITOZ(ip)->z_zn_prefetch = B_TRUE; break; diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index 124021166291..fc0f16b3deb3 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -397,13 +397,13 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, int error; fstrans_cookie_t cookie; - /* We don't have renameat2(2) support */ - if (flags) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return (-EINVAL); crhold(cr); cookie = spl_fstrans_mark(); - error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0); + error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, + flags); spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -411,7 +411,7 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, return (error); } -#ifndef HAVE_RENAME_WANTS_FLAGS +#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_RENAME2) static int zpl_rename(struct inode *sdip, struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry) @@ -602,6 +602,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return (error); } +#if 0 static int #ifdef HAVE_D_REVALIDATE_NAMEIDATA zpl_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -641,6 +642,7 @@ zpl_revalidate(struct dentry *dentry, unsigned int flags) return (1); } +#endif const struct inode_operations zpl_inode_operations = { .setattr = zpl_setattr, @@ -659,7 +661,12 @@ const struct inode_operations zpl_inode_operations = { #endif /* CONFIG_FS_POSIX_ACL */ }; +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER +const struct inode_operations_wrapper zpl_dir_inode_operations = { + .ops = { +#else const struct inode_operations zpl_dir_inode_operations = { +#endif .create = zpl_create, .lookup = zpl_lookup, .link = zpl_link, @@ -668,7 +675,9 @@ const struct inode_operations zpl_dir_inode_operations = { .mkdir = zpl_mkdir, .rmdir = zpl_rmdir, .mknod = zpl_mknod, -#ifdef HAVE_RENAME_WANTS_FLAGS +#ifdef HAVE_RENAME2 + .rename2 = zpl_rename2, +#elif defined(HAVE_RENAME_WANTS_FLAGS) .rename = zpl_rename2, #else .rename = zpl_rename, @@ -690,6 +699,10 @@ const struct inode_operations zpl_dir_inode_operations = { #endif /* HAVE_SET_ACL */ .get_acl = zpl_get_acl, #endif /* CONFIG_FS_POSIX_ACL */ +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER + }, + .rename2 = zpl_rename2, +#endif }; const struct inode_operations zpl_symlink_inode_operations = { @@ -731,6 +744,8 @@ const struct inode_operations zpl_special_inode_operations = { #endif /* CONFIG_FS_POSIX_ACL */ }; +#if 0 dentry_operations_t zpl_dentry_operations = { .d_revalidate = zpl_revalidate, }; +#endif diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 9cf55c29b9f1..3cf7fe927c18 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -487,6 +487,7 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, if (zil_replaying(zilog, tx)) return; + txtype |= TX_RENAME; itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); lr = (lr_rename_t *)&itx->itx_lr; lr->lr_sdoid = sdzp->z_id; @@ -498,6 +499,102 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } +/* + * At the moment, only Linux supports the renameat2 variant of renameat, which + * adds three new flags of interest for us: + * + * RENAME_NOREPLACE: if the target name at the moment of the call exists, + * don't rewrite it and return error + * RENAME_EXCHANGE: atomically swap the two names on the filesystem + * RENAME_WHITEOUT: creates a whiteout inode in place of renamed file as + * an atomic operation + * + * Ideally, these operations should be represented as new ZFS Intent Log + * txtypes, which would mandate a new ZFS feature flag due to the on-disk + * format change. One would then use spa_feature_incr/decr functions to + * indicate that the on-disk log contains these new txtypes. However, these + * functions are only supposed to be called from the txg syncing context. + * + * This means that we would need to force out an in-progress txg to disk and + * start a new one before writing any ZIL records. This would ensure that + * previous versions of ZFS which do not support these log txtypes would + * never encounter them during ZIL replay. Doing this would hurt performance. + * + * Alternatively, we could just activate the feature on a pool when these + * renameat2 flags get first used and leave it at that. This would render + * the pool read-only importable on implementations without the new feature + * flag, even when no new txtypes were present on-disk. This could be almost + * all of the time, so it'd be a shame to render the pool read-only on + * non-Linux platforms. + * + * Instead, we choose to rely on the fact that the ZIL is replayed in single- + * threaded mode before the dataset is mounted. This means we can represent + * the otherwise atomic operations as a series of plain good old txtypes + * known to all current OpenZFS implementations. To do that, we use the + * following functions (at least until more platforms implement renameat2). + * + * zfs_log_rename_exchange + * zfs_log_rename_whiteout + */ + +void +zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) +{ + zfs_dirlock_t *tmpdl; + znode_t *tmpzp = NULL; + char *tmpname; + int retries = 0; + int pos = 0; + int error; + + tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * To represent atomic rename with old non-atomic operations, we need + * a temporary new name; so we try picking a name until we succeed, + * then we get a dirent lock for that temp name until the final itx + * gets queued + */ +retry: + retries++; + pos = snprintf(tmpname, MAXPATHLEN, "%s.zfs_renameat2_emul_", dname); + + for (int i = 0; i < 16; i++) { + int r = 0xFF; + random_get_pseudo_bytes((void *)&r, 1); + pos += snprintf(tmpname+pos, MAXPATHLEN, "%02x", r); + } + + error = zfs_dirent_lock(&tmpdl, tdzp, tmpname, + &tmpzp, ZNEW, NULL, NULL); + + VERIFY3U(retries, <, 10); + if (error) + goto retry; + + /* dst -> tmp */ + zfs_log_rename(zilog, tx, txtype, tdzp, dname, tdzp, tmpname, szp); + /* src -> dst */ + zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); + /* tmp -> src */ + zfs_log_rename(zilog, tx, txtype, tdzp, tmpname, sdzp, sname, szp); + + zfs_dirent_unlock(tmpdl); + kmem_free(tmpname, MAXPATHLEN); +} + +/* See comment above zfs_log_rename_exchange */ +void +zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp, + znode_t *wzp, vsecattr_t *vsecp, zfs_fuid_info_t *fuidp, vattr_t *vap) +{ + zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); + txtype |= TX_CREATE; + zfs_log_create(zilog, tx, txtype, sdzp, wzp, sname, vsecp, fuidp, vap); +} + /* * zfs_log_write() handles TX_WRITE transactions. The specified callback is * called as soon as the write is on stable storage (be it via a DMU sync or a diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index a134c8db8379..8a00cbfac6fb 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -90,6 +90,10 @@ tags = ['functional', 'io'] tests = ['mmap_libaio_001_pos'] tags = ['functional', 'mmap'] +[tests/functional/overlayfs:Linux] +tests = ['overlayfs_basic', 'overlayfs_layered'] +tags = ['functional', 'overlayfs'] + [tests/functional/procfs:Linux] tests = ['procfs_list_basic', 'procfs_list_concurrent_readers', 'procfs_list_stale_read', 'pool_state'] @@ -105,6 +109,10 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos', 'projecttree_001_pos', 'projecttree_002_pos', 'projecttree_003_neg'] tags = ['functional', 'projectquota'] +[tests/functional/renameat2:Linux] +tests = ['renameat2_noreplace', 'renameat2_exchange', 'renameat2_whiteout'] +tags = ['functional', 'renameat2'] + [tests/functional/rsend:Linux] tests = ['send_realloc_dnode_size', 'send_encrypted_files'] tags = ['functional', 'rsend'] diff --git a/tests/test-runner/bin/zts-report.py b/tests/test-runner/bin/zts-report.py index 0e78c3547914..6ef7efbbc91f 100755 --- a/tests/test-runner/bin/zts-report.py +++ b/tests/test-runner/bin/zts-report.py @@ -77,6 +77,11 @@ python_reason = 'Python v3.5 or newer required' python_deps_reason = 'Python modules missing: python-cffi' +# +# Some tests require that the kernel supports renameat2 syscall. +# +renameat2_reason = 'Kernel renameat2 support required' + # # Some tests require the O_TMPFILE flag which was first introduced in the # 3.11 kernel. @@ -250,6 +255,7 @@ 'no_space/enospc_002_pos': ['FAIL', enospc_reason], 'projectquota/setup': ['SKIP', exec_reason], 'redundancy/redundancy_004_neg': ['FAIL', '7290'], + 'renameat2/setup': ['SKIP', renameat2_reason], 'reservation/reservation_008_pos': ['FAIL', '7741'], 'reservation/reservation_018_pos': ['FAIL', '5642'], 'rsend/rsend_019_pos': ['FAIL', '6086'], diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 09c59f591a83..5e0118d88167 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -22,6 +22,7 @@ SUBDIRS = \ randfree_file \ randwritecomp \ readmmap \ + renameat2 \ rename_dir \ rm_lnkcnt_zero_file \ threadsappend \ diff --git a/tests/zfs-tests/cmd/renameat2/.gitignore b/tests/zfs-tests/cmd/renameat2/.gitignore new file mode 100644 index 000000000000..1d540598db9b --- /dev/null +++ b/tests/zfs-tests/cmd/renameat2/.gitignore @@ -0,0 +1 @@ +/renameat2 diff --git a/tests/zfs-tests/cmd/renameat2/Makefile.am b/tests/zfs-tests/cmd/renameat2/Makefile.am new file mode 100644 index 000000000000..0d68b02b0d8e --- /dev/null +++ b/tests/zfs-tests/cmd/renameat2/Makefile.am @@ -0,0 +1,6 @@ +include $(top_srcdir)/config/Rules.am + +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin + +pkgexec_PROGRAMS = renameat2 +renameat2_SOURCES = renameat2.c diff --git a/tests/zfs-tests/cmd/renameat2/renameat2.c b/tests/zfs-tests/cmd/renameat2/renameat2.c new file mode 100644 index 000000000000..a9d0a8b20adf --- /dev/null +++ b/tests/zfs-tests/cmd/renameat2/renameat2.c @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 */ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (C) 2019 Aleksa Sarai + * Copyright (C) 2019 SUSE LLC + */ + +/* + * mv(1) doesn't currently support RENAME_{EXCHANGE,WHITEOUT} so this is a very + * simple renameat2(2) wrapper for the OpenZFS self-tests. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifndef SYS_renameat2 +#ifdef __NR_renameat2 +#define SYS_renameat2 __NR_renameat2 +#elif defined(__x86_64__) +#define SYS_renameat2 316 +#elif defined(__i386__) +#define SYS_renameat2 353 +#elif defined(__arm__) || defined(__aarch64__) +#define SYS_renameat2 382 +#else +#error "SYS_renameat2 not known for this architecture." +#endif +#endif + +#ifndef RENAME_NOREPLACE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#endif +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#endif +#ifndef RENAME_WHITEOUT +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +#endif + +/* glibc doesn't provide renameat2 wrapper, let's use our own */ +static int +sys_renameat2(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, unsigned int flags) +{ + int ret = syscall(SYS_renameat2, olddirfd, oldpath, newdirfd, newpath, + flags); + return ((ret < 0) ? -errno : ret); +} + +static void +usage(void) +{ + fprintf(stderr, "usage: renameat2 [-Cnwx] src dst\n"); + exit(1); +} + +static void +check(void) +{ + int err = sys_renameat2(AT_FDCWD, ".", AT_FDCWD, ".", RENAME_EXCHANGE); + exit(err == -ENOSYS); +} + +int +main(int argc, char **argv) +{ + char *src, *dst; + int ch, err; + unsigned int flags = 0; + + while ((ch = getopt(argc, argv, "Cnwx")) >= 0) { + switch (ch) { + case 'C': + check(); + break; + case 'n': + flags |= RENAME_NOREPLACE; + break; + case 'w': + flags |= RENAME_WHITEOUT; + break; + case 'x': + flags |= RENAME_EXCHANGE; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (argc != 2) + usage(); + src = argv[0]; + dst = argv[1]; + + err = sys_renameat2(AT_FDCWD, src, AT_FDCWD, dst, flags); + if (err < 0) + fprintf(stderr, "renameat2: %s", strerror(-err)); + return (err != 0); +} diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 52758a1aa1b2..a49a8fa341fd 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -179,6 +179,7 @@ export ZFSTEST_FILES='chg_usr_exec randfree_file randwritecomp readmmap + renameat2 rename_dir rm_lnkcnt_zero_file threadsappend diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am index c8706c407fa0..3dabc620936c 100644 --- a/tests/zfs-tests/tests/functional/Makefile.am +++ b/tests/zfs-tests/tests/functional/Makefile.am @@ -44,6 +44,7 @@ SUBDIRS = \ no_space \ nopwrite \ online_offline \ + overlayfs \ pool_checkpoint \ pool_names \ poolversion \ @@ -58,6 +59,7 @@ SUBDIRS = \ refquota \ refreserv \ removal \ + renameat2 \ rename_dirs \ replacement \ reservation \ diff --git a/tests/zfs-tests/tests/functional/overlayfs/Makefile.am b/tests/zfs-tests/tests/functional/overlayfs/Makefile.am new file mode 100644 index 000000000000..f38457131e27 --- /dev/null +++ b/tests/zfs-tests/tests/functional/overlayfs/Makefile.am @@ -0,0 +1,6 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/overlayfs +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + overlayfs_basic.ksh \ + overlayfs_layered.ksh diff --git a/tests/zfs-tests/tests/functional/overlayfs/cleanup.ksh b/tests/zfs-tests/tests/functional/overlayfs/cleanup.ksh new file mode 100755 index 000000000000..3166bd6ec16e --- /dev/null +++ b/tests/zfs-tests/tests/functional/overlayfs/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/overlayfs/overlayfs_basic.ksh b/tests/zfs-tests/tests/functional/overlayfs/overlayfs_basic.ksh new file mode 100755 index 000000000000..7ea32ebf50a9 --- /dev/null +++ b/tests/zfs-tests/tests/functional/overlayfs/overlayfs_basic.ksh @@ -0,0 +1,69 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 + +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Pavel Snajdr /dev/null +cd $TESTDIR +mkdir lower upper work overlay +touch lower/{a,b} +touch upper/{c,d} +echo "orig" > lower/testfile +echo "upper" > upper/testfile + +# Basic overlayfs mount +log_must mount -t overlay \ + -o lowerdir=lower/,upperdir=upper/,workdir=work/ \ + none overlay/ + +# Does presented overlay have all the files we expect? +log_must stat overlay/{a,b,c,d,testfile} + +# We'd expect content of the upper test file +log_must grep upper overlay/testfile + +echo "new" > overlay/testfile + +# We'd expect content of the upper test file changed now +log_must grep new upper/testfile + +log_must umount $TESTDIR/overlay + +log_assert "ZFS supports being upper/lower/mnt of overlayfs as expected." diff --git a/tests/zfs-tests/tests/functional/overlayfs/overlayfs_layered.ksh b/tests/zfs-tests/tests/functional/overlayfs/overlayfs_layered.ksh new file mode 100755 index 000000000000..00d579ac17ee --- /dev/null +++ b/tests/zfs-tests/tests/functional/overlayfs/overlayfs_layered.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 + +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Pavel Snajdr /dev/null +cd $TESTDIR +mkdir lower middle upper work merge +mkdir {lower,middle,upper}/{dira,dirb} +touch lower/{dira,dirb}/{a,b} +touch middle/{dira,dirb}/{c,d} +touch upper/{dira,dirb}/{e,f} +echo "orig" > lower/testfile +echo "mid" > middle/testfile +echo "upper" > upper/testfile + +# multi-level overlayfs mount +log_must mount -t overlay \ + -o lowerdir=lower/:middle/,upperdir=upper/,workdir=work/ \ + none merge/ + +# Does presented overlay have all the files we expect? +log_must stat merge/{dira,dirb}/{a,b,c,d,e,f} merge/testfile + +# We'd expect content of the upper test file +log_must grep upper merge/testfile + +echo "new" > merge/testfile + +# We'd expect content of the lower test file not changed +log_must grep orig lower/testfile + +# We'd expect content of the upper test file changed to new +log_must grep new upper/testfile + +log_must umount $TESTDIR/merge + +log_assert "ZFS supports multi-layered overlayfs as expected." diff --git a/tests/zfs-tests/tests/functional/overlayfs/setup.ksh b/tests/zfs-tests/tests/functional/overlayfs/setup.ksh new file mode 100755 index 000000000000..9738d569cdb7 --- /dev/null +++ b/tests/zfs-tests/tests/functional/overlayfs/setup.ksh @@ -0,0 +1,43 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +if ! is_linux ; then + log_unsupported "overlayfs is linux-only" +elif ! grep overlay /proc/filesystems; then + log_unsupported "kernel built without overlayfs or module not loaded" +elif ! renameat2 -C ; then + log_unsupported "renameat2 not supported on this (pre-3.15) linux kernel" +fi + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/renameat2/Makefile.am b/tests/zfs-tests/tests/functional/renameat2/Makefile.am new file mode 100644 index 000000000000..bd8d6c9d68bf --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/Makefile.am @@ -0,0 +1,7 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/renameat2 +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + renameat2_noreplace.ksh \ + renameat2_exchange.ksh \ + renameat2_whiteout.ksh diff --git a/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh b/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh new file mode 100755 index 000000000000..3166bd6ec16e --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh new file mode 100755 index 000000000000..d85bc9a0741f --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh @@ -0,0 +1,63 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 + +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_EXCHANGE." +log_onexit cleanup + +cd $TESTDIR +echo "foo" > foo +echo "bar" > bar + +# Self-exchange is a no-op. +log_must renameat2 -x foo foo +log_must grep '^foo$' foo + +# Basic exchange. +log_must renameat2 -x foo bar +log_must grep '^bar$' foo +log_must grep '^foo$' bar + +# And exchange back. +log_must renameat2 -x foo bar +log_must grep '^foo$' foo +log_must grep '^bar$' bar + +# Exchange with a bad path should fail. +log_mustnot renameat2 -x bar baz + +log_pass "ZFS supports RENAME_EXCHANGE as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh new file mode 100755 index 000000000000..1f346f165681 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh @@ -0,0 +1,53 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 + +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_NOREPLACE." +log_onexit cleanup + +cd $TESTDIR +touch foo bar + +# Clobbers should always fail. +log_mustnot renameat2 -n foo foo +log_mustnot renameat2 -n foo bar +log_mustnot renameat2 -n bar foo + +# Regular renames should succeed. +log_must renameat2 -n bar baz + +log_pass "ZFS supports RENAME_NOREPLACE as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh new file mode 100755 index 000000000000..a75ffed4af1d --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh @@ -0,0 +1,52 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 + +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_WHITEOUT." +log_onexit cleanup + +cd $TESTDIR +echo "whiteout" > whiteout + +# Straight-forward rename-with-whiteout. +log_must renameat2 -w whiteout new +# Check new file. +log_must grep '^whiteout$' new +# Check that the whiteout is actually a {0,0} char device. +log_must grep '^character special file:0:0$' <<<"$(stat -c '%F:%t:%T' whiteout)" + +log_pass "ZFS supports RENAME_WHITEOUT as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/setup.ksh b/tests/zfs-tests/tests/functional/renameat2/setup.ksh new file mode 100755 index 000000000000..1185557496a0 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/setup.ksh @@ -0,0 +1,41 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +if ! is_linux ; then + log_unsupported "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_unsupported "renameat2 not supported on this (pre-3.15) linux kernel" +fi + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index 8954caa1c933..63fa6fc3970e 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -163,6 +163,22 @@ log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \ /$TESTPOOL/$TESTFS/link_and_unlink.link log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link +# We can't test TX_EXCHANGE or TX_WHITEOUT without renameat2(2) support. +if ! is_linux ; then + log_note "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_note "renameat2 not supported on this (pre-3.15) linux kernel" +else + # TX_EXCHANGE + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-a bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-b bs=1k count=1 + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{a,b} + + # TX_WHITEOUT + log_must mkfile 1k /$TESTPOOL/$TESTFS/whiteout + log_must renameat2 -w /$TESTPOOL/$TESTFS/whiteout{,-moved} +fi + # # 4. Copy TESTFS to temporary location (TESTDIR/copy) #