From 5a1796fcba1a1a47f48a242cbd715351c37fe777 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Thu, 5 Dec 2019 01:52:27 +0100 Subject: [PATCH 1/8] Remove zpl_revalidate: fix snapshot rollback Open files, which aren't present in the snapshot, which is being roll-backed to, need to disappear from the visible VFS image of the dataset. Kernel provides d_drop function to drop invalid entry from the dcache, but inode can be referenced by dentry multiple dentries. The introduced zpl_d_drop_aliases function walks and invalidates all aliases of an inode. Signed-off-by: Pavel Snajdr --- config/kernel-dentry-alias.m4 | 30 +++++++++++++ config/kernel.m4 | 2 + include/os/linux/kernel/linux/dcache_compat.h | 21 +++++++++ include/os/linux/zfs/sys/zpl.h | 1 - module/os/linux/zfs/zfs_vfsops.c | 2 +- module/os/linux/zfs/zpl_inode.c | 44 ------------------- 6 files changed, 54 insertions(+), 46 deletions(-) create mode 100644 config/kernel-dentry-alias.m4 diff --git a/config/kernel-dentry-alias.m4 b/config/kernel-dentry-alias.m4 new file mode 100644 index 000000000000..f0ddb8d010b0 --- /dev/null +++ b/config/kernel-dentry-alias.m4 @@ -0,0 +1,30 @@ +dnl # +dnl # 3.18 API change +dnl # Dentry aliases are in d_u struct dentry member +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [ + ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [ + #include + #include + #include + ], [ + struct inode *inode __attribute__ ((unused)) = NULL; + struct dentry *dentry __attribute__ ((unused)) = NULL; + hlist_for_each_entry(dentry, &inode->i_dentry, + d_u.d_alias) { + d_drop(dentry); + } + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [ + AC_MSG_CHECKING([whether dentry aliases are in d_u member]) + ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1, + [dentry aliases are in d_u member]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + diff --git a/config/kernel.m4 b/config/kernel.m4 index 018f12a94d05..edcb70cbd64b 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -94,6 +94,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED ZFS_AC_KERNEL_SRC_DENTRY + ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT @@ -212,6 +213,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_DENTRY + ZFS_AC_KERNEL_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h index d0588a82e9ad..e2aca1629735 100644 --- a/include/os/linux/kernel/linux/dcache_compat.h +++ b/include/os/linux/kernel/linux/dcache_compat.h @@ -61,4 +61,25 @@ d_clear_d_op(struct dentry *dentry) DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE); } +/* + * Walk and invalidate all dentry aliases of an inode + * unless it's a mountpoint + */ +static inline void +zpl_d_drop_aliases(struct inode *inode) +{ + struct dentry *dentry; + spin_lock(&inode->i_lock); +#ifdef HAVE_DENTRY_D_U_ALIASES + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { +#else + hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { +#endif + if (!IS_ROOT(dentry) && !d_mountpoint(dentry) && + (dentry->d_inode == inode)) { + d_drop(dentry); + } + } + spin_unlock(&inode->i_lock); +} #endif /* _ZFS_DCACHE_H */ diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 737f6d82ebff..9e5eede44073 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -45,7 +45,6 @@ extern const struct inode_operations zpl_inode_operations; extern const struct inode_operations zpl_dir_inode_operations; extern const struct inode_operations zpl_symlink_inode_operations; extern const struct inode_operations zpl_special_inode_operations; -extern dentry_operations_t zpl_dentry_operations; extern const struct address_space_operations zpl_address_space_operations; extern const struct file_operations zpl_file_operations; extern const struct file_operations zpl_dir_file_operations; diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index 2ce96d1232b3..daada0a7757d 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1519,7 +1519,6 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; - sb->s_d_op = &zpl_dentry_operations; /* Set features for file system. */ zfs_set_fuid_feature(zfsvfs); @@ -1876,6 +1875,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) zp = list_next(&zfsvfs->z_all_znodes, zp)) { err2 = zfs_rezget(zp); if (err2) { + zpl_d_drop_aliases(ZTOI(zp)); remove_inode_hash(ZTOI(zp)); zp->z_is_stale = B_TRUE; } diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index d2d14bf93aa5..500d5306a8b1 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -698,46 +698,6 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) return (error); } -static int -#ifdef HAVE_D_REVALIDATE_NAMEIDATA -zpl_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - unsigned int flags = (nd ? nd->flags : 0); -#else -zpl_revalidate(struct dentry *dentry, unsigned int flags) -{ -#endif /* HAVE_D_REVALIDATE_NAMEIDATA */ - /* CSTYLED */ - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; - int error; - - if (flags & LOOKUP_RCU) - return (-ECHILD); - - /* - * After a rollback negative dentries created before the rollback - * time must be invalidated. Otherwise they can obscure files which - * are only present in the rolled back dataset. - */ - if (dentry->d_inode == NULL) { - spin_lock(&dentry->d_lock); - error = time_before(dentry->d_time, zfsvfs->z_rollback_time); - spin_unlock(&dentry->d_lock); - - if (error) - return (0); - } - - /* - * The dentry may reference a stale inode if a mounted file system - * was rolled back to a point in time where the object didn't exist. - */ - if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale) - return (0); - - return (1); -} - const struct inode_operations zpl_inode_operations = { .setattr = zpl_setattr, .getattr = zpl_getattr, @@ -829,7 +789,3 @@ const struct inode_operations zpl_special_inode_operations = { #endif /* CONFIG_FS_POSIX_ACL */ .permission = zpl_permission, }; - -dentry_operations_t zpl_dentry_operations = { - .d_revalidate = zpl_revalidate, -}; From 75f3263448c2ff8ff8a5a3af6e587481770b186b Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 18 May 2022 20:29:33 +1000 Subject: [PATCH 2/8] debug: add VERIFY_{IMPLY,EQUIV} variants This allows for much cleaner VERIFY-level assertions. Signed-off-by: Aleksa Sarai --- include/os/linux/spl/sys/debug.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/include/os/linux/spl/sys/debug.h b/include/os/linux/spl/sys/debug.h index dc6b85eebff7..5c56ff34ed9b 100644 --- a/include/os/linux/spl/sys/debug.h +++ b/include/os/linux/spl/sys/debug.h @@ -128,6 +128,16 @@ void spl_dumpstack(void); typedef char __attribute__ ((unused)) \ __compile_time_assertion__ ## y[(x) ? 1 : -1] +#define VERIFY_IMPLY(A, B) \ + ((void)(likely((!(A)) || (B)) || \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "(" #A ") implies (" #B ")"))) + +#define VERIFY_EQUIV(A, B) \ + ((void)(likely(!!(A) == !!(B)) || \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "(" #A ") is equivalent to (" #B ")"))) + /* * Debugging disabled (--disable-debug) */ @@ -153,15 +163,8 @@ void spl_dumpstack(void); #define ASSERT3P VERIFY3P #define ASSERT0 VERIFY0 #define ASSERT VERIFY -#define IMPLY(A, B) \ - ((void)(likely((!(A)) || (B)) || \ - spl_panic(__FILE__, __FUNCTION__, __LINE__, \ - "(" #A ") implies (" #B ")"))) -#define EQUIV(A, B) \ - ((void)(likely(!!(A) == !!(B)) || \ - spl_panic(__FILE__, __FUNCTION__, __LINE__, \ - "(" #A ") is equivalent to (" #B ")"))) -/* END CSTYLED */ +#define IMPLY VERIFY_IMPLY +#define EQUIV VERIFY_EQUIV #endif /* NDEBUG */ From 4e6ae5ed30061b8619f3f9ca68f0ba9da0e2a9a3 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 26 Apr 2019 23:23:07 +1000 Subject: [PATCH 3/8] zfs_rename: restructure to have cleaner fallbacks This is in preparation for RENAME_EXCHANGE and RENAME_WHITEOUT support for ZoL, but the changes here allow for far nicer fallbacks than the previous implementation (the source and target are re-linked in case of the final link failing). In addition, a small cleanup was done for the "target exists but is a different type" codepath so that it's more understandable. Signed-off-by: Aleksa Sarai --- include/os/linux/zfs/sys/zfs_dir.h | 1 + module/os/linux/zfs/zfs_dir.c | 95 +++++++++++++++++------ module/os/linux/zfs/zfs_vnops_os.c | 119 ++++++++++++++++------------- 3 files changed, 139 insertions(+), 76 deletions(-) diff --git a/include/os/linux/zfs/sys/zfs_dir.h b/include/os/linux/zfs/sys/zfs_dir.h index 0f15e43452b2..1f3cf1323923 100644 --- a/include/os/linux/zfs/sys/zfs_dir.h +++ b/include/os/linux/zfs/sys/zfs_dir.h @@ -52,6 +52,7 @@ extern "C" { extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **, int, int *, pathname_t *); extern void zfs_dirent_unlock(zfs_dirlock_t *); +extern int zfs_drop_nlink(znode_t *, dmu_tx_t *, boolean_t *); extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int); extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int, boolean_t *); diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index 82b32d1cc3fa..9f07d9a62688 100644 --- a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -926,6 +926,74 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, return (error); } +static int +zfs_drop_nlink_locked(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp) +{ + zfsvfs_t *zfsvfs = ZTOZSB(zp); + int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); + boolean_t unlinked = B_FALSE; + sa_bulk_attr_t bulk[3]; + uint64_t mtime[2], ctime[2]; + uint64_t links; + int count = 0; + int error; + + if (zp_is_dir && !zfs_dirempty(zp)) + return (SET_ERROR(ENOTEMPTY)); + + if (ZTOI(zp)->i_nlink <= zp_is_dir) { + zfs_panic_recover("zfs: link count on %lu is %u, " + "should be at least %u", zp->z_id, + (int)ZTOI(zp)->i_nlink, zp_is_dir + 1); + set_nlink(ZTOI(zp), zp_is_dir + 1); + } + drop_nlink(ZTOI(zp)); + if (ZTOI(zp)->i_nlink == zp_is_dir) { + zp->z_unlinked = B_TRUE; + clear_nlink(ZTOI(zp)); + unlinked = B_TRUE; + } else { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + NULL, &ctime, sizeof (ctime)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + NULL, &zp->z_pflags, sizeof (zp->z_pflags)); + zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, + ctime); + } + links = ZTOI(zp)->i_nlink; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + NULL, &links, sizeof (links)); + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + ASSERT3U(error, ==, 0); + + if (unlinkedp != NULL) + *unlinkedp = unlinked; + else if (unlinked) + zfs_unlinked_add(zp, tx); + + return (0); +} + +/* + * Forcefully drop an nlink reference from (zp) and mark it for deletion if it + * was the last link. This *must* only be done to znodes which have already + * been zfs_link_destroy()'d with ZRENAMING. This is explicitly only used in + * the error path of zfs_rename(), where we have to correct the nlink count if + * we failed to link the target as well as failing to re-link the original + * znodes. + */ +int +zfs_drop_nlink(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp) +{ + int error; + + mutex_enter(&zp->z_lock); + error = zfs_drop_nlink_locked(zp, tx, unlinkedp); + mutex_exit(&zp->z_lock); + + return (error); +} + /* * Unlink zp from dl, and mark zp for deletion if this was the last link. Can * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY). @@ -966,31 +1034,8 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, return (error); } - if (ZTOI(zp)->i_nlink <= zp_is_dir) { - zfs_panic_recover("zfs: link count on %lu is %u, " - "should be at least %u", zp->z_id, - (int)ZTOI(zp)->i_nlink, zp_is_dir + 1); - set_nlink(ZTOI(zp), zp_is_dir + 1); - } - drop_nlink(ZTOI(zp)); - if (ZTOI(zp)->i_nlink == zp_is_dir) { - zp->z_unlinked = B_TRUE; - clear_nlink(ZTOI(zp)); - unlinked = B_TRUE; - } else { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), - NULL, &ctime, sizeof (ctime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), - NULL, &zp->z_pflags, sizeof (zp->z_pflags)); - zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, - ctime); - } - links = ZTOI(zp)->i_nlink; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), - NULL, &links, sizeof (links)); - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - count = 0; - ASSERT(error == 0); + /* The only error is !zfs_dirempty() and we checked earlier. */ + ASSERT3U(zfs_drop_nlink_locked(zp, tx, &unlinked), ==, 0); mutex_exit(&zp->z_lock); } else { error = zfs_dropname(dl, zp, dzp, tx, flag); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index f5f3b3b7532f..3ab858be428e 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -2921,16 +2921,12 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, /* * Source and target must be the same type. */ - if (S_ISDIR(ZTOI(szp)->i_mode)) { - if (!S_ISDIR(ZTOI(tzp)->i_mode)) { - error = SET_ERROR(ENOTDIR); - goto out; - } - } else { - if (S_ISDIR(ZTOI(tzp)->i_mode)) { - error = SET_ERROR(EISDIR); - goto out; - } + boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0; + boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0; + + if (s_is_dir != t_is_dir) { + error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR); + goto out; } /* * POSIX dictates that when the source and target @@ -2986,51 +2982,49 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, return (error); } - if (tzp) /* Attempt to remove the existing target */ - error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); + /* + * Unlink the source. + */ + szp->z_pflags |= ZFS_AV_MODIFIED; + if (tdzp->z_pflags & ZFS_PROJINHERIT) + szp->z_pflags |= ZFS_PROJINHERIT; - if (error == 0) { - error = zfs_link_create(tdl, szp, tx, ZRENAMING); - if (error == 0) { - szp->z_pflags |= ZFS_AV_MODIFIED; - if (tdzp->z_pflags & ZFS_PROJINHERIT) - szp->z_pflags |= ZFS_PROJINHERIT; - - error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), - (void *)&szp->z_pflags, sizeof (uint64_t), tx); - ASSERT0(error); + error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + (void *)&szp->z_pflags, sizeof (uint64_t), tx); + ASSERT0(error); - error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); - if (error == 0) { - zfs_log_rename(zilog, tx, TX_RENAME | - (flags & FIGNORECASE ? TX_CI : 0), sdzp, - sdl->dl_name, tdzp, tdl->dl_name, szp); - } else { - /* - * At this point, we have successfully created - * the target name, but have failed to remove - * the source name. Since the create was done - * with the ZRENAMING flag, there are - * complications; for one, the link count is - * wrong. The easiest way to deal with this - * is to remove the newly created target, and - * return the original error. This must - * succeed; fortunately, it is very unlikely to - * fail, since we just created it. - */ - VERIFY3U(zfs_link_destroy(tdl, szp, tx, - ZRENAMING, NULL), ==, 0); - } - } else { - /* - * If we had removed the existing target, subsequent - * call to zfs_link_create() to add back the same entry - * but, the new dnode (szp) should not fail. - */ - ASSERT(tzp == NULL); - } + error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); + if (error) + goto commit; + + /* + * Unlink the target. + */ + if (tzp) { + error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); + if (error) + goto commit_link_szp; + } + + /* + * Create a new link at the target. + */ + error = zfs_link_create(tdl, szp, tx, ZRENAMING); + if (error) { + /* + * If we have removed the existing target, a subsequent call to + * zfs_link_create() to add back the same entry, but with a new + * dnode (szp), should not fail. + */ + ASSERT3P(tzp, ==, NULL); + goto commit_link_tzp; } + zfs_log_rename(zilog, tx, TX_RENAME | + (flags & FIGNORECASE ? TX_CI : 0), sdzp, + sdl->dl_name, tdzp, tdl->dl_name, szp); + +commit: dmu_tx_commit(tx); out: if (zl != NULL) @@ -3058,6 +3052,29 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, ZFS_EXIT(zfsvfs); return (error); + + /* + * Clean-up path for broken link state. + * + * At this point we are in a (very) bad state, so we need to do our + * best to correct the state. In particular, the nlink of szp is wrong + * because we were destroying and creating links with ZRENAMING. + * + * link_create()s are allowed to fail (though they shouldn't because we + * only just unlinked them and are putting the entries back during + * clean-up). But if they fail, we can just forcefully drop the nlink + * value to (at the very least) avoid broken nlink values -- though in + * the case of non-empty directories we will have to panic. + */ +commit_link_tzp: + if (tzp) { + if (zfs_link_create(tdl, tzp, tx, ZRENAMING)) + VERIFY3U(zfs_drop_nlink(tzp, tx, NULL), ==, 0); + } +commit_link_szp: + if (zfs_link_create(sdl, szp, tx, ZRENAMING)) + VERIFY3U(zfs_drop_nlink(szp, tx, NULL), ==, 0); + goto commit; } /* From 6e2eb8de3bb330a3b175c317c8330ee1934daf3b Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 22 Jun 2019 10:35:11 +1000 Subject: [PATCH 4/8] zfs_rename: support RENAME_* flags Implement support for Linux's RENAME_* flags (for renameat2). Aside from being quite useful for userspace (providing race-free ways to exchange paths and implement mv --no-clobber), they are used by overlayfs and are thus required in order to use overlayfs-on-ZFS. In order for us to represent the new renameat2(2) flags in the ZIL, we create two new transaction types for the two flags which need transactional-level support (RENAME_EXCHANGE and RENAME_WHITEOUT). RENAME_NOREPLACE does not need any ZIL support because we know that if the operation succeeded before creating the ZIL entry, there was no file to be clobbered and thus it can be treated as a regular TX_RENAME. As these new transaction flags modify the on-disk format, it is necessary to add feature flags (rename_exchange and rename_whiteout) -- two flags rather than one so that any future extensions to renameat2 can be handled consistently with another feature flag for the new feature. In order to reduce compatibility issues of pools between Linux (which supports these flags) and other operating systems (which currently don't), these feature flags are only activated once a new TX_RENAME_{EXCHANGE,WHITEOUT} is added to the ZIL and are deactivated when the ZIL is destroyed. This means that a cleanly exported pool can be imported onto a non-Linux system without issue (a pool with a ZIL log containing a new TX_RENAME_* entry can also be imported on such systems, but only in readonly mode). While this design is very similar to zilsaxattr, zilsaxattr is activated whenever the ZIL is instantiated (even if the new transaction type is never used) -- the rename_* feature flags are only activated when a new rename transaction is being added to the ZIL. Pavel helped with carrying this PR and reworking the ZIL bits after I originally wrote it, but the final version was effectively rewritten completely because the ZIL handling needed to be reworked entirely to be crash safe and to use feature flags. Cc: Pavel Snajdr Signed-off-by: Aleksa Sarai --- AUTHORS | 2 + cmd/zdb/zdb_il.c | 20 +- cmd/ztest/ztest.c | 3 + config/kernel-rename.m4 | 44 ++- include/os/freebsd/zfs/sys/zfs_vnops_os.h | 2 +- include/os/linux/kernel/linux/vfs_compat.h | 13 + include/os/linux/zfs/sys/zfs_vnops_os.h | 2 +- include/sys/zfs_znode.h | 6 + include/sys/zil.h | 5 +- include/zfeature_common.h | 2 + module/os/freebsd/zfs/zfs_vnops_os.c | 5 +- module/os/linux/zfs/zfs_dir.c | 3 +- module/os/linux/zfs/zfs_vnops_os.c | 295 ++++++++++++++++-- module/os/linux/zfs/zpl_inode.c | 18 +- module/zcommon/zfeature_common.c | 25 ++ module/zfs/zfs_log.c | 55 +++- module/zfs/zfs_replay.c | 51 ++- module/zfs/zil.c | 33 ++ module/zfs/zvol.c | 3 + .../cli_root/zpool_get/zpool_get.cfg | 7 + 20 files changed, 532 insertions(+), 62 deletions(-) diff --git a/AUTHORS b/AUTHORS index aab8bf29c99f..a665726073fe 100644 --- a/AUTHORS +++ b/AUTHORS @@ -19,6 +19,7 @@ CONTRIBUTORS: Albert Lee Alec Salazar Alejandro R. SedeƱo + Aleksa Sarai Alek Pinchuk Alex Braunegg Alex McWhirter @@ -236,6 +237,7 @@ CONTRIBUTORS: Paul Dagnelie Paul Zuchowski Pavel Boldin + Pavel Snajdr Pavel Zakharov Pawel Jakub Dawidek Pedro Giffuni diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c index 553765b71716..3b04f64cd457 100644 --- a/cmd/zdb/zdb_il.c +++ b/cmd/zdb/zdb_il.c @@ -128,6 +128,14 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, const void *arg) (void) printf("%ssdoid %llu, tdoid %llu\n", tab_prefix, (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid); (void) printf("%ssrc %s tgt %s\n", tab_prefix, snm, tnm); + switch (txtype) { + case TX_RENAME_EXCHANGE: + (void) printf("%sflags RENAME_EXCHANGE\n", tab_prefix); + break; + case TX_RENAME_WHITEOUT: + (void) printf("%sflags RENAME_WHITEOUT\n", tab_prefix); + break; + } } /* ARGSUSED */ @@ -305,6 +313,9 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = { {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ATTR "}, {.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL_ATTR "}, {.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "}, + {.zri_print = NULL, .zri_name = "TX_SETSAXATTR "}, + {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_EXCHANGE "}, + {.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_WHITEOUT "}, }; /* ARGSUSED */ @@ -329,7 +340,14 @@ print_log_record(zilog_t *zilog, const lr_t *lr, void *arg, uint64_t claim_txg) if (txtype && verbose >= 3) { if (!zilog->zl_os->os_encrypted) { - zil_rec_info[txtype].zri_print(zilog, txtype, lr); + zil_prt_rec_func_t print_func = + zil_rec_info[txtype].zri_print; + if (print_func != NULL) { + print_func(zilog, txtype, lr); + } else { + (void) printf("%s(%s)\n", tab_prefix, + zil_rec_info[txtype].zri_name); + } } else { (void) printf("%s(encrypted)\n", tab_prefix); } diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 3d12c2b90dfd..ae72475e14b7 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -2383,6 +2383,9 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { NULL, /* TX_MKDIR_ATTR */ NULL, /* TX_MKDIR_ACL_ATTR */ NULL, /* TX_WRITE2 */ + NULL, /* TX_SETSAXATTR */ + NULL, /* TX_RENAME_EXCHANGE */ + NULL, /* TX_RENAME_WHITEOUT */ }; /* diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index 302db43f5748..8d06f57cb13a 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -1,8 +1,28 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ + dnl # + dnl # 3.9 (to 4.9) API change, + dnl # + dnl # A new version of iops->rename() was added (rename2) that takes a flag + dnl # argument (to support renameat2). However this separate function was + dnl # merged back into iops->rename() in Linux 4.9. + dnl # + ZFS_LINUX_TEST_SRC([inode_operations_rename2], [ + #include + int rename2_fn(struct inode *sip, struct dentry *sdp, + struct inode *tip, struct dentry *tdp, + unsigned int flags) { return 0; } + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .rename2 = rename2_fn, + }; + ],[]) + dnl # dnl # 4.9 API change, - dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants - dnl # flags. + dnl # + dnl # iops->rename2() merged into iops->rename(), and iops->rename() now + dnl # wants flags. dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [ #include @@ -19,8 +39,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ dnl # dnl # 5.12 API change, dnl # - dnl # Linux 5.12 introduced passing struct user_namespace* as the first argument - dnl # of the rename() and other inode_operations members. + dnl # Linux 5.12 introduced passing struct user_namespace* as the first + dnl # argument of the rename() and other inode_operations members. dnl # ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [ #include @@ -44,13 +64,21 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether iop->rename() wants flags]) - ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ + AC_MSG_CHECKING([whether iops->rename2() exists]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename2], [ AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, - [iops->rename() wants flags]) + AC_DEFINE(HAVE_RENAME2, 1, [iops->rename2() exists]) ],[ AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether iops->rename() wants flags]) + ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1, + [iops->rename() wants flags]) + ],[ + AC_MSG_RESULT(no) + ]) ]) ]) ]) diff --git a/include/os/freebsd/zfs/sys/zfs_vnops_os.h b/include/os/freebsd/zfs/sys/zfs_vnops_os.h index bf5e03b24c06..cb1c97dc85ff 100644 --- a/include/os/freebsd/zfs/sys/zfs_vnops_os.h +++ b/include/os/freebsd/zfs/sys/zfs_vnops_os.h @@ -40,7 +40,7 @@ extern int zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags); extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr); extern int zfs_rename(znode_t *sdzp, const char *snm, znode_t *tdzp, - const char *tnm, cred_t *cr, int flags); + const char *tnm, cred_t *cr, int flags, uint64_t rflags); extern int zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, const char *link, znode_t **zpp, cred_t *cr, int flags); extern int zfs_link(znode_t *tdzp, znode_t *sp, diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h index 91e908598fbb..614d31596869 100644 --- a/include/os/linux/kernel/linux/vfs_compat.h +++ b/include/os/linux/kernel/linux/vfs_compat.h @@ -340,6 +340,19 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid) #endif } +/* + * 3.15 API change + */ +#ifndef RENAME_NOREPLACE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#endif +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#endif +#ifndef RENAME_WHITEOUT +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +#endif + /* * 4.9 API change */ diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h index 47f91e4a6cf4..436b60d56641 100644 --- a/include/os/linux/zfs/sys/zfs_vnops_os.h +++ b/include/os/linux/zfs/sys/zfs_vnops_os.h @@ -58,7 +58,7 @@ extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip, struct kstat *sp); extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr); extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, - char *tnm, cred_t *cr, int flags); + char *tnm, cred_t *cr, int flags, uint64_t rflags); extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link, znode_t **zpp, cred_t *cr, int flags); extern int zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr); diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 8edbe93397cb..c142bb22d631 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -275,6 +275,12 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp); +extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, + uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, + const char *dname, znode_t *szp); +extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, + uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, + const char *dname, znode_t *szp); extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t len, int ioflag, zil_callback_t callback, void *callback_data); diff --git a/include/sys/zil.h b/include/sys/zil.h index a43823b605de..632a1773b846 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -162,7 +162,10 @@ typedef enum zil_create { #define TX_MKDIR_ATTR 18 /* mkdir with attr */ #define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */ #define TX_WRITE2 20 /* dmu_sync EALREADY write */ -#define TX_MAX_TYPE 21 /* Max transaction type */ +/* TX_SETSAXATTR goes here but is not backported */ +#define TX_RENAME_EXCHANGE 22 /* Atomic swap via renameat2 */ +#define TX_RENAME_WHITEOUT 23 /* Atomic whiteout via renameat2 */ +#define TX_MAX_TYPE 24 /* Max transaction type */ /* * The transactions for mkdir, symlink, remove, rmdir, link, and rename diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 76dd7ed57478..85a55710a41e 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -75,6 +75,8 @@ typedef enum spa_feature { SPA_FEATURE_DEVICE_REBUILD, SPA_FEATURE_ZSTD_COMPRESS, SPA_FEATURE_DRAID, + SPA_FEATURE_RENAME_EXCHANGE, + SPA_FEATURE_RENAME_WHITEOUT, SPA_FEATURES } spa_feature_t; diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 760f30d56b7e..84a453829e6a 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -3473,7 +3473,7 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, int zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, - cred_t *cr, int flags) + cred_t *cr, int flags, uint64_t rflags) { struct componentname scn, tcn; vnode_t *sdvp, *tdvp; @@ -3481,6 +3481,9 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, int error; svp = tvp = NULL; + if (rflags != 0) + return (SET_ERROR(EINVAL)); + sdvp = ZTOV(sdzp); tdvp = ZTOV(tdzp); error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE); diff --git a/module/os/linux/zfs/zfs_dir.c b/module/os/linux/zfs/zfs_dir.c index 9f07d9a62688..feb8e2a6db20 100644 --- a/module/os/linux/zfs/zfs_dir.c +++ b/module/os/linux/zfs/zfs_dir.c @@ -1035,7 +1035,8 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, } /* The only error is !zfs_dirempty() and we checked earlier. */ - ASSERT3U(zfs_drop_nlink_locked(zp, tx, &unlinked), ==, 0); + error = zfs_drop_nlink_locked(zp, tx, &unlinked); + ASSERT3U(error, ==, 0); mutex_exit(&zp->z_lock); } else { error = zfs_dropname(dl, zp, dzp, tx, flag); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 3ab858be428e..1b38865e9b1c 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -68,6 +68,7 @@ #include #include #include +#include #include /* @@ -2596,6 +2597,70 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) return (err); } +typedef struct zfs_activate_feature_arg { + const char *name; + spa_feature_t feature; +} zfs_activate_feature_arg; + +static int +zfs_activate_feature_check(void *arg, dmu_tx_t *tx) +{ + zfs_activate_feature_arg *zlefa = arg; + spa_feature_t f = zlefa->feature; + + dsl_pool_t *dp = dmu_tx_pool(tx); + + if (f == SPA_FEATURE_NONE) + return (SET_ERROR(EINVAL)); + + if (!spa_feature_is_enabled(dp->dp_spa, f)) + return (SET_ERROR(ENOTSUP)); + + return (0); +} + +static void +zfs_activate_feature_sync(void *arg, dmu_tx_t *tx) +{ + zfs_activate_feature_arg *zlefa = arg; + spa_feature_t f = zlefa->feature; + + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dataset_t *ds = NULL; + + ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN); + VERIFY0(dsl_dataset_hold(dp, zlefa->name, FTAG, &ds)); + if (dsl_dataset_feature_is_active(ds, f) != B_TRUE) { + ds->ds_feature_activation[f] = (void *)B_TRUE; + dsl_dataset_activate_feature(ds->ds_object, f, + ds->ds_feature_activation[f], tx); + ds->ds_feature[f] = ds->ds_feature_activation[f]; + } + dsl_dataset_rele(ds, FTAG); +} + +static int +zfs_activate_feature(spa_t *spa, spa_feature_t feature) +{ + zfs_activate_feature_arg zlefa = { + .name = spa_name(spa), + .feature = feature, + }; + + ASSERT(spa_feature_is_enabled(spa, feature)); + if (spa_feature_is_active(spa, feature)) + return (0); + + /* + * We need to wait for the activation to sync out before we continue + * with the rename operation, so that when we create the ZIL entry the + * dataset already has the feature activated. + */ + return (dsl_sync_task(spa_name(spa), zfs_activate_feature_check, + zfs_activate_feature_sync, &zlefa, 0, + ZFS_SPACE_CHECK_EXTRA_RESERVED)); +} + typedef struct zfs_zlock { krwlock_t *zl_rwlock; /* lock we acquired */ znode_t *zl_znode; /* znode we held */ @@ -2703,6 +2768,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) * tnm - New entry name. * cr - credentials of caller. * flags - case flags + * rflags - RENAME_* flags * * RETURN: 0 on success, error code on failure. * @@ -2712,7 +2778,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) /*ARGSUSED*/ int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, - cred_t *cr, int flags) + cred_t *cr, int flags, uint64_t rflags) { znode_t *szp, *tzp; zfsvfs_t *zfsvfs = ZTOZSB(sdzp); @@ -2720,19 +2786,49 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, zfs_dirlock_t *sdl, *tdl; dmu_tx_t *tx; zfs_zlock_t *zl; + spa_feature_t zil_feature = SPA_FEATURE_NONE; int cmp, serr, terr; int error = 0; int zflg = 0; boolean_t waited = B_FALSE; + /* Needed for whiteout inode creation. */ + vattr_t wo_vap; + boolean_t fuid_dirtied; + zfs_acl_ids_t acl_ids; + boolean_t have_acl = B_FALSE; + znode_t *wzp = NULL; + if (snm == NULL || tnm == NULL) return (SET_ERROR(EINVAL)); + if (rflags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) + return (SET_ERROR(EINVAL)); + + /* Already checked by Linux VFS, but just to make sure. */ + if (rflags & RENAME_EXCHANGE && + (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT))) + return (SET_ERROR(EINVAL)); + ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(sdzp); + ZFS_VERIFY_ZP(tdzp); zilog = zfsvfs->z_log; - ZFS_VERIFY_ZP(tdzp); + switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) { + case RENAME_EXCHANGE: + zil_feature = SPA_FEATURE_RENAME_EXCHANGE; + break; + case RENAME_WHITEOUT: + zil_feature = SPA_FEATURE_RENAME_WHITEOUT; + break; + } + + if (zil_feature != SPA_FEATURE_NONE && + !spa_feature_is_enabled(zfsvfs->z_os->os_spa, zil_feature)) { + ZFS_EXIT(zfsvfs); + return (SET_ERROR(EINVAL)); + } /* * We check i_sb because snapshots and the ctldir must have different @@ -2901,7 +2997,6 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Note that if target and source are the same, this can be * done in a single check. */ - if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) goto out; @@ -2918,15 +3013,21 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Does target exist? */ if (tzp) { + if (rflags & RENAME_NOREPLACE) { + error = SET_ERROR(EEXIST); + goto out; + } /* - * Source and target must be the same type. + * Source and target must be the same type (unless exchanging). */ - boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0; - boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0; + if (!(rflags & RENAME_EXCHANGE)) { + boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0; + boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0; - if (s_is_dir != t_is_dir) { - error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR); - goto out; + if (s_is_dir != t_is_dir) { + error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR); + goto out; + } } /* * POSIX dictates that when the source and target @@ -2937,12 +3038,60 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, error = 0; goto out; } + } else if (rflags & RENAME_EXCHANGE) { + /* Target must exist for RENAME_EXCHANGE. */ + error = SET_ERROR(ENOENT); + goto out; + } + + /* Set up inode creation for RENAME_WHITEOUT. */ + if (rflags & RENAME_WHITEOUT) { + /* + * Whiteout files are not regular files or directories, so to + * match zfs_create() we do not inherit the project id. + */ + uint64_t wo_projid = ZFS_DEFAULT_PROJID; + + error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr); + if (error) + goto out; + + zpl_vap_init(&wo_vap, ZTOI(sdzp), S_IFCHR, cr); + /* Can't use of makedevice() here, so hard-code it. */ + wo_vap.va_rdev = 0; + + if (!have_acl) { + error = zfs_acl_ids_create(sdzp, 0, &wo_vap, cr, NULL, + &acl_ids); + if (error) + goto out; + have_acl = B_TRUE; + } + + if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) { + error = SET_ERROR(EDQUOT); + goto out; + } + } + + /* + * Before the dmu_tx is created, activate the RENAME_* feature flags. + * We want to do this as late as possible, to avoid activating the + * feature if the operation is likely to fail, but we have to do it + * before dmu_tx_assign() because we need the feature flag activation + * to sync out before we create the ZIL entry. + */ + if (zil_feature != SPA_FEATURE_NONE) { + error = zfs_activate_feature(zfsvfs->z_os->os_spa, zil_feature); + if (error) + goto out; } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); + dmu_tx_hold_zap(tx, sdzp->z_id, + (rflags & RENAME_EXCHANGE) ? TRUE : FALSE, snm); dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); if (sdzp != tdzp) { dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); @@ -2952,7 +3101,21 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, tzp); } + if (rflags & RENAME_WHITEOUT) { + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm); + dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); + if (!zfsvfs->z_use_sa && + acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } + } + fuid_dirtied = zfsvfs->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); zfs_sa_upgrade_txholds(tx, szp); dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); @@ -2991,7 +3154,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), (void *)&szp->z_pflags, sizeof (uint64_t), tx); - ASSERT0(error); + VERIFY0(error); error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); if (error) @@ -3001,13 +3164,30 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Unlink the target. */ if (tzp) { - error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); + int tzflg = zflg; + + if (rflags & RENAME_EXCHANGE) { + /* This inode will be re-linked soon. */ + tzflg |= ZRENAMING; + + tzp->z_pflags |= ZFS_AV_MODIFIED; + if (sdzp->z_pflags & ZFS_PROJINHERIT) + tzp->z_pflags |= ZFS_PROJINHERIT; + + error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + (void *)&tzp->z_pflags, sizeof (uint64_t), tx); + ASSERT0(error); + } + error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL); if (error) goto commit_link_szp; } /* - * Create a new link at the target. + * Create the new target links: + * * We always link the target. + * * RENAME_EXCHANGE: Link the old target to the source. + * * RENAME_WHITEOUT: Create a whiteout inode in-place of the source. */ error = zfs_link_create(tdl, szp, tx, ZRENAMING); if (error) { @@ -3020,18 +3200,55 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, goto commit_link_tzp; } - zfs_log_rename(zilog, tx, TX_RENAME | - (flags & FIGNORECASE ? TX_CI : 0), sdzp, - sdl->dl_name, tdzp, tdl->dl_name, szp); + switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) { + case RENAME_EXCHANGE: + error = zfs_link_create(sdl, tzp, tx, ZRENAMING); + /* + * The same argument as zfs_link_create() failing for + * szp applies here, since the source directory must + * have had an entry we are replacing. + */ + ASSERT0(error); + if (error) + goto commit_unlink_td_szp; + break; + case RENAME_WHITEOUT: + zfs_mknode(sdzp, &wo_vap, tx, cr, 0, &wzp, &acl_ids); + error = zfs_link_create(sdl, wzp, tx, ZNEW); + if (error) { + zfs_znode_delete(wzp, tx); + remove_inode_hash(ZTOI(wzp)); + goto commit_unlink_td_szp; + } + break; + } + + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + + switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) { + case RENAME_EXCHANGE: + zfs_log_rename_exchange(zilog, tx, + (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, + tdzp, tdl->dl_name, szp); + break; + case RENAME_WHITEOUT: + zfs_log_rename_whiteout(zilog, tx, + (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, + tdzp, tdl->dl_name, szp); + break; + default: + ASSERT0(rflags & ~RENAME_NOREPLACE); + zfs_log_rename(zilog, tx, (flags & FIGNORECASE ? TX_CI : 0), + sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); + break; + } commit: dmu_tx_commit(tx); out: - if (zl != NULL) - zfs_rename_unlock(&zl); - - zfs_dirent_unlock(sdl); - zfs_dirent_unlock(tdl); + if (have_acl) + zfs_acl_ids_free(&acl_ids); zfs_znode_update_vfs(sdzp); if (sdzp == tdzp) @@ -3042,11 +3259,21 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, zfs_znode_update_vfs(szp); zrele(szp); + if (wzp) { + zfs_znode_update_vfs(wzp); + zrele(wzp); + } if (tzp) { zfs_znode_update_vfs(tzp); zrele(tzp); } + if (zl != NULL) + zfs_rename_unlock(&zl); + + zfs_dirent_unlock(sdl); + zfs_dirent_unlock(tdl); + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); @@ -3057,23 +3284,31 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, * Clean-up path for broken link state. * * At this point we are in a (very) bad state, so we need to do our - * best to correct the state. In particular, the nlink of szp is wrong - * because we were destroying and creating links with ZRENAMING. + * best to correct the state. In particular, all of the nlinks are + * wrong because we were destroying and creating links with ZRENAMING. + * + * In some form, all of these operations have to resolve the state: + * + * * link_destroy() *must* succeed. Fortunately, this is very likely + * since we only just created it. * - * link_create()s are allowed to fail (though they shouldn't because we - * only just unlinked them and are putting the entries back during - * clean-up). But if they fail, we can just forcefully drop the nlink - * value to (at the very least) avoid broken nlink values -- though in - * the case of non-empty directories we will have to panic. + * * link_create()s are allowed to fail (though they shouldn't because + * we only just unlinked them and are putting the entries back + * during clean-up). But if they fail, we can just forcefully drop + * the nlink value to (at the very least) avoid broken nlink values + * -- though in the case of non-empty directories we will have to + * panic (otherwise we'd have a leaked directory with a broken ..). */ +commit_unlink_td_szp: + VERIFY0(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL)); commit_link_tzp: if (tzp) { if (zfs_link_create(tdl, tzp, tx, ZRENAMING)) - VERIFY3U(zfs_drop_nlink(tzp, tx, NULL), ==, 0); + VERIFY0(zfs_drop_nlink(tzp, tx, NULL)); } commit_link_szp: if (zfs_link_create(sdl, szp, tx, ZRENAMING)) - VERIFY3U(zfs_drop_nlink(szp, tx, NULL), ==, 0); + VERIFY0(zfs_drop_nlink(szp, tx, NULL)); goto commit; } diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index 500d5306a8b1..4609555334b7 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -474,24 +474,20 @@ static int #ifdef HAVE_IOPS_RENAME_USERNS zpl_rename2(struct user_namespace *user_ns, struct inode *sdip, struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry, - unsigned int flags) + unsigned int rflags) #else zpl_rename2(struct inode *sdip, struct dentry *sdentry, - struct inode *tdip, struct dentry *tdentry, unsigned int flags) + struct inode *tdip, struct dentry *tdentry, unsigned int rflags) #endif { cred_t *cr = CRED(); int error; fstrans_cookie_t cookie; - /* We don't have renameat2(2) support */ - if (flags) - return (-EINVAL); - crhold(cr); cookie = spl_fstrans_mark(); error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip), - dname(tdentry), cr, 0); + dname(tdentry), cr, 0, rflags); spl_fstrans_unmark(cookie); crfree(cr); ASSERT3S(error, <=, 0); @@ -499,7 +495,9 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, return (error); } -#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS) +#if !defined(HAVE_IOPS_RENAME_USERNS) && \ + !defined(HAVE_RENAME_WANTS_FLAGS) && \ + !defined(HAVE_RENAME2) static int zpl_rename(struct inode *sdip, struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry) @@ -725,7 +723,9 @@ const struct inode_operations zpl_dir_inode_operations = { .mkdir = zpl_mkdir, .rmdir = zpl_rmdir, .mknod = zpl_mknod, -#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) +#ifdef HAVE_RENAME2 + .rename2 = zpl_rename2, +#elif defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS) .rename = zpl_rename2, #else .rename = zpl_rename, diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index fc0e09605eef..ad759c033b8b 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -598,6 +598,31 @@ zpool_feature_init(void) zfeature_register(SPA_FEATURE_DRAID, "org.openzfs:draid", "draid", "Support for distributed spare RAID", ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL); + +#ifdef __linux__ + { + static const spa_feature_t rename_exchange_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_RENAME_EXCHANGE, + "org.openzfs:rename_exchange", "rename_exchange", + "Support for renameat2(RENAME_EXCHANGE).", + ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET, + ZFEATURE_TYPE_BOOLEAN, rename_exchange_deps); + } + { + static const spa_feature_t rename_whiteout_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_RENAME_WHITEOUT, + "org.openzfs:rename_whiteout", "rename_whiteout", + "Support for renameat2(RENAME_WHITEOUT).", + ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET, + ZFEATURE_TYPE_BOOLEAN, rename_whiteout_deps); + } +#endif } #if defined(_KERNEL) diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 9e52bed77a61..460e97993699 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -45,6 +45,7 @@ #include #include #include +#include /* * These zfs_log_* functions must be called within a dmu tx, in one @@ -495,11 +496,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, zil_itx_assign(zilog, itx, tx); } -/* - * Handles TX_RENAME transactions. - */ -void -zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, +static void +do_zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) { itx_t *itx; @@ -521,6 +519,53 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, zil_itx_assign(zilog, itx, tx); } +/* + * Handles TX_RENAME transactions. + */ +void +zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, + const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) +{ + txtype |= TX_RENAME; + do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); +} + +/* + * Handles TX_RENAME_EXCHANGE transactions. + */ +void +zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, + znode_t *szp) +{ + spa_t *spa = zilog->zl_spa; + + /* zfs_rename must have already activated this feature. */ + VERIFY(spa_feature_is_enabled(spa, SPA_FEATURE_RENAME_EXCHANGE)); + VERIFY(spa_feature_is_active(spa, SPA_FEATURE_RENAME_EXCHANGE)); + + txtype |= TX_RENAME_EXCHANGE; + do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); +} + +/* + * Handles TX_RENAME_WHITEOUT transactions. + */ +void +zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, + znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, + znode_t *szp) +{ + spa_t *spa = zilog->zl_spa; + + /* zfs_rename must have already activated this feature. */ + VERIFY(spa_feature_is_enabled(spa, SPA_FEATURE_RENAME_WHITEOUT)); + VERIFY(spa_feature_is_active(spa, SPA_FEATURE_RENAME_WHITEOUT)); + + txtype |= TX_RENAME_WHITEOUT; + do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); +} + /* * zfs_log_write() handles TX_WRITE transactions. The specified callback is * called as soon as the write is on stable storage (be it via a DMU sync or a diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index f3d209f1fbdb..59c76a2ee94b 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -639,19 +639,33 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) } static int -zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) +do_zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap, + uint64_t rflags) { zfsvfs_t *zfsvfs = arg1; lr_rename_t *lr = arg2; char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ char *tname = sname + strlen(sname) + 1; znode_t *sdzp, *tdzp; - int error; - int vflg = 0; + int error, vflg = 0; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); + /* Only Linux currently supports RENAME_* flags. */ +#ifdef __linux__ + VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT)); + + IMPLY(rflags & RENAME_EXCHANGE, + spa_feature_is_active(zfsvfs->z_os->os_spa, + SPA_FEATURE_RENAME_EXCHANGE)); + IMPLY(rflags & RENAME_WHITEOUT, + spa_feature_is_active(zfsvfs->z_os->os_spa, + SPA_FEATURE_RENAME_WHITEOUT)); +#else + VERIFY0(rflags); +#endif + if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) return (error); @@ -663,13 +677,39 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg); + error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags); zrele(tdzp); zrele(sdzp); return (error); } +static int +zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) +{ + return (do_zfs_replay_rename(arg1, arg2, byteswap, 0)); +} + +static int +zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) +{ +#ifdef __linux__ + return (do_zfs_replay_rename(arg1, arg2, byteswap, RENAME_EXCHANGE)); +#else + return (SET_ERROR(ENOTSUP)); +#endif +} + +static int +zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) +{ +#ifdef __linux__ + return (do_zfs_replay_rename(arg1, arg2, byteswap, RENAME_WHITEOUT)); +#else + return (SET_ERROR(ENOTSUP)); +#endif +} + static int zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap) { @@ -987,4 +1027,7 @@ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { zfs_replay_create, /* TX_MKDIR_ATTR */ zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ zfs_replay_write2, /* TX_WRITE2 */ + zfs_replay_error, /* TX_SETSAXATTR */ + zfs_replay_rename_exchange, /* TX_RENAME_EXCHANGE */ + zfs_replay_rename_whiteout, /* TX_RENAME_WHITEOUT */ }; diff --git a/module/zfs/zil.c b/module/zfs/zil.c index b9f177daee53..652a2ea5cd89 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -3044,6 +3044,37 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid) zil_free_commit_waiter(zcw); } +/* + * Called as part of zil_sync once the replay succeeded successfully. + * + * For backwards-compatibility reasons, brand new TX_* records need to have a + * feature flag associated with them so that a system will not be confused when + * importing a pool with unknown TX_* types. These features are only activated + * while the ZIL contains a log entry containing the new TX_* type, and need to + * be deactivated when the ZIL has been applied and cleared (so that the pool + * can be imported onto other systems after a clean 'zfs export'). + */ +static void +zil_sync_deactivate_features(zilog_t *zilog, dmu_tx_t *tx) +{ + dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); + + /* + * A destroyed ZIL chain can't contain any of the TX_* records which + * are gated by these features. So, deactivate the features. They'll be + * re-activated if the feature is needed again. + */ + spa_feature_t tx_features[] = { + SPA_FEATURE_RENAME_EXCHANGE, + SPA_FEATURE_RENAME_WHITEOUT, + }; + for (int i = 0; i < ARRAY_SIZE(tx_features); i++) { + spa_feature_t f = tx_features[i]; + if (dsl_dataset_feature_is_active(ds, f)) + dsl_dataset_deactivate_feature(ds, f, tx); + } +} + /* * Called in syncing context to free committed log blocks and update log header. */ @@ -3092,6 +3123,8 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) */ zil_init_log_chain(zilog, &blk); zh->zh_log = blk; + } else { + zil_sync_deactivate_features(zilog, tx); } } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 7d141a12288b..c678d7a9a6f3 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -558,6 +558,9 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { zvol_replay_err, /* TX_MKDIR_ATTR */ zvol_replay_err, /* TX_MKDIR_ACL_ATTR */ zvol_replay_err, /* TX_WRITE2 */ + zvol_replay_err, /* TX_SETSAXATTR */ + zvol_replay_err, /* TX_RENAME_EXCHANGE */ + zvol_replay_err, /* TX_RENAME_WHITEOUT */ }; /* diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 6075e1f1abbd..0e1572f55909 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -104,3 +104,10 @@ if ! is_freebsd; then "feature@edonr" ) fi + +if is_linux; then + properties+=( + "feature@rename_exchange" + "feature@rename_whiteout" + ) +fi From 1853ccd536db503d0da81bb945b50953d7c20f14 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Thu, 7 Nov 2019 12:07:08 -0500 Subject: [PATCH 5/8] renameat2: add EL7 compatibility handling EL7 has backported renameat2(2) support, but this was done by creating a wrapper struct around the inode_operations struct and adding the rename2 callback to the wrapping struct. The semantics are the same as the upstream callback, so just detect and use the wrapper. Since we only need this wrapper for rename2 (and renaming is a directory operation) we only use the wrapper for the directory iops struct. Signed-off-by: Pavel Snajdr Signed-off-by: Aleksa Sarai --- config/kernel-rename.m4 | 27 +++++++++++++++++++++++++++ include/os/linux/zfs/sys/zpl.h | 4 ++++ module/os/linux/zfs/zfs_znode.c | 5 +++++ module/os/linux/zfs/zpl_inode.c | 9 +++++++++ 4 files changed, 45 insertions(+) diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4 index 8d06f57cb13a..a2b0800ab4d2 100644 --- a/config/kernel-rename.m4 +++ b/config/kernel-rename.m4 @@ -36,6 +36,24 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [ }; ],[]) + dnl # + dnl # EL7 compatibility + dnl # + dnl # EL7 has backported renameat2 support, but it's done by defining a + dnl # separate iops wrapper structure that takes the .renameat2 function. + dnl # + ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [ + #include + int rename2_fn(struct inode *sip, struct dentry *sdp, + struct inode *tip, struct dentry *tdp, + unsigned int flags) { return 0; } + + static const struct inode_operations_wrapper + iops __attribute__ ((unused)) = { + .rename2 = rename2_fn, + }; + ],[]) + dnl # dnl # 5.12 API change, dnl # @@ -78,6 +96,15 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [ [iops->rename() wants flags]) ],[ AC_MSG_RESULT(no) + + AC_MSG_CHECKING([whether struct inode_operations_wrapper takes .rename2()]) + ZFS_LINUX_TEST_RESULT([dir_inode_operations_wrapper_rename2], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_RENAME2_OPERATIONS_WRAPPER, 1, + [struct inode_operations_wrapper takes .rename2()]) + ],[ + AC_MSG_RESULT(no) + ]) ]) ]) ]) diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index 9e5eede44073..e4fbcff3b853 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -42,7 +42,11 @@ extern void zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr); extern const struct inode_operations zpl_inode_operations; +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER +extern const struct inode_operations_wrapper zpl_dir_inode_operations; +#else extern const struct inode_operations zpl_dir_inode_operations; +#endif extern const struct inode_operations zpl_symlink_inode_operations; extern const struct inode_operations zpl_special_inode_operations; extern const struct address_space_operations zpl_address_space_operations; diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index cd80049df142..0a697b1adb76 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -414,7 +414,12 @@ zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) break; case S_IFDIR: +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER + ip->i_flags |= S_IOPS_WRAPPER; + ip->i_op = &zpl_dir_inode_operations.ops; +#else ip->i_op = &zpl_dir_inode_operations; +#endif ip->i_fop = &zpl_dir_file_operations; ITOZ(ip)->z_zn_prefetch = B_TRUE; break; diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index 4609555334b7..9e34c6e37a04 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -714,7 +714,12 @@ const struct inode_operations zpl_inode_operations = { .permission = zpl_permission, }; +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER +const struct inode_operations_wrapper zpl_dir_inode_operations = { + .ops = { +#else const struct inode_operations zpl_dir_inode_operations = { +#endif .create = zpl_create, .lookup = zpl_lookup, .link = zpl_link, @@ -748,6 +753,10 @@ const struct inode_operations zpl_dir_inode_operations = { .get_acl = zpl_get_acl, #endif /* CONFIG_FS_POSIX_ACL */ .permission = zpl_permission, +#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER + }, + .rename2 = zpl_rename2, +#endif }; const struct inode_operations zpl_symlink_inode_operations = { From deabe8b260a3f305b32a478bd28aef106817eb30 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 14 Oct 2019 22:37:03 +0200 Subject: [PATCH 6/8] tests: add tests for renameat2(2) flags Since mv(1) doesn't expose the RENAME_* flags we need to have our own variation in the tests/ tree. The tests are fairly obvious functional tests, though in the future (once we solve the d_revalidate issue) we might also add a full-stack overlayfs integration test. Signed-off-by: Aleksa Sarai --- configure.ac | 1 + tests/runfiles/linux.run | 4 + tests/test-runner/bin/zts-report.py.in | 10 +- tests/zfs-tests/cmd/Makefile.am | 1 + tests/zfs-tests/cmd/renameat2/.gitignore | 1 + tests/zfs-tests/cmd/renameat2/Makefile.am | 6 + tests/zfs-tests/cmd/renameat2/renameat2.c | 128 ++++++++++++++++++ tests/zfs-tests/include/commands.cfg | 1 + .../tests/functional/renameat2/Makefile.am | 7 + .../tests/functional/renameat2/cleanup.ksh | 34 +++++ .../renameat2/renameat2_exchange.ksh | 71 ++++++++++ .../renameat2/renameat2_noreplace.ksh | 58 ++++++++ .../renameat2/renameat2_whiteout.ksh | 59 ++++++++ .../tests/functional/renameat2/setup.ksh | 37 +++++ .../tests/functional/slog/slog_017_neg.ksh | 128 ++++++++++++++++++ .../functional/slog/slog_replay_fs_001.ksh | 62 +++++++++ 16 files changed, 606 insertions(+), 2 deletions(-) create mode 100644 tests/zfs-tests/cmd/renameat2/.gitignore create mode 100644 tests/zfs-tests/cmd/renameat2/Makefile.am create mode 100644 tests/zfs-tests/cmd/renameat2/renameat2.c create mode 100644 tests/zfs-tests/tests/functional/renameat2/Makefile.am create mode 100755 tests/zfs-tests/tests/functional/renameat2/cleanup.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh create mode 100755 tests/zfs-tests/tests/functional/renameat2/setup.ksh create mode 100755 tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh diff --git a/configure.ac b/configure.ac index c1c43437da16..d84cf7bff662 100644 --- a/configure.ac +++ b/configure.ac @@ -234,6 +234,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/cmd/readmmap/Makefile tests/zfs-tests/cmd/read_dos_attributes/Makefile tests/zfs-tests/cmd/rename_dir/Makefile + tests/zfs-tests/cmd/renameat2/Makefile tests/zfs-tests/cmd/rm_lnkcnt_zero_file/Makefile tests/zfs-tests/cmd/send_doall/Makefile tests/zfs-tests/cmd/stride_dd/Makefile diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index a94472d12615..9afdd914ce23 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -157,6 +157,10 @@ tags = ['functional', 'projectquota'] tests = ['read_dos_attrs_001', 'write_dos_attrs_001'] tags = ['functional', 'dos_attributes'] +[tests/functional/renameat2:Linux] +tests = ['renameat2_noreplace', 'renameat2_exchange', 'renameat2_whiteout'] +tags = ['functional', 'renameat2'] + [tests/functional/rsend:Linux] tests = ['send_realloc_dnode_size', 'send_encrypted_files'] tags = ['functional', 'rsend'] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 71b0cc8d6483..3e3a66669df8 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -70,6 +70,11 @@ exec_reason = 'Test user execute permissions required for utilities' python_reason = 'Python v3.5 or newer required' python_deps_reason = 'Python modules missing: python-cffi' +# +# Some tests require that the kernel supports renameat2 syscall. +# +renameat2_reason = 'Kernel renameat2 support required' + # # Some tests require the O_TMPFILE flag which was first introduced in the # 3.11 kernel. @@ -245,8 +250,9 @@ maybe = { 'pool_checkpoint/checkpoint_discard_busy': ['FAIL', '11946'], 'projectquota/setup': ['SKIP', exec_reason], 'removal/removal_condense_export': ['FAIL', known_reason], - 'reservation/reservation_008_pos': ['FAIL', '7741'], - 'reservation/reservation_018_pos': ['FAIL', '5642'], + 'renameat2/setup': ['SKIP', renameat2_reason], + 'reservation/reservation_008_pos': ['FAIL', 7741], + 'reservation/reservation_018_pos': ['FAIL', 5642], 'snapshot/clone_001_pos': ['FAIL', known_reason], 'snapshot/snapshot_009_pos': ['FAIL', '7961'], 'snapshot/snapshot_010_pos': ['FAIL', '7961'], diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am index 88d8c8cf08b2..c078899369fb 100644 --- a/tests/zfs-tests/cmd/Makefile.am +++ b/tests/zfs-tests/cmd/Makefile.am @@ -25,6 +25,7 @@ SUBDIRS = \ randwritecomp \ readmmap \ rename_dir \ + renameat2 \ rm_lnkcnt_zero_file \ send_doall \ stride_dd \ diff --git a/tests/zfs-tests/cmd/renameat2/.gitignore b/tests/zfs-tests/cmd/renameat2/.gitignore new file mode 100644 index 000000000000..1d540598db9b --- /dev/null +++ b/tests/zfs-tests/cmd/renameat2/.gitignore @@ -0,0 +1 @@ +/renameat2 diff --git a/tests/zfs-tests/cmd/renameat2/Makefile.am b/tests/zfs-tests/cmd/renameat2/Makefile.am new file mode 100644 index 000000000000..0d68b02b0d8e --- /dev/null +++ b/tests/zfs-tests/cmd/renameat2/Makefile.am @@ -0,0 +1,6 @@ +include $(top_srcdir)/config/Rules.am + +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin + +pkgexec_PROGRAMS = renameat2 +renameat2_SOURCES = renameat2.c diff --git a/tests/zfs-tests/cmd/renameat2/renameat2.c b/tests/zfs-tests/cmd/renameat2/renameat2.c new file mode 100644 index 000000000000..a9d0a8b20adf --- /dev/null +++ b/tests/zfs-tests/cmd/renameat2/renameat2.c @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: CDDL-1.0 OR MPL-2.0 */ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (C) 2019 Aleksa Sarai + * Copyright (C) 2019 SUSE LLC + */ + +/* + * mv(1) doesn't currently support RENAME_{EXCHANGE,WHITEOUT} so this is a very + * simple renameat2(2) wrapper for the OpenZFS self-tests. + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifndef SYS_renameat2 +#ifdef __NR_renameat2 +#define SYS_renameat2 __NR_renameat2 +#elif defined(__x86_64__) +#define SYS_renameat2 316 +#elif defined(__i386__) +#define SYS_renameat2 353 +#elif defined(__arm__) || defined(__aarch64__) +#define SYS_renameat2 382 +#else +#error "SYS_renameat2 not known for this architecture." +#endif +#endif + +#ifndef RENAME_NOREPLACE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#endif +#ifndef RENAME_EXCHANGE +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#endif +#ifndef RENAME_WHITEOUT +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +#endif + +/* glibc doesn't provide renameat2 wrapper, let's use our own */ +static int +sys_renameat2(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, unsigned int flags) +{ + int ret = syscall(SYS_renameat2, olddirfd, oldpath, newdirfd, newpath, + flags); + return ((ret < 0) ? -errno : ret); +} + +static void +usage(void) +{ + fprintf(stderr, "usage: renameat2 [-Cnwx] src dst\n"); + exit(1); +} + +static void +check(void) +{ + int err = sys_renameat2(AT_FDCWD, ".", AT_FDCWD, ".", RENAME_EXCHANGE); + exit(err == -ENOSYS); +} + +int +main(int argc, char **argv) +{ + char *src, *dst; + int ch, err; + unsigned int flags = 0; + + while ((ch = getopt(argc, argv, "Cnwx")) >= 0) { + switch (ch) { + case 'C': + check(); + break; + case 'n': + flags |= RENAME_NOREPLACE; + break; + case 'w': + flags |= RENAME_WHITEOUT; + break; + case 'x': + flags |= RENAME_EXCHANGE; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (argc != 2) + usage(); + src = argv[0]; + dst = argv[1]; + + err = sys_renameat2(AT_FDCWD, src, AT_FDCWD, dst, flags); + if (err < 0) + fprintf(stderr, "renameat2: %s", strerror(-err)); + return (err != 0); +} diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index f0da78628d69..850981046448 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -217,6 +217,7 @@ export ZFSTEST_FILES='badsend randwritecomp readmmap read_dos_attributes + renameat2 rename_dir rm_lnkcnt_zero_file send_doall diff --git a/tests/zfs-tests/tests/functional/renameat2/Makefile.am b/tests/zfs-tests/tests/functional/renameat2/Makefile.am new file mode 100644 index 000000000000..bd8d6c9d68bf --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/Makefile.am @@ -0,0 +1,7 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/renameat2 +dist_pkgdata_SCRIPTS = \ + setup.ksh \ + cleanup.ksh \ + renameat2_noreplace.ksh \ + renameat2_exchange.ksh \ + renameat2_whiteout.ksh diff --git a/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh b/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh new file mode 100755 index 000000000000..3166bd6ec16e --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/cleanup.ksh @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh new file mode 100755 index 000000000000..4547494b0cd1 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh @@ -0,0 +1,71 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_EXCHANGE." +log_onexit cleanup + +check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" + +cd $TESTDIR +echo "foo" > foo +echo "bar" > bar + +# Self-exchange is a no-op. +log_must renameat2 -x foo foo +log_must grep '^foo$' foo + +# Basic exchange. +log_must renameat2 -x foo bar +check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "active" +log_must grep '^bar$' foo +log_must grep '^foo$' bar + +# And exchange back. +log_must renameat2 -x foo bar +check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "active" +log_must grep '^foo$' foo +log_must grep '^bar$' bar + +# Exchange with a bad path should fail. +log_mustnot renameat2 -x bar baz + +# The feature flag must remain active until a clean export. +check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "active" +log_must zpool export "$TESTPOOL1" +log_must zpool import "$TESTPOOL1" +check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" + +log_pass "ZFS supports RENAME_EXCHANGE as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh new file mode 100755 index 000000000000..272cdd99d841 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_NOREPLACE." +log_onexit cleanup + +check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" +check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" + +cd $TESTDIR +touch foo bar + +# Clobbers should always fail. +log_mustnot renameat2 -n foo foo +log_mustnot renameat2 -n foo bar +log_mustnot renameat2 -n bar foo + +# Regular renames should succeed. +log_must renameat2 -n bar baz + +# The other rename feature flags should not be active. +check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" +check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" + +log_pass "ZFS supports RENAME_NOREPLACE as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh new file mode 100755 index 000000000000..c2bf59fbf772 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + log_must rm -rf $TESTDIR/* +} + +log_assert "ZFS supports RENAME_WHITEOUT." +log_onexit cleanup + +check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" + +cd $TESTDIR +echo "whiteout" > whiteout + +# Straight-forward rename-with-whiteout. +log_must renameat2 -w whiteout new +check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "active" +# Check new file. +log_must grep '^whiteout$' new +# Check that the whiteout is actually a {0,0} char device. +log_must grep '^character special file:0:0$' <<<"$(stat -c '%F:%t:%T' whiteout)" + +# The feature flag must remain active until a clean export. +check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "active" +log_must zpool export "$TESTPOOL1" +log_must zpool import "$TESTPOOL1" +check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" + +log_pass "ZFS supports RENAME_WHITEOUT as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/setup.ksh b/tests/zfs-tests/tests/functional/renameat2/setup.ksh new file mode 100755 index 000000000000..b8c26d5ba062 --- /dev/null +++ b/tests/zfs-tests/tests/functional/renameat2/setup.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2019 Aleksa Sarai +# Copyright (C) 2019 SUSE LLC +# + +. $STF_SUITE/include/libtest.shlib + +if ! is_linux ; then + log_unsupported "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_unsupported "renameat2 not supported on this (pre-3.15) linux kernel" +fi + +DISK=${DISKS%% *} +default_setup $DISK diff --git a/tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh new file mode 100755 index 000000000000..a951b1e734c4 --- /dev/null +++ b/tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh @@ -0,0 +1,128 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (C) 2022 Aleksa Sarai +# Copyright (C) 2022 SUSE LLC +# + +. $STF_SUITE/tests/functional/slog/slog.kshlib + +# +# DESCRIPTION: +# Verify that features activated after "zpool freeze" are not actually +# activated. This is actually an unwanted feature of spa_freeze() and +# resulted in certain workarounds for feature@rename_{exchange,whiteout} in +# slog_replay_fs_001. The purpose of this test is to document the behaviour +# so that if it is ever fixed we can remove the slog_replay_fs_001 +# workaround. +# +# This test is based on slog_replay_fs_001. +# +# STRATEGY: +# 1. Create an empty file system (TESTFS) +# 2. Enable the rename_* features. +# 3. Freeze TESTFS +# 4. Trigger the activation of the rename_* features. +# 5. Verify that the features are not active but the ZIL contains new +# TX_RENAME_* entries. +# +# Critically we must not cause the ZIL to be replayed because that will cause +# an assertion to fail (because the feature is not activated and there is an +# assertion verifying this on zfs_replay). +# + +verify_runnable "global" + +log_assert "Activation of features during spa_freeze silently fails." +log_onexit cleanup +log_must setup + +if ! is_linux ; then + log_unsupported "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_unsupported "renameat2 not supported on this (pre-3.15) linux kernel" +fi + +# +# 1. Create an empty file system (TESTFS) +# +log_must zpool create $TESTPOOL $VDEV log mirror $LDEV +log_must zfs set compression=on $TESTPOOL +log_must zfs create $TESTPOOL/$TESTFS + +# +# 2. Enable the rename_* features. +# +# RENAME_EXCHANGE +zfs set "feature@rename_exchange=enabled" "$TESTPOOL" +# RENAME_WHITEOUT +zfs set "feature@rename_whiteout=enabled" "$TESTPOOL" + +# Make sure the features are *enabled*, not active. +zpool sync "$TESTPOOL" +check_feature_flag "feature@rename_exchange" "$TESTPOOL" "enabled" +check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "enabled" + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ + conv=fdatasync,fsync bs=1 count=1 + +# +# 3. Freeze TESTFS +# +log_must zpool freeze $TESTPOOL + +# +# 4. Trigger the activation of the rename_* features. +# + +# TX_RENAME_EXCHANGE +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-a bs=1k count=1 +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-b bs=1k count=1 +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-c bs=1k count=1 +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-d bs=1k count=1 +# rotate the files around +log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{a,b} +log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{b,c} +log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{c,a} +# exchange same path +log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{d,d} + +# TX_RENAME_WHITEOUT +log_must mkfile 1k /$TESTPOOL/$TESTFS/whiteout +log_must renameat2 -w /$TESTPOOL/$TESTFS/whiteout{,-moved} + +# +# 5. Verify that the features are not active but the ZIL contains new +# TX_RENAME_* entries. +# +check_feature_flag "feature@rename_exchange" "$TESTPOOL" "enabled" +check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "enabled" +log_must eval "zdb -iv $TESTPOOL/$TESTFS | grep 'TX_RENAME_EXCHANGE'" +log_must eval "zdb -iv $TESTPOOL/$TESTFS | grep 'TX_RENAME_WHITEOUT'" + +log_pass "Activation of features during spa_freeze silently fails." diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index 0b78a099f0b9..f5aa08d0b1a2 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -69,6 +69,45 @@ log_must zpool create $TESTPOOL $VDEV log mirror $LDEV log_must zfs set compression=on $TESTPOOL log_must zfs create $TESTPOOL/$TESTFS +# +# Enable the rename_* features on Linux. These features are activated during +# zfs_rename() and thus by all rights should not be done before we freeze the +# pool, but due to how 'zpool freeze' is implemented, if the features are +# activated after the pool is freezed, the activations are not written to the +# pool. This results in assertion errors in test environments during ZIL replay +# (in real ZFS deployments this scenario doesn't happen). In order to make +# these features testable, activate the features before we freeze the pool. +# +# See slog_017_neg for what happens if we don't. +# +if ! is_linux ; then + log_note "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_note "renameat2 not supported on this (pre-3.15) linux kernel" +else + # RENAME_EXCHANGE + zfs set "feature@rename_exchange=enabled" "$TESTPOOL" + check_feature_flag "feature@rename_exchange" "$TESTPOOL" "enabled" + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/preflight-xchg-a bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/preflight-xchg-b bs=1k count=1 + log_must renameat2 -x /$TESTPOOL/$TESTFS/preflight-xchg-{a,b} + + # RENAME_WHITEOUT + zfs set "feature@rename_whiteout=enabled" "$TESTPOOL" + check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "enabled" + log_must mkfile 1k /$TESTPOOL/$TESTFS/preflight-whiteout + log_must renameat2 -w /$TESTPOOL/$TESTFS/preflight-whiteout{,-moved} + + # Make sure the features are active. + zpool sync "$TESTPOOL" + check_feature_flag "feature@rename_exchange" "$TESTPOOL" "active" + check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "active" + + # Clean up since these aren't used for the actual ZIL test. + rm /$TESTPOOL/$TESTFS/preflight-* +fi + + # # This dd command works around an issue where ZIL records aren't created # after freezing the pool unless a ZIL header already exists. Create a file @@ -175,6 +214,29 @@ log_must ln /$TESTPOOL/$TESTFS/link_and_unlink \ /$TESTPOOL/$TESTFS/link_and_unlink.link log_must rm /$TESTPOOL/$TESTFS/link_and_unlink.link +# We can't test RENAME_* flags without renameat2(2) support. +if ! is_linux ; then + log_note "renameat2 is linux-only" +elif ! renameat2 -C ; then + log_note "renameat2 not supported on this (pre-3.15) linux kernel" +else + # TX_RENAME_EXCHANGE + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-a bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-b bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-c bs=1k count=1 + log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-d bs=1k count=1 + # rotate the files around + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{a,b} + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{b,c} + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{c,a} + # exchange same path + log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{d,d} + + # TX_RENAME_WHITEOUT + log_must mkfile 1k /$TESTPOOL/$TESTFS/whiteout + log_must renameat2 -w /$TESTPOOL/$TESTFS/whiteout{,-moved} +fi + # # 4. Copy TESTFS to temporary location (TESTDIR/copy) # From 736141a4d6c1631a1dbf68d7b464644bd2c56936 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 17 May 2022 23:29:13 +1000 Subject: [PATCH 7/8] WIP: handle zfs_mknode issues with RENAME_WHITEOUT Signed-off-by: Aleksa Sarai --- include/os/freebsd/zfs/sys/zfs_vnops_os.h | 2 +- include/os/linux/spl/sys/sysmacros.h | 10 +++ include/os/linux/zfs/sys/zfs_vnops_os.h | 2 +- include/sys/zfs_znode.h | 6 +- include/sys/zil.h | 13 ++++ module/os/freebsd/zfs/zfs_vnops_os.c | 4 +- module/os/linux/zfs/zfs_vnops_os.c | 23 +++--- module/os/linux/zfs/zpl_inode.c | 12 ++- module/zfs/zfs_log.c | 47 +++++++++++- module/zfs/zfs_replay.c | 91 +++++++++++++++++++---- module/zfs/zil.c | 14 +++- 11 files changed, 187 insertions(+), 37 deletions(-) diff --git a/include/os/freebsd/zfs/sys/zfs_vnops_os.h b/include/os/freebsd/zfs/sys/zfs_vnops_os.h index cb1c97dc85ff..886fec007ca8 100644 --- a/include/os/freebsd/zfs/sys/zfs_vnops_os.h +++ b/include/os/freebsd/zfs/sys/zfs_vnops_os.h @@ -40,7 +40,7 @@ extern int zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags); extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr); extern int zfs_rename(znode_t *sdzp, const char *snm, znode_t *tdzp, - const char *tnm, cred_t *cr, int flags, uint64_t rflags); + const char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap); extern int zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap, const char *link, znode_t **zpp, cred_t *cr, int flags); extern int zfs_link(znode_t *tdzp, znode_t *sp, diff --git a/include/os/linux/spl/sys/sysmacros.h b/include/os/linux/spl/sys/sysmacros.h index 98d1ab1d7f8a..e14afcbe8117 100644 --- a/include/os/linux/spl/sys/sysmacros.h +++ b/include/os/linux/spl/sys/sysmacros.h @@ -121,6 +121,16 @@ extern uint32_t zone_get_hostid(void *zone); extern void spl_setup(void); extern void spl_cleanup(void); +/* + * Only handles the first 4096 majors and first 256 minors. We don't have a + * libc for the kernel module so we define this inline. + */ +static inline dev_t +makedev(unsigned int major, unsigned int minor) +{ + return ((major & 0xFFF) << 8) | (minor & 0xFF); +} + #define highbit(x) __fls(x) #define lowbit(x) __ffs(x) diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h index 436b60d56641..b2e06312f198 100644 --- a/include/os/linux/zfs/sys/zfs_vnops_os.h +++ b/include/os/linux/zfs/sys/zfs_vnops_os.h @@ -58,7 +58,7 @@ extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip, struct kstat *sp); extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr); extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, - char *tnm, cred_t *cr, int flags, uint64_t rflags); + char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap); extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link, znode_t **zpp, cred_t *cr, int flags); extern int zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr); diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index c142bb22d631..84a18159424e 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -275,12 +275,12 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp); -extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, - uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, - const char *dname, znode_t *szp); extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp); +extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, + uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, + const char *dname, znode_t *szp, znode_t *wzp); extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, offset_t off, ssize_t len, int ioflag, zil_callback_t callback, void *callback_data); diff --git a/include/sys/zil.h b/include/sys/zil.h index 632a1773b846..0ee3eb154fe2 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -317,6 +317,19 @@ typedef struct { /* 2 strings: names of source and destination follow this */ } lr_rename_t; +typedef struct { + lr_rename_t lr_rename; /* common rename portion */ + /* members related to the whiteout file (based on lr_create_t) */ + uint64_t lr_wfoid; /* obj id of the new whiteout file */ + uint64_t lr_wmode; /* mode of object */ + uint64_t lr_wuid; /* uid of whiteout */ + uint64_t lr_wgid; /* gid of whiteout */ + uint64_t lr_wgen; /* generation (txg of creation) */ + uint64_t lr_wcrtime[2]; /* creation time */ + uint64_t lr_wrdev; /* always makedev(0, 0) */ + /* 2 strings: names of source and destination follow this */ +} lr_rename_whiteout_t; + typedef struct { lr_t lr_common; /* common portion of log record */ uint64_t lr_foid; /* file object to write */ diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 84a453829e6a..088ff701100b 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -3473,7 +3473,7 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, int zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, - cred_t *cr, int flags, uint64_t rflags) + cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap) { struct componentname scn, tcn; vnode_t *sdvp, *tdvp; @@ -3481,7 +3481,7 @@ zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname, int error; svp = tvp = NULL; - if (rflags != 0) + if (rflags != 0 || wo_vap != NULL) return (SET_ERROR(EINVAL)); sdvp = ZTOV(sdzp); diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 1b38865e9b1c..40b6ac3ec966 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -2769,6 +2769,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) * cr - credentials of caller. * flags - case flags * rflags - RENAME_* flags + * wa_vap - attributes for RENAME_WHITEOUT (must be a char 0:0). * * RETURN: 0 on success, error code on failure. * @@ -2778,7 +2779,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) /*ARGSUSED*/ int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, - cred_t *cr, int flags, uint64_t rflags) + cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap) { znode_t *szp, *tzp; zfsvfs_t *zfsvfs = ZTOZSB(sdzp); @@ -2792,7 +2793,6 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, int zflg = 0; boolean_t waited = B_FALSE; /* Needed for whiteout inode creation. */ - vattr_t wo_vap; boolean_t fuid_dirtied; zfs_acl_ids_t acl_ids; boolean_t have_acl = B_FALSE; @@ -2810,6 +2810,15 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT))) return (SET_ERROR(EINVAL)); + /* + * Make sure we only get wo_vap iff. RENAME_WHITEOUT and that it's the + * right kind of vattr_t for the whiteout file. These are set + * internally by ZFS so should never be incorrect. + */ + VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL); + VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR); + VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0)); + ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(sdzp); ZFS_VERIFY_ZP(tdzp); @@ -3056,12 +3065,8 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, if (error) goto out; - zpl_vap_init(&wo_vap, ZTOI(sdzp), S_IFCHR, cr); - /* Can't use of makedevice() here, so hard-code it. */ - wo_vap.va_rdev = 0; - if (!have_acl) { - error = zfs_acl_ids_create(sdzp, 0, &wo_vap, cr, NULL, + error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL, &acl_ids); if (error) goto out; @@ -3213,7 +3218,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, goto commit_unlink_td_szp; break; case RENAME_WHITEOUT: - zfs_mknode(sdzp, &wo_vap, tx, cr, 0, &wzp, &acl_ids); + zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids); error = zfs_link_create(sdl, wzp, tx, ZNEW); if (error) { zfs_znode_delete(wzp, tx); @@ -3235,7 +3240,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, case RENAME_WHITEOUT: zfs_log_rename_whiteout(zilog, tx, (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, - tdzp, tdl->dl_name, szp); + tdzp, tdl->dl_name, szp, wzp); break; default: ASSERT0(rflags & ~RENAME_NOREPLACE); diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c index 9e34c6e37a04..45aa2d7c7e9e 100644 --- a/module/os/linux/zfs/zpl_inode.c +++ b/module/os/linux/zfs/zpl_inode.c @@ -24,6 +24,7 @@ */ +#include #include #include #include @@ -481,14 +482,23 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry, #endif { cred_t *cr = CRED(); + vattr_t *wo_vap = NULL; int error; fstrans_cookie_t cookie; crhold(cr); + if (rflags & RENAME_WHITEOUT) { + wo_vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); + zpl_vap_init(wo_vap, sdip, S_IFCHR, cr); + wo_vap->va_rdev = makedevice(0, 0); + } + cookie = spl_fstrans_mark(); error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip), - dname(tdentry), cr, 0, rflags); + dname(tdentry), cr, 0, rflags, wo_vap); spl_fstrans_unmark(cookie); + if (wo_vap) + kmem_free(wo_vap, sizeof (vattr_t)); crfree(cr); ASSERT3S(error, <=, 0); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 460e97993699..053376b04a24 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -550,20 +550,63 @@ zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, /* * Handles TX_RENAME_WHITEOUT transactions. + * + * Unfortunately we cannot reuse do_zfs_log_rename because we we need to call + * zfs_mknode() on replay which requires stashing bits as with TX_CREATE. */ void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, - znode_t *szp) + znode_t *szp, znode_t *wzp) { + itx_t *itx; spa_t *spa = zilog->zl_spa; + lr_rename_whiteout_t *lr; + size_t snamesize = strlen(sname) + 1; + size_t dnamesize = strlen(dname) + 1; /* zfs_rename must have already activated this feature. */ VERIFY(spa_feature_is_enabled(spa, SPA_FEATURE_RENAME_WHITEOUT)); VERIFY(spa_feature_is_active(spa, SPA_FEATURE_RENAME_WHITEOUT)); + if (zil_replaying(zilog, tx)) + return; + txtype |= TX_RENAME_WHITEOUT; - do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); + itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); + lr = (lr_rename_whiteout_t *)&itx->itx_lr; + lr->lr_rename.lr_sdoid = sdzp->z_id; + lr->lr_rename.lr_tdoid = tdzp->z_id; + + /* + * RENAME_WHITEOUT will create an entry at the source znode, so we need + * to store the same data that the equivalent call to zfs_log_create() + * would. + */ + lr->lr_wfoid = wzp->z_id; + LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT); + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen, + sizeof (uint64_t)); + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)), + lr->lr_wcrtime, sizeof (uint64_t) * 2); + lr->lr_wmode = wzp->z_mode; + lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp)); + lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp)); + + /* + * This rdev will always be makdevice(0, 0) but because the ZIL log and + * replay code needs to be platform independent (and there is no + * platform independent makdev()) we need to copy the one created + * during the rename operation. + */ + (void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev, + sizeof (lr->lr_wrdev)); + + memcpy((char *)(lr + 1), sname, snamesize); + memcpy((char *)(lr + 1) + snamesize, dname, dnamesize); + itx->itx_oid = szp->z_id; + + zil_itx_assign(zilog, itx, tx); } /* diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 59c76a2ee94b..eed310415a32 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -639,29 +639,25 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap) } static int -do_zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap, - uint64_t rflags) +do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname, + char *tname, uint64_t rflags, vattr_t *wo_vap) { - zfsvfs_t *zfsvfs = arg1; - lr_rename_t *lr = arg2; - char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ - char *tname = sname + strlen(sname) + 1; znode_t *sdzp, *tdzp; int error, vflg = 0; - if (byteswap) - byteswap_uint64_array(lr, sizeof (*lr)); - /* Only Linux currently supports RENAME_* flags. */ #ifdef __linux__ VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT)); - IMPLY(rflags & RENAME_EXCHANGE, + VERIFY_IMPLY(rflags & RENAME_EXCHANGE, spa_feature_is_active(zfsvfs->z_os->os_spa, SPA_FEATURE_RENAME_EXCHANGE)); - IMPLY(rflags & RENAME_WHITEOUT, + VERIFY_IMPLY(rflags & RENAME_WHITEOUT, spa_feature_is_active(zfsvfs->z_os->os_spa, SPA_FEATURE_RENAME_WHITEOUT)); + + /* wo_vap must be non-NULL iff. we're doing RENAME_WHITEOUT */ + VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL); #else VERIFY0(rflags); #endif @@ -677,7 +673,8 @@ do_zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap, if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags); + error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags, + wo_vap); zrele(tdzp); zrele(sdzp); @@ -687,15 +684,37 @@ do_zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap, static int zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap) { - return (do_zfs_replay_rename(arg1, arg2, byteswap, 0)); + zfsvfs_t *zfsvfs = arg1; + lr_rename_t *lr = arg2; + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL)); } static int zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) { #ifdef __linux__ - return (do_zfs_replay_rename(arg1, arg2, byteswap, RENAME_EXCHANGE)); + zfsvfs_t *zfsvfs = arg1; + lr_rename_t *lr = arg2; + char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ + char *tname = sname + strlen(sname) + 1; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE, + NULL)); #else + /* + * We should never reach this point because the feature is not + * supported by non-Linux versions of OpenZFS. + */ + PANIC("TX_RENAME_EXCHANGE cannot be replayed on non-Linux systems."); return (SET_ERROR(ENOTSUP)); #endif } @@ -704,8 +723,50 @@ static int zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) { #ifdef __linux__ - return (do_zfs_replay_rename(arg1, arg2, byteswap, RENAME_WHITEOUT)); + zfsvfs_t *zfsvfs = arg1; + lr_rename_whiteout_t *lr = arg2; + int error; + /* sname and tname follow lr_rename_whiteout_t */ + char *sname = (char *)(lr + 1); + char *tname = sname + strlen(sname) + 1; + /* For the whiteout file. */ + xvattr_t xva; + uint64_t objid; + uint64_t dnodesize; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + objid = LR_FOID_GET_OBJ(lr->lr_wfoid); + dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT; + + xva_init(&xva); + zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, + lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid); + + /* + * As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which + * assigns the object's creation time, generation number, and dnode + * slot count. The generic zfs_rename() has no concept of these + * attributes, so we smuggle the values inside the vattr's otherwise + * unused va_ctime, va_nblocks, and va_fsid fields. + */ + ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime); + xva.xva_vattr.va_nblocks = lr->lr_wgen; + xva.xva_vattr.va_fsid = dnodesize; + + error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT); + if (error) + return (error); + + return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname, + RENAME_WHITEOUT, &xva.xva_vattr)); #else + /* + * We should never reach this point because the feature is not + * supported by non-Linux versions of OpenZFS. + */ + PANIC("TX_RENAME_WHITEOUT cannot be replayed on non-Linux systems."); return (SET_ERROR(ENOTSUP)); #endif } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 652a2ea5cd89..a9d166ae5069 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -3055,7 +3055,7 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid) * can be imported onto other systems after a clean 'zfs export'). */ static void -zil_sync_deactivate_features(zilog_t *zilog, dmu_tx_t *tx) +zil_sync_deactivate_features(zilog_t *zilog, dmu_tx_t *tx, boolean_t keep_first) { dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); @@ -3067,9 +3067,17 @@ zil_sync_deactivate_features(zilog_t *zilog, dmu_tx_t *tx) spa_feature_t tx_features[] = { SPA_FEATURE_RENAME_EXCHANGE, SPA_FEATURE_RENAME_WHITEOUT, + /* + * ZILSAXATTR is unconditionally activated for every ZIL, so if + * we can only deactivate the feature if we're not going to + * reuse this ZIL. + */ + keep_first ? SPA_FEATURE_NONE : SPA_FEATURE_ZILSAXATTR, }; for (int i = 0; i < ARRAY_SIZE(tx_features); i++) { spa_feature_t f = tx_features[i]; + if (f == SPA_FEATURE_NONE) + continue; if (dsl_dataset_feature_is_active(ds, f)) dsl_dataset_deactivate_feature(ds, f, tx); } @@ -3123,9 +3131,9 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) */ zil_init_log_chain(zilog, &blk); zh->zh_log = blk; - } else { - zil_sync_deactivate_features(zilog, tx); } + + zil_sync_deactivate_features(zilog, tx, zilog->zl_keep_first); } while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { From 6ac99af002e98a21a453990a76a7aaab0449b2bd Mon Sep 17 00:00:00 2001 From: Ryan Moeller Date: Mon, 17 Oct 2022 13:20:03 +0000 Subject: [PATCH 8/8] Revert feature flags for rename2 This means in cases where a pool is not cleanly exported you may end up with loss of unflushed TX_RENAME_EXCHANGE and TX_RENAME_WHITEOUT transactions and all transactions that follow them in the ZIL for that dataset if the pool is then imported by a release build of ZFS that does not recognize the new TX types. A warning message is logged to dmesg and the pool is fully usable, just missing the last changes. Debug builds will panic when they fail to replay the full log. It's also possible a different new TX type might land upstream before this, in which case similar results are likely to be encountered. The alternative is a pool that cannot be imported on any version of ZFS lacking support for TX_RENAME_EXCHANGE and TX_RENAME_WHITEOUT replay when uncleanly exported. That prevents people from either upgrading their pool for the ability to use overlayfs features or going back to a previous boot environment after having upgraded the pool. Signed-off-by: Ryan Moeller --- include/zfeature_common.h | 2 - module/os/linux/zfs/zfs_vnops_os.c | 94 ------------- module/zcommon/zfeature_common.c | 25 ---- module/zfs/zfs_log.c | 12 -- module/zfs/zfs_replay.c | 17 --- module/zfs/zil.c | 41 ------ .../cli_root/zpool_get/zpool_get.cfg | 7 - .../renameat2/renameat2_exchange.ksh | 10 -- .../renameat2/renameat2_noreplace.ksh | 7 - .../renameat2/renameat2_whiteout.ksh | 9 -- .../tests/functional/slog/slog_017_neg.ksh | 128 ------------------ .../functional/slog/slog_replay_fs_001.ksh | 39 ------ 12 files changed, 391 deletions(-) delete mode 100755 tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 85a55710a41e..76dd7ed57478 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -75,8 +75,6 @@ typedef enum spa_feature { SPA_FEATURE_DEVICE_REBUILD, SPA_FEATURE_ZSTD_COMPRESS, SPA_FEATURE_DRAID, - SPA_FEATURE_RENAME_EXCHANGE, - SPA_FEATURE_RENAME_WHITEOUT, SPA_FEATURES } spa_feature_t; diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 40b6ac3ec966..6aa655ccfd28 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -68,7 +68,6 @@ #include #include #include -#include #include /* @@ -2597,70 +2596,6 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr) return (err); } -typedef struct zfs_activate_feature_arg { - const char *name; - spa_feature_t feature; -} zfs_activate_feature_arg; - -static int -zfs_activate_feature_check(void *arg, dmu_tx_t *tx) -{ - zfs_activate_feature_arg *zlefa = arg; - spa_feature_t f = zlefa->feature; - - dsl_pool_t *dp = dmu_tx_pool(tx); - - if (f == SPA_FEATURE_NONE) - return (SET_ERROR(EINVAL)); - - if (!spa_feature_is_enabled(dp->dp_spa, f)) - return (SET_ERROR(ENOTSUP)); - - return (0); -} - -static void -zfs_activate_feature_sync(void *arg, dmu_tx_t *tx) -{ - zfs_activate_feature_arg *zlefa = arg; - spa_feature_t f = zlefa->feature; - - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dataset_t *ds = NULL; - - ASSERT3S(spa_feature_table[f].fi_type, ==, ZFEATURE_TYPE_BOOLEAN); - VERIFY0(dsl_dataset_hold(dp, zlefa->name, FTAG, &ds)); - if (dsl_dataset_feature_is_active(ds, f) != B_TRUE) { - ds->ds_feature_activation[f] = (void *)B_TRUE; - dsl_dataset_activate_feature(ds->ds_object, f, - ds->ds_feature_activation[f], tx); - ds->ds_feature[f] = ds->ds_feature_activation[f]; - } - dsl_dataset_rele(ds, FTAG); -} - -static int -zfs_activate_feature(spa_t *spa, spa_feature_t feature) -{ - zfs_activate_feature_arg zlefa = { - .name = spa_name(spa), - .feature = feature, - }; - - ASSERT(spa_feature_is_enabled(spa, feature)); - if (spa_feature_is_active(spa, feature)) - return (0); - - /* - * We need to wait for the activation to sync out before we continue - * with the rename operation, so that when we create the ZIL entry the - * dataset already has the feature activated. - */ - return (dsl_sync_task(spa_name(spa), zfs_activate_feature_check, - zfs_activate_feature_sync, &zlefa, 0, - ZFS_SPACE_CHECK_EXTRA_RESERVED)); -} - typedef struct zfs_zlock { krwlock_t *zl_rwlock; /* lock we acquired */ znode_t *zl_znode; /* znode we held */ @@ -2787,7 +2722,6 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, zfs_dirlock_t *sdl, *tdl; dmu_tx_t *tx; zfs_zlock_t *zl; - spa_feature_t zil_feature = SPA_FEATURE_NONE; int cmp, serr, terr; int error = 0; int zflg = 0; @@ -2824,21 +2758,6 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, ZFS_VERIFY_ZP(tdzp); zilog = zfsvfs->z_log; - switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) { - case RENAME_EXCHANGE: - zil_feature = SPA_FEATURE_RENAME_EXCHANGE; - break; - case RENAME_WHITEOUT: - zil_feature = SPA_FEATURE_RENAME_WHITEOUT; - break; - } - - if (zil_feature != SPA_FEATURE_NONE && - !spa_feature_is_enabled(zfsvfs->z_os->os_spa, zil_feature)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - /* * We check i_sb because snapshots and the ctldir must have different * super blocks. @@ -3079,19 +2998,6 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm, } } - /* - * Before the dmu_tx is created, activate the RENAME_* feature flags. - * We want to do this as late as possible, to avoid activating the - * feature if the operation is likely to fail, but we have to do it - * before dmu_tx_assign() because we need the feature flag activation - * to sync out before we create the ZIL entry. - */ - if (zil_feature != SPA_FEATURE_NONE) { - error = zfs_activate_feature(zfsvfs->z_os->os_spa, zil_feature); - if (error) - goto out; - } - tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index ad759c033b8b..fc0e09605eef 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -598,31 +598,6 @@ zpool_feature_init(void) zfeature_register(SPA_FEATURE_DRAID, "org.openzfs:draid", "draid", "Support for distributed spare RAID", ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL); - -#ifdef __linux__ - { - static const spa_feature_t rename_exchange_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_RENAME_EXCHANGE, - "org.openzfs:rename_exchange", "rename_exchange", - "Support for renameat2(RENAME_EXCHANGE).", - ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET, - ZFEATURE_TYPE_BOOLEAN, rename_exchange_deps); - } - { - static const spa_feature_t rename_whiteout_deps[] = { - SPA_FEATURE_EXTENSIBLE_DATASET, - SPA_FEATURE_NONE - }; - zfeature_register(SPA_FEATURE_RENAME_WHITEOUT, - "org.openzfs:rename_whiteout", "rename_whiteout", - "Support for renameat2(RENAME_WHITEOUT).", - ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET, - ZFEATURE_TYPE_BOOLEAN, rename_whiteout_deps); - } -#endif } #if defined(_KERNEL) diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 053376b04a24..edc67e07878c 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -45,7 +45,6 @@ #include #include #include -#include /* * These zfs_log_* functions must be called within a dmu tx, in one @@ -538,12 +537,6 @@ zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname, znode_t *szp) { - spa_t *spa = zilog->zl_spa; - - /* zfs_rename must have already activated this feature. */ - VERIFY(spa_feature_is_enabled(spa, SPA_FEATURE_RENAME_EXCHANGE)); - VERIFY(spa_feature_is_active(spa, SPA_FEATURE_RENAME_EXCHANGE)); - txtype |= TX_RENAME_EXCHANGE; do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp); } @@ -560,15 +553,10 @@ zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, znode_t *szp, znode_t *wzp) { itx_t *itx; - spa_t *spa = zilog->zl_spa; lr_rename_whiteout_t *lr; size_t snamesize = strlen(sname) + 1; size_t dnamesize = strlen(dname) + 1; - /* zfs_rename must have already activated this feature. */ - VERIFY(spa_feature_is_enabled(spa, SPA_FEATURE_RENAME_WHITEOUT)); - VERIFY(spa_feature_is_active(spa, SPA_FEATURE_RENAME_WHITEOUT)); - if (zil_replaying(zilog, tx)) return; diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index eed310415a32..6ff67d672f27 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -649,13 +649,6 @@ do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname, #ifdef __linux__ VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT)); - VERIFY_IMPLY(rflags & RENAME_EXCHANGE, - spa_feature_is_active(zfsvfs->z_os->os_spa, - SPA_FEATURE_RENAME_EXCHANGE)); - VERIFY_IMPLY(rflags & RENAME_WHITEOUT, - spa_feature_is_active(zfsvfs->z_os->os_spa, - SPA_FEATURE_RENAME_WHITEOUT)); - /* wo_vap must be non-NULL iff. we're doing RENAME_WHITEOUT */ VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL); #else @@ -710,11 +703,6 @@ zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap) return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE, NULL)); #else - /* - * We should never reach this point because the feature is not - * supported by non-Linux versions of OpenZFS. - */ - PANIC("TX_RENAME_EXCHANGE cannot be replayed on non-Linux systems."); return (SET_ERROR(ENOTSUP)); #endif } @@ -762,11 +750,6 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap) return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname, RENAME_WHITEOUT, &xva.xva_vattr)); #else - /* - * We should never reach this point because the feature is not - * supported by non-Linux versions of OpenZFS. - */ - PANIC("TX_RENAME_WHITEOUT cannot be replayed on non-Linux systems."); return (SET_ERROR(ENOTSUP)); #endif } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index a9d166ae5069..b9f177daee53 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -3044,45 +3044,6 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid) zil_free_commit_waiter(zcw); } -/* - * Called as part of zil_sync once the replay succeeded successfully. - * - * For backwards-compatibility reasons, brand new TX_* records need to have a - * feature flag associated with them so that a system will not be confused when - * importing a pool with unknown TX_* types. These features are only activated - * while the ZIL contains a log entry containing the new TX_* type, and need to - * be deactivated when the ZIL has been applied and cleared (so that the pool - * can be imported onto other systems after a clean 'zfs export'). - */ -static void -zil_sync_deactivate_features(zilog_t *zilog, dmu_tx_t *tx, boolean_t keep_first) -{ - dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); - - /* - * A destroyed ZIL chain can't contain any of the TX_* records which - * are gated by these features. So, deactivate the features. They'll be - * re-activated if the feature is needed again. - */ - spa_feature_t tx_features[] = { - SPA_FEATURE_RENAME_EXCHANGE, - SPA_FEATURE_RENAME_WHITEOUT, - /* - * ZILSAXATTR is unconditionally activated for every ZIL, so if - * we can only deactivate the feature if we're not going to - * reuse this ZIL. - */ - keep_first ? SPA_FEATURE_NONE : SPA_FEATURE_ZILSAXATTR, - }; - for (int i = 0; i < ARRAY_SIZE(tx_features); i++) { - spa_feature_t f = tx_features[i]; - if (f == SPA_FEATURE_NONE) - continue; - if (dsl_dataset_feature_is_active(ds, f)) - dsl_dataset_deactivate_feature(ds, f, tx); - } -} - /* * Called in syncing context to free committed log blocks and update log header. */ @@ -3132,8 +3093,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx) zil_init_log_chain(zilog, &blk); zh->zh_log = blk; } - - zil_sync_deactivate_features(zilog, tx, zilog->zl_keep_first); } while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index 0e1572f55909..6075e1f1abbd 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -104,10 +104,3 @@ if ! is_freebsd; then "feature@edonr" ) fi - -if is_linux; then - properties+=( - "feature@rename_exchange" - "feature@rename_whiteout" - ) -fi diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh index 4547494b0cd1..94e56231feb1 100755 --- a/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_exchange.ksh @@ -37,8 +37,6 @@ function cleanup log_assert "ZFS supports RENAME_EXCHANGE." log_onexit cleanup -check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" - cd $TESTDIR echo "foo" > foo echo "bar" > bar @@ -49,23 +47,15 @@ log_must grep '^foo$' foo # Basic exchange. log_must renameat2 -x foo bar -check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "active" log_must grep '^bar$' foo log_must grep '^foo$' bar # And exchange back. log_must renameat2 -x foo bar -check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "active" log_must grep '^foo$' foo log_must grep '^bar$' bar # Exchange with a bad path should fail. log_mustnot renameat2 -x bar baz -# The feature flag must remain active until a clean export. -check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "active" -log_must zpool export "$TESTPOOL1" -log_must zpool import "$TESTPOOL1" -check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" - log_pass "ZFS supports RENAME_EXCHANGE as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh index 272cdd99d841..d75b94fab465 100755 --- a/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_noreplace.ksh @@ -37,9 +37,6 @@ function cleanup log_assert "ZFS supports RENAME_NOREPLACE." log_onexit cleanup -check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" -check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" - cd $TESTDIR touch foo bar @@ -51,8 +48,4 @@ log_mustnot renameat2 -n bar foo # Regular renames should succeed. log_must renameat2 -n bar baz -# The other rename feature flags should not be active. -check_feature_flag "feature@rename_exchange" "$TESTPOOL1" "enabled" -check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" - log_pass "ZFS supports RENAME_NOREPLACE as expected." diff --git a/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh index c2bf59fbf772..8ecb074dbbdb 100755 --- a/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh +++ b/tests/zfs-tests/tests/functional/renameat2/renameat2_whiteout.ksh @@ -37,23 +37,14 @@ function cleanup log_assert "ZFS supports RENAME_WHITEOUT." log_onexit cleanup -check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" - cd $TESTDIR echo "whiteout" > whiteout # Straight-forward rename-with-whiteout. log_must renameat2 -w whiteout new -check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "active" # Check new file. log_must grep '^whiteout$' new # Check that the whiteout is actually a {0,0} char device. log_must grep '^character special file:0:0$' <<<"$(stat -c '%F:%t:%T' whiteout)" -# The feature flag must remain active until a clean export. -check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "active" -log_must zpool export "$TESTPOOL1" -log_must zpool import "$TESTPOOL1" -check_feature_flag "feature@rename_whiteout" "$TESTPOOL1" "enabled" - log_pass "ZFS supports RENAME_WHITEOUT as expected." diff --git a/tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh b/tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh deleted file mode 100755 index a951b1e734c4..000000000000 --- a/tests/zfs-tests/tests/functional/slog/slog_017_neg.ksh +++ /dev/null @@ -1,128 +0,0 @@ -#!/bin/ksh -p -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# - -# -# Copyright (C) 2022 Aleksa Sarai -# Copyright (C) 2022 SUSE LLC -# - -. $STF_SUITE/tests/functional/slog/slog.kshlib - -# -# DESCRIPTION: -# Verify that features activated after "zpool freeze" are not actually -# activated. This is actually an unwanted feature of spa_freeze() and -# resulted in certain workarounds for feature@rename_{exchange,whiteout} in -# slog_replay_fs_001. The purpose of this test is to document the behaviour -# so that if it is ever fixed we can remove the slog_replay_fs_001 -# workaround. -# -# This test is based on slog_replay_fs_001. -# -# STRATEGY: -# 1. Create an empty file system (TESTFS) -# 2. Enable the rename_* features. -# 3. Freeze TESTFS -# 4. Trigger the activation of the rename_* features. -# 5. Verify that the features are not active but the ZIL contains new -# TX_RENAME_* entries. -# -# Critically we must not cause the ZIL to be replayed because that will cause -# an assertion to fail (because the feature is not activated and there is an -# assertion verifying this on zfs_replay). -# - -verify_runnable "global" - -log_assert "Activation of features during spa_freeze silently fails." -log_onexit cleanup -log_must setup - -if ! is_linux ; then - log_unsupported "renameat2 is linux-only" -elif ! renameat2 -C ; then - log_unsupported "renameat2 not supported on this (pre-3.15) linux kernel" -fi - -# -# 1. Create an empty file system (TESTFS) -# -log_must zpool create $TESTPOOL $VDEV log mirror $LDEV -log_must zfs set compression=on $TESTPOOL -log_must zfs create $TESTPOOL/$TESTFS - -# -# 2. Enable the rename_* features. -# -# RENAME_EXCHANGE -zfs set "feature@rename_exchange=enabled" "$TESTPOOL" -# RENAME_WHITEOUT -zfs set "feature@rename_whiteout=enabled" "$TESTPOOL" - -# Make sure the features are *enabled*, not active. -zpool sync "$TESTPOOL" -check_feature_flag "feature@rename_exchange" "$TESTPOOL" "enabled" -check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "enabled" - -# -# This dd command works around an issue where ZIL records aren't created -# after freezing the pool unless a ZIL header already exists. Create a file -# synchronously to force ZFS to write one out. -# -log_must dd if=/dev/zero of=/$TESTPOOL/$TESTFS/sync \ - conv=fdatasync,fsync bs=1 count=1 - -# -# 3. Freeze TESTFS -# -log_must zpool freeze $TESTPOOL - -# -# 4. Trigger the activation of the rename_* features. -# - -# TX_RENAME_EXCHANGE -log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-a bs=1k count=1 -log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-b bs=1k count=1 -log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-c bs=1k count=1 -log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/xchg-d bs=1k count=1 -# rotate the files around -log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{a,b} -log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{b,c} -log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{c,a} -# exchange same path -log_must renameat2 -x /$TESTPOOL/$TESTFS/xchg-{d,d} - -# TX_RENAME_WHITEOUT -log_must mkfile 1k /$TESTPOOL/$TESTFS/whiteout -log_must renameat2 -w /$TESTPOOL/$TESTFS/whiteout{,-moved} - -# -# 5. Verify that the features are not active but the ZIL contains new -# TX_RENAME_* entries. -# -check_feature_flag "feature@rename_exchange" "$TESTPOOL" "enabled" -check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "enabled" -log_must eval "zdb -iv $TESTPOOL/$TESTFS | grep 'TX_RENAME_EXCHANGE'" -log_must eval "zdb -iv $TESTPOOL/$TESTFS | grep 'TX_RENAME_WHITEOUT'" - -log_pass "Activation of features during spa_freeze silently fails." diff --git a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh index f5aa08d0b1a2..0a6da23db2f1 100755 --- a/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh +++ b/tests/zfs-tests/tests/functional/slog/slog_replay_fs_001.ksh @@ -69,45 +69,6 @@ log_must zpool create $TESTPOOL $VDEV log mirror $LDEV log_must zfs set compression=on $TESTPOOL log_must zfs create $TESTPOOL/$TESTFS -# -# Enable the rename_* features on Linux. These features are activated during -# zfs_rename() and thus by all rights should not be done before we freeze the -# pool, but due to how 'zpool freeze' is implemented, if the features are -# activated after the pool is freezed, the activations are not written to the -# pool. This results in assertion errors in test environments during ZIL replay -# (in real ZFS deployments this scenario doesn't happen). In order to make -# these features testable, activate the features before we freeze the pool. -# -# See slog_017_neg for what happens if we don't. -# -if ! is_linux ; then - log_note "renameat2 is linux-only" -elif ! renameat2 -C ; then - log_note "renameat2 not supported on this (pre-3.15) linux kernel" -else - # RENAME_EXCHANGE - zfs set "feature@rename_exchange=enabled" "$TESTPOOL" - check_feature_flag "feature@rename_exchange" "$TESTPOOL" "enabled" - log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/preflight-xchg-a bs=1k count=1 - log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/preflight-xchg-b bs=1k count=1 - log_must renameat2 -x /$TESTPOOL/$TESTFS/preflight-xchg-{a,b} - - # RENAME_WHITEOUT - zfs set "feature@rename_whiteout=enabled" "$TESTPOOL" - check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "enabled" - log_must mkfile 1k /$TESTPOOL/$TESTFS/preflight-whiteout - log_must renameat2 -w /$TESTPOOL/$TESTFS/preflight-whiteout{,-moved} - - # Make sure the features are active. - zpool sync "$TESTPOOL" - check_feature_flag "feature@rename_exchange" "$TESTPOOL" "active" - check_feature_flag "feature@rename_whiteout" "$TESTPOOL" "active" - - # Clean up since these aren't used for the actual ZIL test. - rm /$TESTPOOL/$TESTFS/preflight-* -fi - - # # This dd command works around an issue where ZIL records aren't created # after freezing the pool unless a ZIL header already exists. Create a file