Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 'zfs rollback' on mounted file systems #1214

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/linux/vfs_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ bdi_setup_and_register(struct backing_dev_info *bdi,char *name,unsigned int cap)
}
#endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */

/*
 * 2.6.38 API change,
 * LOOKUP_RCU flag introduced to distinguish rcu-walk from ref-walk cases.
 *
 * When the kernel headers do not provide LOOKUP_RCU, define it as 0x0 so
 * that code referencing the flag still compiles and a bitwise test such
 * as (flags & LOOKUP_RCU) always evaluates to zero.
 */
#ifndef LOOKUP_RCU
#define LOOKUP_RCU 0x0
#endif /* LOOKUP_RCU */

/*
* 3.2-rc1 API change,
* Add set_nlink() if it is not exported by the Linux kernel.
Expand Down
1 change: 1 addition & 0 deletions include/sys/zfs_vfsops.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ typedef struct zfs_sb {
krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all znodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */
unsigned long z_rollback_time;/* last online rollback time */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
struct inode *z_ctldir; /* .zfs directory inode */
avl_tree_t z_ctldir_snaps; /* .zfs/snapshot entries */
Expand Down
1 change: 1 addition & 0 deletions include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ typedef struct znode {
boolean_t z_is_zvol; /* are we used by the zvol */
boolean_t z_is_mapped; /* are we mmap'ed */
boolean_t z_is_ctldir; /* are we .zfs entry */
boolean_t z_is_stale; /* are we stale due to rollback? */
struct inode z_inode; /* generic vfs inode */
} znode_t;

Expand Down
4 changes: 3 additions & 1 deletion include/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,20 @@
#include <sys/vfs.h>
#include <linux/vfs_compat.h>
#include <linux/xattr_compat.h>
#include <linux/dcache_compat.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/falloc.h>

/* zpl_inode.c */
extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
struct dentry *dentry, zpl_umode_t mode, cred_t *cr);
zpl_umode_t mode, cred_t *cr);

extern const struct inode_operations zpl_inode_operations;
extern const struct inode_operations zpl_dir_inode_operations;
extern const struct inode_operations zpl_symlink_inode_operations;
extern const struct inode_operations zpl_special_inode_operations;
extern dentry_operations_t zpl_dentry_operations;

/* zpl_file.c */
extern ssize_t zpl_read_common(struct inode *ip, const char *buf,
Expand Down
1 change: 1 addition & 0 deletions module/zfs/zfs_ctldir.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_TRUE;
zp->z_is_sa = B_FALSE;
zp->z_is_stale = B_FALSE;
ip->i_ino = id;
ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
ip->i_uid = 0;
Expand Down
44 changes: 26 additions & 18 deletions module/zfs/zfs_vfsops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,7 @@ EXPORT_SYMBOL(zfs_sb_prune);
#endif /* HAVE_SHRINK */

/*
* Teardown the zfs_sb_t::z_os.
* Teardown the zfs_sb_t.
*
* Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
* and 'z_teardown_inactive_lock' held.
Expand All @@ -1053,7 +1053,6 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
* for non-snapshots.
*/
shrink_dcache_sb(zsb->z_parent->z_sb);
(void) spl_invalidate_inodes(zsb->z_parent->z_sb, 0);
}

/*
Expand All @@ -1079,25 +1078,26 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
}

/*
* At this point there are no vops active, and any new vops will
* fail with EIO since we have z_teardown_lock for writer (only
* relavent for forced unmount).
* At this point there are no VFS ops active, and any new VFS ops
* will fail with EIO since we have z_teardown_lock for writer (only
* relevant for forced unmount).
*
* Release all holds on dbufs.
*/
mutex_enter(&zsb->z_znodes_lock);
for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
zp = list_next(&zsb->z_all_znodes, zp))
zp = list_next(&zsb->z_all_znodes, zp)) {
if (zp->z_sa_hdl) {
ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0);
zfs_znode_dmu_fini(zp);
}
}
mutex_exit(&zsb->z_znodes_lock);

/*
* If we are unmounting, set the unmounted flag and let new vops
* If we are unmounting, set the unmounted flag and let new VFS ops
* unblock. zfs_inactive will have the unmounted behavior, and all
* other vops will fail with EIO.
* other VFS ops will fail with EIO.
*/
if (unmounting) {
zsb->z_unmounted = B_TRUE;
Expand Down Expand Up @@ -1392,7 +1392,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
EXPORT_SYMBOL(zfs_vget);

/*
* Block out VOPs and close zfs_sb_t::z_os
* Block out VFS ops and close zfs_sb_t
*
* Note, if successful, then we return with the 'z_teardown_lock' and
* 'z_teardown_inactive_lock' write held.
Expand All @@ -1404,14 +1404,15 @@ zfs_suspend_fs(zfs_sb_t *zsb)

if ((error = zfs_sb_teardown(zsb, B_FALSE)) != 0)
return (error);

dmu_objset_disown(zsb->z_os, zsb);

return (0);
}
EXPORT_SYMBOL(zfs_suspend_fs);

/*
* Reopen zfs_sb_t::z_os and release VOPs.
* Reopen zfs_sb_t and release VFS ops.
*/
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
Expand Down Expand Up @@ -1440,30 +1441,37 @@ zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
goto bail;

VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
zsb->z_rollback_time = jiffies;

/*
* Attempt to re-establish all the active znodes with
* their dbufs. If a zfs_rezget() fails, then we'll let
* any potential callers discover that via ZFS_ENTER_VERIFY_VP
* when they try to use their znode.
* Attempt to re-establish all the active inodes with their
* dbufs. If a zfs_rezget() fails, then we unhash the inode
* and mark it stale. This prevents a collision if a new
* inode/object is created which must use the same inode
* number. The stale inode will be released when the
* VFS prunes the dentry holding the remaining references
* on the stale inode.
*/
mutex_enter(&zsb->z_znodes_lock);
for (zp = list_head(&zsb->z_all_znodes); zp;
zp = list_next(&zsb->z_all_znodes, zp)) {
(void) zfs_rezget(zp);
err2 = zfs_rezget(zp);
if (err2) {
remove_inode_hash(ZTOI(zp));
zp->z_is_stale = B_TRUE;
}
}
mutex_exit(&zsb->z_znodes_lock);

}

bail:
/* release the VOPs */
/* release the VFS ops */
rw_exit(&zsb->z_teardown_inactive_lock);
rrw_exit(&zsb->z_teardown_lock, FTAG);

if (err) {
/*
* Since we couldn't reopen zfs_sb_t::z_os, force
* Since we couldn't reopen zfs_sb_t, force
* unmount this file system.
*/
(void) zfs_umount(zsb->z_sb);
Expand Down
46 changes: 33 additions & 13 deletions module/zfs/zfs_znode.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,10 @@ zfs_inode_destroy(struct inode *ip)
zfsctl_inode_destroy(ip);

mutex_enter(&zsb->z_znodes_lock);
list_remove(&zsb->z_all_znodes, zp);
zsb->z_nr_znodes--;
if (list_link_active(&zp->z_link_node)) {
list_remove(&zsb->z_all_znodes, zp);
zsb->z_nr_znodes--;
}
mutex_exit(&zsb->z_znodes_lock);

if (zp->z_acl_cached) {
Expand Down Expand Up @@ -348,7 +350,7 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
static znode_t *
zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
struct dentry *dentry, struct inode *dip)
struct inode *dip)
{
znode_t *zp;
struct inode *ip;
Expand Down Expand Up @@ -379,6 +381,7 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
zp->z_is_zvol = B_FALSE;
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_FALSE;
zp->z_is_stale = B_FALSE;

zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);

Expand Down Expand Up @@ -414,11 +417,15 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
zfs_inode_update(zp);
zfs_inode_set_ops(zsb, ip);

if (insert_inode_locked(ip))
goto error;

if (dentry)
d_instantiate(dentry, ip);
/*
* The only way insert_inode_locked() can fail is if the ip->i_ino
* number is already hashed for this super block. This can never
* happen because the inode numbers map 1:1 with the object numbers.
*
* The one exception is rolling back a mounted file system, but in
* this case all the active inodes are unhashed during the rollback.
*/
VERIFY3S(insert_inode_locked(ip), ==, 0);

mutex_enter(&zsb->z_znodes_lock);
list_insert_tail(&zsb->z_all_znodes, zp);
Expand Down Expand Up @@ -720,9 +727,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,

if (!(flag & IS_ROOT_NODE)) {
*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
vap->va_dentry, ZTOI(dzp));
ASSERT(*zpp != NULL);
ASSERT(dzp != NULL);
ZTOI(dzp));
VERIFY(*zpp != NULL);
VERIFY(dzp != NULL);
} else {
/*
* If we are creating the root node, the "parent" we
Expand Down Expand Up @@ -931,7 +938,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
* bonus buffer.
*/
zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
doi.doi_bonus_type, obj_num, NULL, NULL, NULL);
doi.doi_bonus_type, obj_num, NULL, NULL);
if (zp == NULL) {
err = ENOENT;
} else {
Expand Down Expand Up @@ -961,8 +968,20 @@ zfs_rezget(znode_t *zp)
zfs_acl_free(zp->z_acl_cached);
zp->z_acl_cached = NULL;
}

mutex_exit(&zp->z_acl_lock);

rw_enter(&zp->z_xattr_lock, RW_WRITER);
if (zp->z_xattr_cached) {
nvlist_free(zp->z_xattr_cached);
zp->z_xattr_cached = NULL;
}

if (zp->z_xattr_parent) {
iput(ZTOI(zp->z_xattr_parent));
zp->z_xattr_parent = NULL;
}
rw_exit(&zp->z_xattr_lock);

ASSERT(zp->z_sa_hdl == NULL);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
Expand Down Expand Up @@ -1016,6 +1035,7 @@ zfs_rezget(znode_t *zp)

zp->z_unlinked = (zp->z_links == 0);
zp->z_blksz = doi.doi_data_block_size;
zfs_inode_update(zp);

ZFS_OBJ_HOLD_EXIT(zsb, obj_num);

Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zpl_ctldir.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode)

crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
zpl_vap_init(vap, dip, dentry, mode | S_IFDIR, cr);
zpl_vap_init(vap, dip, mode | S_IFDIR, cr);

error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
if (error == 0) {
Expand Down
Loading