Skip to content

Commit

Permalink
ZFS fix to make xattr=sa syncing to ZIL on create/remove/update.
Browse files Browse the repository at this point in the history
In current implementation of xattr=sa, it doesn't sync to ZIL before
returning on xattr create/remove/update, so if node crash happens
before xattr gets synced to storage, then corresponding xattr are lost.

This diff makes xattr=sa syncing to ZIL on create/remove/update, so
that xattr's are not lost on node crash.

Signed-off-by: Jitendra Patidar <jitendra.patidar@nutanix.com>
Closes #8768
  • Loading branch information
jsai20 committed Aug 7, 2019
1 parent c81f179 commit 5423ad1
Show file tree
Hide file tree
Showing 15 changed files with 338 additions and 8 deletions.
27 changes: 27 additions & 0 deletions cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,32 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, void *arg)
}
}

/* ARGSUSED */
static void
zil_prt_rec_setsaxattr(zilog_t *zilog, int txtype, void *arg)
{
lr_setsaxattr_t *lr = arg;
char *name;
char *val;
int i;

name = (char *)(lr + 1);
(void) printf("%sfoid %llu\n", tab_prefix,
(u_longlong_t)lr->lr_foid);

(void) printf("%sXAT_NAME %s\n", tab_prefix, name);
if (lr->lr_size == 0) {
(void) printf("%sXAT_VALUE NULL\n", tab_prefix);
} else {
printf("%sXAT_VALUE ", tab_prefix);
val = name + (strlen(name) + 1);
for (i = 0; i < lr->lr_size; i++) {
printf("%c", *val);
val++;
}
}
}

/* ARGSUSED */
static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, void *arg)
Expand Down Expand Up @@ -305,6 +331,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ATTR "},
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL_ATTR "},
{.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "},
{.zri_print = zil_prt_rec_setsaxattr, .zri_name = "TX_SETSAXATTR "},
};

/* ARGSUSED */
Expand Down
1 change: 1 addition & 0 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2132,6 +2132,7 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_MKDIR_ATTR */
NULL, /* TX_MKDIR_ACL_ATTR */
NULL, /* TX_WRITE2 */
NULL, /* TX_SETSAXATTR */
};

/*
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zfs_sa.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
int zfs_sa_get_xattr(struct znode *);
int zfs_sa_set_xattr(struct znode *);
int zfs_sa_set_xattr(struct znode *, const char *, const void *, size_t);
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
void zfs_sa_init(void);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern void zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, const char *name, const void *value, size_t size);

#if defined(HAVE_UIO_RW)
extern caddr_t zfs_map_page(page_t *, enum seg_rw);
Expand Down
13 changes: 11 additions & 2 deletions include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ typedef enum zil_create {
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_WRITE2 20 /* dmu_sync EALREADY write */
#define TX_MAX_TYPE 21 /* Max transaction type */
#define TX_SETSAXATTR 21 /* Set sa xattrs on file */
#define TX_MAX_TYPE 22 /* Max transaction type */

/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
Expand All @@ -182,7 +183,8 @@ typedef enum zil_create {
(txtype) == TX_SETATTR || \
(txtype) == TX_ACL_V0 || \
(txtype) == TX_ACL || \
(txtype) == TX_WRITE2)
(txtype) == TX_WRITE2 || \
(txtype) == TX_SETSAXATTR)

/*
* The number of dnode slots consumed by the object is stored in the 8
Expand Down Expand Up @@ -335,6 +337,13 @@ typedef struct {
/* optional attribute lr_attr_t may be here */
} lr_setattr_t;

typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to change attributes */
uint64_t lr_size;
/* xattr name and value follows */
} lr_setsaxattr_t;

typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ typedef enum spa_feature {
SPA_FEATURE_BOOKMARK_WRITTEN,
SPA_FEATURE_LOG_SPACEMAP,
SPA_FEATURE_LIVELIST,
SPA_FEATURE_ZILSAXATTR,
SPA_FEATURES
} spa_feature_t;

Expand Down
11 changes: 11 additions & 0 deletions module/zcommon/zfeature_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,17 @@ zpool_feature_init(void)
"com.datto:resilver_defer", "resilver_defer",
"Support for defering new resilvers when one is already running.",
ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
{
static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_ZILSAXATTR,
"org.zfsonlinux:zilsaxattr", "zilsaxattr",
"Zil sa xattr.",
ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET,
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps);
}
}

#if defined(_KERNEL)
Expand Down
34 changes: 34 additions & 0 deletions module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,40 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
zil_itx_assign(zilog, itx, tx);
}

/*
* Handles TX_SETSAXATTR transactions.
*/
void
zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, const char *name, const void *value, size_t size)
{
itx_t *itx;
lr_setsaxattr_t *lr;
size_t recsize = sizeof (lr_setsaxattr_t);
void *xattrstart;
int namelen;

if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;

namelen = strlen(name) + 1;
recsize += (namelen + size);
itx = zil_itx_create(txtype, recsize);
lr = (lr_setsaxattr_t *)&itx->itx_lr;
lr->lr_foid = zp->z_id;
xattrstart = (char *)(lr + 1);
bcopy(name, xattrstart, namelen);
if (value != NULL) {
bcopy(value, xattrstart + namelen, size);
lr->lr_size = size;
} else {
lr->lr_size = 0;
}

itx->itx_sync = (zp->z_sync_cnt != 0);
zil_itx_assign(zilog, itx, tx);
}

/*
* Handles TX_ACL transactions.
*/
Expand Down
87 changes: 87 additions & 0 deletions module/zfs/zfs_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@
#include <sys/atomic.h>
#include <sys/cred.h>
#include <sys/zpl.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/zfeature.h>

/*
* Functions to replay ZFS intent log (ZIL) records
Expand Down Expand Up @@ -860,6 +863,89 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
return (error);
}

static int
zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap)
{
zfsvfs_t *zfsvfs = arg1;
lr_setsaxattr_t *lr = arg2;
znode_t *zp;
nvlist_t *nvl;
size_t sa_size;
const char *name;
const void *value;
size_t size;
int error = 0;

ASSERT(spa_feature_is_enabled(zfsvfs->z_os->os_spa,
SPA_FEATURE_ZILSAXATTR));
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
}

if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);

rw_enter(&zp->z_xattr_lock, RW_WRITER);
mutex_enter(&zp->z_lock);
if (zp->z_xattr_cached == NULL)
error = -zfs_sa_get_xattr(zp);
mutex_exit(&zp->z_lock);

if (error)
goto out;

ASSERT(zp->z_xattr_cached);
nvl = zp->z_xattr_cached;

/* Get xattr name, value and size from log record */
size = lr->lr_size;
name = (char *)(lr + 1);
if (size == 0) {
value = NULL;
error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
} else {
value = name + strlen(name) + 1;
/* Limited to 32k to keep nvpair memory allocations small */
if (size > DXATTR_MAX_ENTRY_SIZE) {
error = (-EFBIG);
goto out;
}

/* Prevent the DXATTR SA from consuming the entire SA region */
error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
if (error)
goto out;

if (sa_size > DXATTR_MAX_SA_SIZE) {
error = (-EFBIG);
goto out;
}

error = -nvlist_add_byte_array(nvl, name, (uchar_t *)value,
size);
}

/*
* Update the SA for additions, modifications, and removals. On
* error drop the inconsistent cached version of the nvlist, it
* will be reconstructed from the ARC when next accessed.
*/
if (error == 0)
error = -zfs_sa_set_xattr(zp, name, value, size);

if (error) {
nvlist_free(nvl);
zp->z_xattr_cached = NULL;
}

ASSERT3S(error, <=, 0);

out:
rw_exit(&zp->z_xattr_lock);
iput(ZTOI(zp));
return (error);
}

static int
zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap)
{
Expand Down Expand Up @@ -981,4 +1067,5 @@ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_create, /* TX_MKDIR_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
zfs_replay_write2, /* TX_WRITE2 */
zfs_replay_setsaxattr, /* TX_SETSAXATTR */
};
24 changes: 22 additions & 2 deletions module/zfs/zfs_sa.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
#include <sys/sa.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_sa.h>
#include <sys/fs/zfs.h>
#include <sys/dmu_objset.h>
#include <sys/sa_impl.h>
#include <sys/zfeature.h>

/*
* ZPL attribute registration table.
Expand Down Expand Up @@ -69,6 +71,8 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
{NULL, 0, 0, 0}
};

int zfs_zil_saxattr_enabled = 1;

#ifdef _KERNEL
int
zfs_sa_readlink(znode_t *zp, uio_t *uio)
Expand Down Expand Up @@ -218,13 +222,14 @@ zfs_sa_get_xattr(znode_t *zp)
}

int
zfs_sa_set_xattr(znode_t *zp)
zfs_sa_set_xattr(znode_t *zp, const char *name, const void *value, size_t vsize)
{
zfsvfs_t *zfsvfs = ZTOZSB(zp);
zilog_t *zilog;
dmu_tx_t *tx;
char *obj;
size_t size;
int error;
int error, logsaxattr = 0;

ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
ASSERT(zp->z_xattr_cached);
Expand All @@ -243,6 +248,11 @@ zfs_sa_set_xattr(znode_t *zp)
if (error)
goto out_free;

zilog = zfsvfs->z_log;
if (spa_feature_is_enabled(zfsvfs->z_os->os_spa,
SPA_FEATURE_ZILSAXATTR) && zfs_zil_saxattr_enabled) {
logsaxattr = 1;
}
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa_create(tx, size);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
Expand All @@ -255,6 +265,10 @@ zfs_sa_set_xattr(znode_t *zp)
sa_bulk_attr_t bulk[2];
uint64_t ctime[2];

if (logsaxattr) {
zfs_log_setsaxattr(zilog, tx, TX_SETSAXATTR, zp, name,
value, vsize);
}
zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DXATTR(zfsvfs),
NULL, obj, size);
Expand All @@ -263,6 +277,8 @@ zfs_sa_set_xattr(znode_t *zp)
VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));

dmu_tx_commit(tx);
if (logsaxattr && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
}
out_free:
vmem_free(obj, size);
Expand Down Expand Up @@ -432,6 +448,10 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp)
}
}

module_param(zfs_zil_saxattr_enabled, int, 0644);
MODULE_PARM_DESC(zfs_zil_saxattr_enabled,
"Toggle xattr=sa extended attribute need to be logged in zil. ");

EXPORT_SYMBOL(zfs_attr_table);
EXPORT_SYMBOL(zfs_sa_readlink);
EXPORT_SYMBOL(zfs_sa_symlink);
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zpl_xattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
* will be reconstructed from the ARC when next accessed.
*/
if (error == 0)
error = -zfs_sa_set_xattr(zp);
error = -zfs_sa_set_xattr(zp, name, value, size);

if (error) {
nvlist_free(nvl);
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ tests = ['slog_001_pos', 'slog_002_pos', 'slog_003_pos', 'slog_004_pos',
'slog_005_pos', 'slog_006_pos', 'slog_007_pos', 'slog_008_neg',
'slog_009_neg', 'slog_010_neg', 'slog_011_neg', 'slog_012_neg',
'slog_013_pos', 'slog_014_pos', 'slog_015_neg', 'slog_replay_fs',
'slog_replay_volume']
'slog_replay_volume', 'slog_016_pos']
tags = ['functional', 'slog']

[tests/functional/snapshot]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,6 @@ if is_linux; then
"feature@resilver_defer"
"feature@bookmark_v2"
"feature@livelist"
"feature@zilsaxattr"
)
fi
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/slog/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ dist_pkgdata_SCRIPTS = \
slog_014_pos.ksh \
slog_015_neg.ksh \
slog_replay_fs.ksh \
slog_replay_volume.ksh
slog_replay_volume.ksh \
slog_016_pos.ksh

dist_pkgdata_DATA = \
slog.cfg \
Expand Down
Loading

0 comments on commit 5423ad1

Please sign in to comment.