Skip to content

Commit

Permalink
ZFS fix to make xattr=sa logging to ZIL on xattr create/remove/update.
Browse files Browse the repository at this point in the history
As such, there are no specific synchronous symentics defined for
the xattrs. But for xattr=on, it does log to ZIL and zil_commit() is
done, if sync=always is set on dataset. This provides sync symentics
for xattr=on with sync=always set on dataset.
For the xattr=sa implementation, it doesn't log to ZIL, so, even with
sync=always, xattrs are not guranteed to be synced before xattr call
returns to caller. so xattr can be lost if system crash happens, before
txg carring xattr transaction is synced.

This change makes xattr=sa logging to ZIL on xattr create/remove/update
and xattrs are synced to ZIL (zil_commit() done) for sync=always.
This make xattr=sa behaviour similar to xattr=on.

This could also provide basic framework to support implementing sync
symentics at file level for xattr=sa.

Signed-off-by: Jitendra Patidar <jitendra.patidar@nutanix.com>
Closes #8768

Change-Id: Ic24306def9515d34ac07fba4cd7d341ddbfc75c3
  • Loading branch information
jsai20 committed Jun 11, 2021
1 parent 860051f commit fbc2d93
Show file tree
Hide file tree
Showing 19 changed files with 361 additions and 14 deletions.
27 changes: 27 additions & 0 deletions cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,32 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, const void *arg)
}
}

/* ARGSUSED */
static void
zil_prt_rec_setsaxattr(zilog_t *zilog, int txtype, const void *arg)
{
const lr_setsaxattr_t *lr = arg;
char *name;
char *val;
int i;

name = (char *)(lr + 1);
(void) printf("%sfoid %llu\n", tab_prefix,
(u_longlong_t)lr->lr_foid);

(void) printf("%sXAT_NAME %s\n", tab_prefix, name);
if (lr->lr_size == 0) {
(void) printf("%sXAT_VALUE NULL\n", tab_prefix);
} else {
printf("%sXAT_VALUE ", tab_prefix);
val = name + (strlen(name) + 1);
for (i = 0; i < lr->lr_size; i++) {
printf("%c", *val);
val++;
}
}
}

/* ARGSUSED */
static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, const void *arg)
Expand Down Expand Up @@ -305,6 +331,7 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ATTR "},
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL_ATTR "},
{.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "},
{.zri_print = zil_prt_rec_setsaxattr, .zri_name = "TX_SETSAXATTR "},
};

/* ARGSUSED */
Expand Down
1 change: 1 addition & 0 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2383,6 +2383,7 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_MKDIR_ATTR */
NULL, /* TX_MKDIR_ACL_ATTR */
NULL, /* TX_WRITE2 */
NULL, /* TX_SETSAXATTR */
};

/*
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zfs_sa.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
int zfs_sa_get_xattr(struct znode *);
int zfs_sa_set_xattr(struct znode *);
int zfs_sa_set_xattr(struct znode *, const char *, const void *, size_t);
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
void zfs_sa_init(void);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern void zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, const char *name, const void *value, size_t size);

extern void zfs_znode_update_vfs(struct znode *);

Expand Down
13 changes: 11 additions & 2 deletions include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ typedef enum zil_create {
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_WRITE2 20 /* dmu_sync EALREADY write */
#define TX_MAX_TYPE 21 /* Max transaction type */
#define TX_SETSAXATTR 21 /* Set sa xattrs on file */
#define TX_MAX_TYPE 22 /* Max transaction type */

/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
Expand All @@ -182,7 +183,8 @@ typedef enum zil_create {
(txtype) == TX_SETATTR || \
(txtype) == TX_ACL_V0 || \
(txtype) == TX_ACL || \
(txtype) == TX_WRITE2)
(txtype) == TX_WRITE2 || \
(txtype) == TX_SETSAXATTR)

/*
* The number of dnode slots consumed by the object is stored in the 8
Expand Down Expand Up @@ -335,6 +337,13 @@ typedef struct {
/* optional attribute lr_attr_t may be here */
} lr_setattr_t;

typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to change attributes */
uint64_t lr_size;
/* xattr name and value follows */
} lr_setsaxattr_t;

typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ typedef enum spa_feature {
SPA_FEATURE_DEVICE_REBUILD,
SPA_FEATURE_ZSTD_COMPRESS,
SPA_FEATURE_DRAID,
SPA_FEATURE_ZILSAXATTR,
SPA_FEATURES
} spa_feature_t;

Expand Down
9 changes: 5 additions & 4 deletions lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@
<elf-symbol name='fletcher_4_superscalar4_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='1904' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='1960' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -4396,7 +4396,8 @@
<enumerator name='SPA_FEATURE_DEVICE_REBUILD' value='31'/>
<enumerator name='SPA_FEATURE_ZSTD_COMPRESS' value='32'/>
<enumerator name='SPA_FEATURE_DRAID' value='33'/>
<enumerator name='SPA_FEATURES' value='34'/>
<enumerator name='SPA_FEATURE_ZILSAXATTR' value='34'/>
<enumerator name='SPA_FEATURES' value='35'/>
</enum-decl>
<pointer-type-def type-id='type-id-328' size-in-bits='64' id='type-id-329'/>
<function-decl name='zfeature_lookup_name' visibility='default' binding='global' size-in-bits='64'>
Expand Down Expand Up @@ -6712,8 +6713,8 @@
<pointer-type-def type-id='type-id-463' size-in-bits='64' id='type-id-460'/>
<typedef-decl name='zfeature_info_t' type-id='type-id-456' id='type-id-464'/>

<array-type-def dimensions='1' type-id='type-id-464' size-in-bits='15232' id='type-id-465'>
<subrange length='34' type-id='type-id-33' id='type-id-395'/>
<array-type-def dimensions='1' type-id='type-id-464' size-in-bits='15680' id='type-id-465'>
<subrange length='35' type-id='type-id-33' id='type-id-395'/>

</array-type-def>
<var-decl name='spa_feature_table' type-id='type-id-465' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
Expand Down
4 changes: 4 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -2073,6 +2073,10 @@ Limit SLOG write size per commit executed with synchronous priority.
Any writes above that will be executed with lower (asynchronous) priority
to limit potential SLOG device abuse by single active ZIL writer.
.
.It Sy zfs_zil_saxattr_enabled Ns = Ns Sy 1 Pq int
Enable xattr=sa extended attribute logging in zil.
Setting value to 0 disables xattr=sa logging in zil.
.
.It Sy zfs_embedded_slog_min_ms Ns = Ns Sy 64 Pq int
Usually, one metaslab from each normal-class vdev is dedicated for use by
the ZIL to log synchronous writes.
Expand Down
4 changes: 2 additions & 2 deletions module/os/freebsd/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -5488,7 +5488,7 @@ zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
nvl = zp->z_xattr_cached;
error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
if (error == 0)
error = zfs_sa_set_xattr(zp);
error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
if (error != 0) {
zp->z_xattr_cached = NULL;
nvlist_free(nvl);
Expand Down Expand Up @@ -5626,7 +5626,7 @@ zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
kmem_free(buf, entry_size);
if (error == 0)
error = zfs_sa_set_xattr(zp);
error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
if (error != 0) {
zp->z_xattr_cached = NULL;
nvlist_free(nvl);
Expand Down
2 changes: 1 addition & 1 deletion module/os/linux/zfs/zpl_xattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
* will be reconstructed from the ARC when next accessed.
*/
if (error == 0)
error = -zfs_sa_set_xattr(zp);
error = -zfs_sa_set_xattr(zp, name, value, size);

if (error) {
nvlist_free(nvl);
Expand Down
12 changes: 12 additions & 0 deletions module/zcommon/zfeature_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,18 @@ zpool_feature_init(void)
zfeature_register(SPA_FEATURE_DRAID,
"org.openzfs:draid", "draid", "Support for distributed spare RAID",
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL);

{
static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_ZILSAXATTR,
"org.zfsonlinux:zilsaxattr", "zilsaxattr",
"Zil sa xattr.",
ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE,
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps);
}
}

#if defined(_KERNEL)
Expand Down
34 changes: 34 additions & 0 deletions module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,40 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
zil_itx_assign(zilog, itx, tx);
}

/*
* Handles TX_SETSAXATTR transactions.
*/
void
zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, const char *name, const void *value, size_t size)
{
itx_t *itx;
lr_setsaxattr_t *lr;
size_t recsize = sizeof (lr_setsaxattr_t);
void *xattrstart;
int namelen;

if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;

namelen = strlen(name) + 1;
recsize += (namelen + size);
itx = zil_itx_create(txtype, recsize);
lr = (lr_setsaxattr_t *)&itx->itx_lr;
lr->lr_foid = zp->z_id;
xattrstart = (char *)(lr + 1);
bcopy(name, xattrstart, namelen);
if (value != NULL) {
bcopy(value, (char *)xattrstart + namelen, size);
lr->lr_size = size;
} else {
lr->lr_size = 0;
}

itx->itx_sync = (zp->z_sync_cnt != 0);
zil_itx_assign(zilog, itx, tx);
}

/*
* Handles TX_ACL transactions.
*/
Expand Down
87 changes: 87 additions & 0 deletions module/zfs/zfs_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
#include <sys/atomic.h>
#include <sys/cred.h>
#include <sys/zpl.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/zfeature.h>

/*
* NB: FreeBSD expects to be able to do vnode locking in lookup and
Expand Down Expand Up @@ -868,6 +871,89 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
return (error);
}

static int
zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap)
{
zfsvfs_t *zfsvfs = arg1;
lr_setsaxattr_t *lr = arg2;
znode_t *zp;
nvlist_t *nvl;
size_t sa_size;
char *name;
char *value;
size_t size;
int error = 0;

ASSERT(spa_feature_is_active(zfsvfs->z_os->os_spa,
SPA_FEATURE_ZILSAXATTR));
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
}

if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);

rw_enter(&zp->z_xattr_lock, RW_WRITER);
mutex_enter(&zp->z_lock);
if (zp->z_xattr_cached == NULL)
error = -zfs_sa_get_xattr(zp);
mutex_exit(&zp->z_lock);

if (error)
goto out;

ASSERT(zp->z_xattr_cached);
nvl = zp->z_xattr_cached;

/* Get xattr name, value and size from log record */
size = lr->lr_size;
name = (char *)(lr + 1);
if (size == 0) {
value = NULL;
error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
} else {
value = name + strlen(name) + 1;
/* Limited to 32k to keep nvpair memory allocations small */
if (size > DXATTR_MAX_ENTRY_SIZE) {
error = (-EFBIG);
goto out;
}

/* Prevent the DXATTR SA from consuming the entire SA region */
error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
if (error)
goto out;

if (sa_size > DXATTR_MAX_SA_SIZE) {
error = (-EFBIG);
goto out;
}

error = -nvlist_add_byte_array(nvl, name, (uchar_t *)value,
size);
}

/*
* Update the SA for additions, modifications, and removals. On
* error drop the inconsistent cached version of the nvlist, it
* will be reconstructed from the ARC when next accessed.
*/
if (error == 0)
error = -zfs_sa_set_xattr(zp, name, value, size);

if (error) {
nvlist_free(nvl);
zp->z_xattr_cached = NULL;
}

ASSERT3S(error, <=, 0);

out:
rw_exit(&zp->z_xattr_lock);
zrele(zp);
return (error);
}

static int
zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap)
{
Expand Down Expand Up @@ -989,4 +1075,5 @@ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_create, /* TX_MKDIR_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
zfs_replay_write2, /* TX_WRITE2 */
zfs_replay_setsaxattr, /* TX_SETSAXATTR */
};
Loading

0 comments on commit fbc2d93

Please sign in to comment.