Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZFS fix to make xattr=sa logging to ZIL on create/remove/update. #9078

Merged
merged 1 commit into from
Feb 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,29 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, const void *arg)
}
}

/*
 * Print a TX_SETSAXATTR log record: the target object id, the xattr name,
 * and either the xattr value bytes or "NULL" when lr_size is zero.
 *
 * The NUL-terminated name starts right after the fixed-size record and the
 * lr_size value bytes start right after the name's terminator.
 */
static void
zil_prt_rec_setsaxattr(zilog_t *zilog, int txtype, const void *arg)
{
	(void) zilog, (void) txtype;
	const lr_setsaxattr_t *lr = arg;
	const char *name = (const char *)(lr + 1);

	(void) printf("%sfoid %llu\n", tab_prefix,
	    (u_longlong_t)lr->lr_foid);
	(void) printf("%sXAT_NAME %s\n", tab_prefix, name);

	if (lr->lr_size == 0) {
		(void) printf("%sXAT_VALUE NULL\n", tab_prefix);
		return;
	}

	(void) printf("%sXAT_VALUE ", tab_prefix);
	const char *val = name + strlen(name) + 1;
	/* lr_size is 64-bit; index with a matching unsigned type. */
	for (uint64_t i = 0; i < lr->lr_size; i++)
		(void) printf("%c", val[i]);
	/* Terminate the value line like every other line printed here. */
	(void) printf("\n");
}

static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, const void *arg)
{
Expand Down Expand Up @@ -304,6 +327,8 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ATTR "},
{.zri_print = zil_prt_rec_create, .zri_name = "TX_MKDIR_ACL_ATTR "},
{.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "},
{.zri_print = zil_prt_rec_setsaxattr,
.zri_name = "TX_SETSAXATTR "},
};

static int
Expand Down
1 change: 1 addition & 0 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2386,6 +2386,7 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_MKDIR_ATTR */
NULL, /* TX_MKDIR_ACL_ATTR */
NULL, /* TX_WRITE2 */
NULL, /* TX_SETSAXATTR */
};

/*
Expand Down
2 changes: 1 addition & 1 deletion include/sys/zfs_sa.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
int zfs_sa_get_xattr(struct znode *);
int zfs_sa_set_xattr(struct znode *);
int zfs_sa_set_xattr(struct znode *, const char *, const void *, size_t);
void zfs_sa_upgrade(struct sa_handle *, dmu_tx_t *);
void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
void zfs_sa_init(void);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern void zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, const char *name, const void *value, size_t size);

extern void zfs_znode_update_vfs(struct znode *);

Expand Down
13 changes: 11 additions & 2 deletions include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ typedef enum zil_create {
#define TX_MKDIR_ATTR 18 /* mkdir with attr */
#define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */
#define TX_WRITE2 20 /* dmu_sync EALREADY write */
#define TX_MAX_TYPE 21 /* Max transaction type */
#define TX_SETSAXATTR 21 /* Set sa xattrs on file */
#define TX_MAX_TYPE 22 /* Max transaction type */

/*
* The transactions for mkdir, symlink, remove, rmdir, link, and rename
Expand All @@ -182,7 +183,8 @@ typedef enum zil_create {
(txtype) == TX_SETATTR || \
(txtype) == TX_ACL_V0 || \
(txtype) == TX_ACL || \
(txtype) == TX_WRITE2)
(txtype) == TX_WRITE2 || \
(txtype) == TX_SETSAXATTR)

/*
* The number of dnode slots consumed by the object is stored in the 8
Expand Down Expand Up @@ -335,6 +337,13 @@ typedef struct {
/* optional attribute lr_attr_t may be here */
} lr_setattr_t;

/*
 * Log record for TX_SETSAXATTR. The fixed-size fields below are followed by
 * the NUL-terminated xattr name and then lr_size bytes of xattr value.
 */
typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to change attributes */
uint64_t lr_size; /* xattr value size in bytes; 0 when no value is logged */
/* xattr name and value follows */
} lr_setsaxattr_t;

typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* obj id of file */
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ typedef enum spa_feature {
SPA_FEATURE_DEVICE_REBUILD,
SPA_FEATURE_ZSTD_COMPRESS,
SPA_FEATURE_DRAID,
SPA_FEATURE_ZILSAXATTR,
SPA_FEATURES
} spa_feature_t;

Expand Down
9 changes: 5 additions & 4 deletions lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@
<elf-symbol name='fletcher_4_superscalar4_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='1904' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='1960' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -1854,8 +1854,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='../../module/zcommon/zfeature_common.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='15232' id='d96379d0'>
<subrange length='34' type-id='7359adad' id='6a6a7e00'/>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='15680' id='d96379d0'>
<subrange length='35' type-id='7359adad' id='6a6a7e00'/>
</array-type-def>
<enum-decl name='spa_feature' id='33ecb627'>
<underlying-type type-id='9cac1fee'/>
Expand Down Expand Up @@ -1894,7 +1894,8 @@
<enumerator name='SPA_FEATURE_DEVICE_REBUILD' value='31'/>
<enumerator name='SPA_FEATURE_ZSTD_COMPRESS' value='32'/>
<enumerator name='SPA_FEATURE_DRAID' value='33'/>
<enumerator name='SPA_FEATURES' value='34'/>
<enumerator name='SPA_FEATURE_ZILSAXATTR' value='34'/>
<enumerator name='SPA_FEATURES' value='35'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<enum-decl name='zfeature_flags' id='6db816a4'>
Expand Down
10 changes: 10 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -2098,6 +2098,16 @@ Limit SLOG write size per commit executed with synchronous priority.
Any writes above that will be executed with lower (asynchronous) priority
to limit potential SLOG device abuse by single active ZIL writer.
.
.It Sy zfs_zil_saxattr Ns = Ns Sy 1 Ns | Ns 0 Pq int
Setting this tunable to zero disables ZIL logging of new
.Sy xattr Ns = Ns Sy sa
records if the
.Sy org.openzfs:zilsaxattr
feature is enabled on the pool.
This would only be necessary to work around bugs in the ZIL logging or replay
code for this record type.
The tunable has no effect if the feature is disabled.
.
.It Sy zfs_embedded_slog_min_ms Ns = Ns Sy 64 Pq int
Usually, one metaslab from each normal-class vdev is dedicated for use by
the ZIL to log synchronous writes.
Expand Down
18 changes: 18 additions & 0 deletions man/man7/zpool-features.7
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,24 @@ by user and group.
\*[instant-never]
\*[remount-upgrade]
.
.feature org.openzfs zilsaxattr yes extensible_dataset
This feature enables
.Sy xattr Ns = Ns Sy sa
extended attribute logging in the ZIL.
jsai20 marked this conversation as resolved.
Show resolved Hide resolved
If enabled, extended attribute changes
.Pq both Sy xattr Ns = Ns Sy dir No and Sy xattr Ns = Ns Sy sa
are guaranteed to be durable if either the dataset had
.Sy sync Ns = Ns Sy always
set at the time the changes were made, or
.Xr sync 2
is called on the dataset after the changes were made.
.Pp
This feature becomes
.Sy active
when a ZIL is created for at least one dataset and will be returned to the
.Sy enabled
state when it is destroyed for all datasets that use this feature.
.
.feature com.delphix zpool_checkpoint yes
This feature enables the
.Nm zpool Cm checkpoint
Expand Down
6 changes: 3 additions & 3 deletions module/os/freebsd/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -5549,7 +5549,7 @@ zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
if (error != 0)
error = SET_ERROR(error);
else
error = zfs_sa_set_xattr(zp);
error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
if (error != 0) {
zp->z_xattr_cached = NULL;
nvlist_free(nvl);
Expand Down Expand Up @@ -5706,9 +5706,9 @@ zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
if (error != 0)
error = SET_ERROR(error);
}
kmem_free(buf, entry_size);
if (error == 0)
error = zfs_sa_set_xattr(zp);
error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
jsai20 marked this conversation as resolved.
Show resolved Hide resolved
kmem_free(buf, entry_size);
if (error != 0) {
zp->z_xattr_cached = NULL;
nvlist_free(nvl);
Expand Down
2 changes: 1 addition & 1 deletion module/os/linux/zfs/zpl_xattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value,
* will be reconstructed from the ARC when next accessed.
*/
if (error == 0)
error = -zfs_sa_set_xattr(zp);
error = -zfs_sa_set_xattr(zp, name, value, size);
jsai20 marked this conversation as resolved.
Show resolved Hide resolved

if (error) {
nvlist_free(nvl);
Expand Down
12 changes: 12 additions & 0 deletions module/zcommon/zfeature_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,18 @@ zpool_feature_init(void)
"org.openzfs:draid", "draid", "Support for distributed spare RAID",
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);

{
static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
jsai20 marked this conversation as resolved.
Show resolved Hide resolved
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_ZILSAXATTR,
"org.openzfs:zilsaxattr", "zilsaxattr",
"Support for xattr=sa extended attribute logging in ZIL.",
ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
}

zfs_mod_list_supported_free(sfeatures);
}

Expand Down
34 changes: 34 additions & 0 deletions module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,40 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
zil_itx_assign(zilog, itx, tx);
}

/*
 * Handles TX_SETSAXATTR transactions.
 *
 * Builds a log record made of the fixed lr_setsaxattr_t header, the
 * NUL-terminated xattr name, and then the value bytes, and assigns it to
 * the ZIL.  A NULL value is logged with lr_size == 0 and no value bytes.
 * Nothing is logged when zil_replaying() reports true or when the znode
 * is marked unlinked.
 */
void
zfs_log_setsaxattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, const char *name, const void *value, size_t size)
{
	itx_t *itx;
	lr_setsaxattr_t *lr;
	char *payload;
	size_t namelen;
	size_t valsize;

	if (zil_replaying(zilog, tx) || zp->z_unlinked)
		return;

	/* Include the terminator so the reader can find the value. */
	namelen = strlen(name) + 1;
	/*
	 * Only reserve room for a value that is actually copied, so the
	 * record never carries trailing bytes that were not written here.
	 */
	valsize = (value != NULL) ? size : 0;

	itx = zil_itx_create(txtype, sizeof (*lr) + namelen + valsize);
	lr = (lr_setsaxattr_t *)&itx->itx_lr;
	lr->lr_foid = zp->z_id;
	lr->lr_size = valsize;

	payload = (char *)(lr + 1);
	bcopy(name, payload, namelen);
	if (valsize != 0)
		bcopy(value, payload + namelen, valsize);

	itx->itx_sync = (zp->z_sync_cnt != 0);
	zil_itx_assign(zilog, itx, tx);
}

/*
* Handles TX_ACL transactions.
*/
Expand Down
83 changes: 83 additions & 0 deletions module/zfs/zfs_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include <sys/atomic.h>
#include <sys/cred.h>
#include <sys/zpl.h>
#include <sys/dmu_objset.h>
#include <sys/zfeature.h>

/*
* NB: FreeBSD expects to be able to do vnode locking in lookup and
Expand Down Expand Up @@ -868,6 +870,86 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
return (error);
}

/*
 * Replay a TX_SETSAXATTR record.
 *
 * The record names an object (lr_foid) and is followed by a NUL-terminated
 * xattr name and lr_size bytes of value.  lr_size == 0 removes the named
 * entry from the znode's cached xattr nvlist; otherwise the entry is added
 * as a byte array.  On success the nvlist is written back through
 * zfs_sa_set_xattr().
 *
 * Returns 0 on success or an error number.
 */
static int
zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap)
{
	zfsvfs_t *zfsvfs = arg1;
	lr_setsaxattr_t *lr = arg2;
	znode_t *zp;
	nvlist_t *nvl;
	size_t sa_size;
	char *name;
	char *value;
	size_t size;
	int error = 0;

	ASSERT(spa_feature_is_active(zfsvfs->z_os->os_spa,
	    SPA_FEATURE_ZILSAXATTR));
	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
		return (error);

	rw_enter(&zp->z_xattr_lock, RW_WRITER);
	mutex_enter(&zp->z_lock);
	if (zp->z_xattr_cached == NULL)
		error = zfs_sa_get_xattr(zp);
	mutex_exit(&zp->z_lock);

	if (error)
		goto out;

	ASSERT(zp->z_xattr_cached);
	nvl = zp->z_xattr_cached;

	/*
	 * Get xattr name, value and size from log record.  Decisions are
	 * made on the 64-bit lr_size itself so that narrowing to size_t
	 * cannot turn an oversized value into a small (or zero) one.
	 */
	name = (char *)(lr + 1);
	if (lr->lr_size == 0) {
		value = NULL;
		size = 0;
		error = nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY);
	} else {
		/* Limited to 32k to keep nvpair memory allocations small */
		if (lr->lr_size > DXATTR_MAX_ENTRY_SIZE) {
			error = SET_ERROR(EFBIG);
			goto out;
		}
		size = (size_t)lr->lr_size;
		value = name + strlen(name) + 1;

		/* Prevent the DXATTR SA from consuming the entire SA region */
		error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
		if (error)
			goto out;

		if (sa_size > DXATTR_MAX_SA_SIZE) {
			error = SET_ERROR(EFBIG);
			goto out;
		}

		error = nvlist_add_byte_array(nvl, name, (uchar_t *)value,
		    size);
	}

	/*
	 * Update the SA for additions, modifications, and removals. On
	 * error drop the inconsistent cached version of the nvlist, it
	 * will be reconstructed from the ARC when next accessed.
	 */
	if (error == 0)
		error = zfs_sa_set_xattr(zp, name, value, size);

	if (error) {
		nvlist_free(nvl);
		zp->z_xattr_cached = NULL;
	}

out:
	rw_exit(&zp->z_xattr_lock);
	zrele(zp);
	return (error);
}

static int
zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap)
{
Expand Down Expand Up @@ -989,4 +1071,5 @@ zil_replay_func_t *const zfs_replay_vector[TX_MAX_TYPE] = {
zfs_replay_create, /* TX_MKDIR_ATTR */
zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */
zfs_replay_write2, /* TX_WRITE2 */
zfs_replay_setsaxattr, /* TX_SETSAXATTR */
};
Loading