Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zstd early abort #13244

Merged
merged 1 commit into from
May 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions include/sys/zstd/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ typedef struct zfs_zstd_meta {
* kstat helper macros
*/
#define ZSTDSTAT(stat) (zstd_stats.stat.value.ui64)
#define ZSTDSTAT_ZERO(stat) \
(atomic_store_64(&zstd_stats.stat.value.ui64, 0))
#define ZSTDSTAT_ADD(stat, val) \
atomic_add_64(&zstd_stats.stat.value.ui64, (val))
#define ZSTDSTAT_SUB(stat, val) \
Expand All @@ -90,6 +92,8 @@ void zstd_fini(void);

size_t zfs_zstd_compress(void *s_start, void *d_start, size_t s_len,
size_t d_len, int level);
size_t zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len,
size_t d_len, int level);
int zfs_zstd_get_level(void *s_start, size_t s_len, uint8_t *level);
int zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
size_t d_len, uint8_t *level);
Expand Down
8 changes: 8 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -2129,6 +2129,14 @@ However, if there are fewer than
metaslabs in the vdev, this functionality is disabled.
This ensures that we don't set aside an unreasonable amount of space for the ZIL.
.
.It Sy zfs_zstd_earlyabort_pass Ns = Ns Sy 1 Pq int
Whether the heuristic for detecting incompressible data, using LZ4 and zstd-1
passes, is enabled for zstd levels >= 3.
.
.It Sy zfs_zstd_abort_size Ns = Ns Sy 131072 Pq int
Minimum uncompressed size (inclusive) of a record before the early abort
heuristic will be attempted.
.
.It Sy zio_deadman_log_all Ns = Ns Sy 0 Ns | Ns 1 Pq int
If non-zero, the zio deadman will produce debugging messages
.Pq see Sy zfs_dbgmsg_enable
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/zio_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
{"gzip-9", 9, gzip_compress, gzip_decompress, NULL},
{"zle", 64, zle_compress, zle_decompress, NULL},
{"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs, NULL},
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress,
{"zstd", ZIO_ZSTD_LEVEL_DEFAULT, zfs_zstd_compress_wrap,
zfs_zstd_decompress, zfs_zstd_decompress_level},
};

Expand Down
126 changes: 121 additions & 5 deletions module/zstd/zfs_zstd.c

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The man page correctly notes that the early-abort heuristic runs at zstd levels >= 3 by default, but the code comment for zfs_zstd_compress_wrap instead says it does not run at level 3, which is confusing.
In that comment, `<=` should be `<`.

* - Zeroth, if this is <= zstd-3, or < zstd_abort_size (currently 128k), don't try any of this, just go. (because experimentally that was a reasonable cutoff for a perf win with tiny ratio change)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, just something I missed because I was dithering over which version to use. Feel free to open a tiny PR changing it, or I'll try to remember when I get a chance later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, the in-code documentation is wrong; the code itself starts the heuristic at level >= 3.

Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@
#include "lib/zstd.h"
#include "lib/common/zstd_errors.h"

/* The early abort heuristic is only attempted while this is greater than 0. */
static int zstd_earlyabort_pass = 1;
/* Lowest zstd level at which the early abort heuristic is attempted. */
static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;
/* Smallest source length, in bytes, for which the heuristic is attempted. */
static unsigned int zstd_abort_size = (128 * 1024);

static kstat_t *zstd_ksp = NULL;

typedef struct zstd_stats {
Expand All @@ -62,6 +66,21 @@ typedef struct zstd_stats {
kstat_named_t zstd_stat_dec_header_inval;
kstat_named_t zstd_stat_com_fail;
kstat_named_t zstd_stat_dec_fail;
/*
* LZ4 first-pass early abort verdict
*/
kstat_named_t zstd_stat_lz4pass_allowed;
kstat_named_t zstd_stat_lz4pass_rejected;
/*
* zstd-1 second-pass early abort verdict
*/
kstat_named_t zstd_stat_zstdpass_allowed;
kstat_named_t zstd_stat_zstdpass_rejected;
/*
* We excluded this from early abort for some reason
*/
kstat_named_t zstd_stat_passignored;
kstat_named_t zstd_stat_passignored_size;
kstat_named_t zstd_stat_buffers;
kstat_named_t zstd_stat_size;
} zstd_stats_t;
Expand All @@ -76,10 +95,44 @@ static zstd_stats_t zstd_stats = {
{ "decompress_header_invalid", KSTAT_DATA_UINT64 },
{ "compress_failed", KSTAT_DATA_UINT64 },
{ "decompress_failed", KSTAT_DATA_UINT64 },
{ "lz4pass_allowed", KSTAT_DATA_UINT64 },
{ "lz4pass_rejected", KSTAT_DATA_UINT64 },
{ "zstdpass_allowed", KSTAT_DATA_UINT64 },
{ "zstdpass_rejected", KSTAT_DATA_UINT64 },
{ "passignored", KSTAT_DATA_UINT64 },
{ "passignored_size", KSTAT_DATA_UINT64 },
{ "buffers", KSTAT_DATA_UINT64 },
{ "size", KSTAT_DATA_UINT64 },
};

#ifdef _KERNEL
/*
 * kstat update handler for the zstd statistics.
 *
 * A write (rw == KSTAT_WRITE) directed at zstd_ksp resets the error and
 * early-abort counters to zero; any other request is a no-op.  The
 * zstd_stat_buffers and zstd_stat_size entries are never reset here.
 *
 * Always returns 0.
 */
static int
kstat_zstd_update(kstat_t *ksp, int rw)
{
	ASSERT(ksp != NULL);

	/* Nothing to do unless this is a write to our own kstat. */
	if (rw != KSTAT_WRITE || ksp != zstd_ksp) {
		return (0);
	}

	/* Allocation failure counters. */
	ZSTDSTAT_ZERO(zstd_stat_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_alloc_fallback);
	ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail);

	/* Invalid-input and compression/decompression failure counters. */
	ZSTDSTAT_ZERO(zstd_stat_com_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_inval);
	ZSTDSTAT_ZERO(zstd_stat_dec_header_inval);
	ZSTDSTAT_ZERO(zstd_stat_com_fail);
	ZSTDSTAT_ZERO(zstd_stat_dec_fail);

	/* Early abort heuristic counters. */
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed);
	ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected);
	ZSTDSTAT_ZERO(zstd_stat_passignored);
	ZSTDSTAT_ZERO(zstd_stat_passignored_size);

	return (0);
}
#endif

/* Enums describing the allocator type specified by kmem_type in zstd_kmem */
enum zstd_kmem_type {
ZSTD_KMEM_UNKNOWN = 0,
Expand Down Expand Up @@ -377,6 +430,64 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
}


/*
 * Compress a block with zstd, first running an optional early abort
 * heuristic that tries to detect data not worth compressing.
 *
 * s_start/s_len describe the source buffer, d_start/d_len the destination
 * buffer, and level is a zio_zstd_levels enum value.
 *
 * Returns s_len if the level cannot be translated or if the heuristic
 * rejects the block; otherwise returns the result of zfs_zstd_compress()
 * at the requested level.
 */
size_t
zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int level)
{
	int16_t zstd_level;

	if (zstd_enum_to_level(level, &zstd_level)) {
		ZSTDSTAT_BUMP(zstd_stat_com_inval);
		return (s_len);
	}

	/*
	 * A zstd early abort heuristic.
	 *
	 * - Zeroth, if this is < zstd-3, or < zstd_abort_size (currently
	 *   128k), don't try any of this, just go.
	 *   (because experimentally that was a reasonable cutoff for a perf win
	 *   with tiny ratio change)
	 * - First, we try LZ4 compression, and if it doesn't early abort, we
	 *   jump directly to whatever compression level we intended to try.
	 * - Second, we try zstd-1 - if that errors out (usually, but not
	 *   exclusively, if it would overflow), we give up early.
	 *
	 *   If it works, instead we go on and compress anyway.
	 *
	 * Why two passes? LZ4 alone gets you a lot of the way, but on highly
	 * compressible data, it was losing up to 8.5% of the compressed
	 * savings versus no early abort, and all the zstd-fast levels are
	 * worse indications on their own than LZ4, and don't improve the LZ4
	 * pass noticeably if stacked like this.
	 */
	size_t actual_abort_size = zstd_abort_size;
	if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level &&
	    s_len >= actual_abort_size) {
		/*
		 * Both passes return size_t; keep the result unsigned so the
		 * comparisons against s_len and d_len are not mixed-sign.
		 */
		size_t pass_len;

		pass_len = lz4_compress_zfs(s_start, d_start, s_len, d_len, 0);
		if (pass_len < d_len) {
			ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed);
			goto keep_trying;
		}
		ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected);

		pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len,
		    ZIO_ZSTD_LEVEL_1);
		if (pass_len == s_len || pass_len == 0 || pass_len > d_len) {
			ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected);
			return (s_len);
		}
		ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed);
	} else {
		ZSTDSTAT_BUMP(zstd_stat_passignored);
		/* Additionally record when the block was simply too small. */
		if (s_len < actual_abort_size) {
			ZSTDSTAT_BUMP(zstd_stat_passignored_size);
		}
	}
keep_trying:
	/* The heuristic passed or was skipped: compress at the real level. */
	return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level));
}

/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
Expand Down Expand Up @@ -437,8 +548,10 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
* too small, that is not a failure. Everything else is a
* failure, so increment the compression failure counter.
*/
if (ZSTD_getErrorCode(c_len) != ZSTD_error_dstSize_tooSmall) {
int err = ZSTD_getErrorCode(c_len);
if (err != ZSTD_error_dstSize_tooSmall) {
ZSTDSTAT_BUMP(zstd_stat_com_fail);
dprintf("Error: %s", ZSTD_getErrorString(err));
}
return (s_len);
}
Expand Down Expand Up @@ -753,6 +866,9 @@ zstd_init(void)
if (zstd_ksp != NULL) {
zstd_ksp->ks_data = &zstd_stats;
kstat_install(zstd_ksp);
#ifdef _KERNEL
zstd_ksp->ks_update = kstat_zstd_update;
#endif
}

return (0);
Expand Down Expand Up @@ -781,8 +897,8 @@ module_init(zstd_init);
module_exit(zstd_fini);
#endif

EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);
EXPORT_SYMBOL(zfs_zstd_decompress);
EXPORT_SYMBOL(zfs_zstd_cache_reap_now);
ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, INT, ZMOD_RW,
"Enable early abort attempts when using zstd");
ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW,
"Minimal size of block to attempt early abort");
#endif