Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ZStandard Compression #9024

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
85e37b8
Track compression level through dataset, zio, and arc header fields
allanjude Jul 12, 2019
97394cd
ZoL Track compression level through dataset, zio, and arc header fields
c0d3z3r0 Jun 9, 2020
2b0ce55
Activate per-dataset feature flags when the property is set
allanjude Jul 31, 2020
38b07cf
Import unmodified ZStandard v1.4.5
c0d3z3r0 Jun 9, 2020
636c6f4
Add platform and userspace build compatibility headers
allanjude Jul 12, 2019
683f234
Add userspace compatibility defines for kernel code to zfs_context.h
c0d3z3r0 Jun 9, 2020
e48bc2b
Add ZStandard compression support
allanjude Jul 12, 2019
e7fc148
Merge in ZoF improvements to ZSTD for ZFS
c0d3z3r0 Jun 9, 2020
61b377d
Add a build intermediary lib for zstd
c0d3z3r0 Jun 9, 2020
e4a850e
Add kstat counters for ZSTD
allanjude Jun 16, 2020
e84fea6
Formal changes: tests, documentation, copyright
c0d3z3r0 Jun 9, 2020
50295af
zdb: add a new flag (-Z) to inspect ZSTD compression header
allanjude Jun 14, 2020
93b2a4c
[NEEDSREWORD] Make zdb display the compression algorithm and level
allanjude Jul 3, 2020
4541610
zstd: add more tests
allanjude Jul 11, 2020
5c489af
Don't skip encryption stage because of insufficient compression gains
allanjude Aug 8, 2020
a3635ee
Add new tests for encrypted L2ARC w/ and w/o Compressed ARC
allanjude Aug 11, 2020
d095515
Fix L2ARC reads when compressed ARC disabled
allanjude Aug 8, 2020
f2f2f68
Disable an unused function to pass the compile tests
allanjude Sep 14, 2019
50fb4c3
Cleanup
allanjude Aug 13, 2020
d1e89a7
[squash me] Add tests to makefile
allanjude Aug 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ CONTRIBUTORS:
Keith M Wesolowski <wesolows@foobazco.org>
Kevin Tanguy <kevin.tanguy@ovh.net>
KireinaHoro <i@jsteward.moe>
Kjeld Schouten-Lebbing <kjeld@schouten-lebbing.nl>
Kohsuke Kawaguchi <kk@kohsuke.org>
Kyle Blatter <kyleblatter@llnl.gov>
Kyle Fuller <inbox@kylefuller.co.uk>
Expand Down Expand Up @@ -210,6 +211,7 @@ CONTRIBUTORS:
Michael Gebetsroither <michael@mgeb.org>
Michael Kjorling <michael@kjorling.se>
Michael Martin <mgmartin.mgm@gmail.com>
Michael Niewöhner <foss@mniewoehner.de>
Mike Gerdts <mike.gerdts@joyent.com>
Mike Harsch <mike@harschsystems.com>
Mike Leddy <mike.leddy@gmail.com>
Expand Down Expand Up @@ -258,6 +260,7 @@ CONTRIBUTORS:
Saso Kiselkov <saso.kiselkov@nexenta.com>
Scot W. Stevenson <scot.stevenson@gmail.com>
Sean Eric Fagan <sef@ixsystems.com>
Sebastian Gottschall <s.gottschall@dd-wrt.com>
Sen Haerens <sen@senhaerens.be>
Serapheim Dimitropoulos <serapheim@delphix.com>
Seth Forshee <seth.forshee@canonical.com>
Expand Down
4 changes: 3 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,11 @@ commitcheck:

PHONY += cstyle
cstyle:
@find ${top_srcdir} -name build -prune -o -type f -name '*.[hc]' \
@find ${top_srcdir} -name build -prune \
-o -type f -name '*.[hc]' \
! -name 'zfs_config.*' ! -name '*.mod.c' \
! -name 'opt_global.h' ! -name '*_if*.h' \
! -path './module/zstd/lib/*' \
-exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+

filter_executable = -exec test -x '{}' \; -print
Expand Down
2 changes: 1 addition & 1 deletion cmd/dbufstat/dbufstat.in
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def get_compstring(c):
"ZIO_COMPRESS_GZIP_6", "ZIO_COMPRESS_GZIP_7",
"ZIO_COMPRESS_GZIP_8", "ZIO_COMPRESS_GZIP_9",
"ZIO_COMPRESS_ZLE", "ZIO_COMPRESS_LZ4",
"ZIO_COMPRESS_FUNCTION"]
"ZIO_COMPRESS_ZSTD", "ZIO_COMPRESS_FUNCTION"]

# If "-rr" option is used, don't convert to string representation
if raw > 1:
Expand Down
106 changes: 96 additions & 10 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
* Copyright (c) 2015, 2017, Intel Corporation.
* Copyright (c) 2020 Datto Inc.
* Copyright (c) 2020, The FreeBSD Foundation [1]
*
* [1] Portions of this software were developed by Allan Jude
* under sponsorship from the FreeBSD Foundation.
*/

#include <stdio.h>
Expand Down Expand Up @@ -71,6 +75,7 @@
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>

#include <libnvpair.h>
#include <libzutil.h>
Expand Down Expand Up @@ -834,6 +839,7 @@ usage(void)
"work with dataset)\n");
(void) fprintf(stderr, " -Y attempt all reconstruction "
"combinations for split blocks\n");
(void) fprintf(stderr, " -Z show ZSTD headers \n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
Expand Down Expand Up @@ -2134,6 +2140,65 @@ blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
}

static void
snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
const blkptr_t *bp)
{
abd_t *pabd;
void *buf;
zio_t *zio;
zfs_zstdhdr_t zstd_hdr;
int error;

if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD)
return;

if (BP_IS_HOLE(bp))
return;

if (BP_IS_EMBEDDED(bp)) {
buf = malloc(SPA_MAXBLOCKSIZE);
if (buf == NULL) {
(void) fprintf(stderr, "out of memory\n");
exit(1);
}
decode_embedded_bp_compressed(bp, buf);
memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
free(buf);
zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
return;
}

pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
zio = zio_root(spa, NULL, NULL, 0);

/* Decrypt but don't decompress so we can read the compression header */
zio_nowait(zio_read(zio, spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS,
NULL));
error = zio_wait(zio);
if (error) {
(void) fprintf(stderr, "read failed: %d\n", error);
return;
}
buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp));
memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);

(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:NORMAL",
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);

abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
}

static void
snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
boolean_t bp_freed)
Expand Down Expand Up @@ -2198,7 +2263,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
}

static void
print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
const dnode_phys_t *dnp)
{
char blkbuf[BP_SPRINTF_LEN];
Expand All @@ -2222,6 +2287,8 @@ print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
}

snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
(void) printf("%s\n", blkbuf);
}

Expand All @@ -2234,7 +2301,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
if (bp->blk_birth == 0)
return (0);

print_indirect(bp, zb, dnp);
print_indirect(spa, bp, zb, dnp);

if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
arc_flags_t flags = ARC_FLAG_WAIT;
Expand Down Expand Up @@ -3310,7 +3377,25 @@ dump_object(objset_t *os, uint64_t object, int verbosity,
" (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum));
}

if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
if (doi.doi_compress == ZIO_COMPRESS_INHERIT &&
ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) {
const char *compname = NULL;
if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION,
ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel),
&compname) == 0) {
(void) snprintf(aux + strlen(aux),
sizeof (aux) - strlen(aux), " (Z=inherit=%s)",
compname);
} else {
(void) snprintf(aux + strlen(aux),
sizeof (aux) - strlen(aux),
" (Z=inherit=%s-unknown)",
ZDB_COMPRESS_NAME(os->os_compress));
}
} else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
" (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress));
} else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
" (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress));
}
Expand Down Expand Up @@ -4093,6 +4178,8 @@ dump_l2arc_log_entries(uint64_t log_entries,
(u_longlong_t)L2BLK_GET_PSIZE((&le[j])->le_prop));
(void) printf("|\t\t\t\tcompr: %llu\n",
(u_longlong_t)L2BLK_GET_COMPRESS((&le[j])->le_prop));
(void) printf("|\t\t\t\tcomplevel: %llu\n",
(u_longlong_t)(&le[j])->le_complevel);
(void) printf("|\t\t\t\ttype: %llu\n",
(u_longlong_t)L2BLK_GET_TYPE((&le[j])->le_prop));
(void) printf("|\t\t\t\tprotected: %llu\n",
Expand Down Expand Up @@ -4186,16 +4273,14 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr,
switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) {
case ZIO_COMPRESS_OFF:
break;
case ZIO_COMPRESS_LZ4:
default:
abd = abd_alloc_for_io(asize, B_TRUE);
abd_copy_from_buf_off(abd, &this_lb, 0, asize);
zio_decompress_data(L2BLK_GET_COMPRESS(
(&lbps[0])->lbp_prop), abd, &this_lb,
asize, sizeof (this_lb));
asize, sizeof (this_lb), NULL);
abd_free(abd);
break;
default:
break;
}

if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
Expand Down Expand Up @@ -7684,9 +7769,9 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
VERIFY0(random_get_pseudo_bytes(lbuf2, lsize));

if (zio_decompress_data(*cfuncp, pabd,
lbuf, psize, lsize) == 0 &&
lbuf, psize, lsize, NULL) == 0 &&
zio_decompress_data(*cfuncp, pabd,
lbuf2, psize, lsize) == 0 &&
lbuf2, psize, lsize, NULL) == 0 &&
bcmp(lbuf, lbuf2, lsize) == 0)
break;
}
Expand Down Expand Up @@ -8078,7 +8163,7 @@ main(int argc, char **argv)
zfs_btree_verify_intensity = 3;

while ((c = getopt(argc, argv,
"AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XYy")) != -1) {
"AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XYyZ")) != -1) {
switch (c) {
case 'b':
case 'c':
Expand All @@ -8098,6 +8183,7 @@ main(int argc, char **argv)
case 'S':
case 'u':
case 'y':
case 'Z':
dump_opt[c]++;
dump_all = 0;
break;
Expand Down
3 changes: 2 additions & 1 deletion config/ax_code_coverage.m4
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ CODE_COVERAGE_GENHTML_OPTIONS ?= $(CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT)

# Add any folders you want to ignore here
# Ignore tmp and tests themselves
CODE_COVERAGE_IGNORE_PATTERN ?= "/tmp/*" "*/tests/*"
CODE_COVERAGE_IGNORE_PATTERN ?= "/tmp/*" "*/tests/*"
CODE_COVERAGE_IGNORE_PATTERN += "*/module/zstd/lib/*"

GITIGNOREFILES ?=
GITIGNOREFILES += $(CODE_COVERAGE_OUTPUT_FILE) $(CODE_COVERAGE_OUTPUT_DIRECTORY)
Expand Down
3 changes: 3 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ AC_CONFIG_FILES([
include/sys/fs/Makefile
include/sys/lua/Makefile
include/sys/sysevent/Makefile
include/sys/zstd/Makefile
lib/Makefile
lib/libavl/Makefile
lib/libefi/Makefile
Expand Down Expand Up @@ -163,6 +164,7 @@ AC_CONFIG_FILES([
lib/libzfs_core/Makefile
lib/libzfs_core/libzfs_core.pc
lib/libzpool/Makefile
lib/libzstd/Makefile
lib/libzutil/Makefile
man/Makefile
man/man1/Makefile
Expand All @@ -180,6 +182,7 @@ AC_CONFIG_FILES([
module/unicode/Makefile
module/zcommon/Makefile
module/zfs/Makefile
module/zstd/Makefile
rpm/Makefile
rpm/generic/Makefile
rpm/generic/zfs-dkms.spec
Expand Down
2 changes: 2 additions & 0 deletions cppcheck-suppressions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ uninitvar:module/os/freebsd/zfs/vdev_geom.c
uninitvar:module/os/freebsd/zfs/zfs_vfsops.c
uninitvar:module/os/freebsd/spl/spl_zone.c
uninitvar:lib/libzutil/os/freebsd/zutil_import_os.c
*:module/zstd/lib/zstd.c
*:module/zstd/lib/zstd.h
2 changes: 1 addition & 1 deletion include/sys/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SUBDIRS = fm fs crypto lua sysevent
SUBDIRS = fm fs crypto lua sysevent zstd

COMMON_H = \
abd.h \
Expand Down
12 changes: 8 additions & 4 deletions include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2019, Klara Inc.
*/

#ifndef _SYS_ARC_H
Expand Down Expand Up @@ -251,18 +253,20 @@ void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
uint64_t psize, uint64_t lsize, enum zio_compress compression_type,
uint8_t complevel);
arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
enum zio_compress compression_type, uint8_t complevel);
uint8_t arc_get_complevel(arc_buf_t *buf);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
enum zio_compress compression_type, uint8_t complevel);
arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
enum zio_compress compression_type, uint8_t complevel);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
Expand Down
6 changes: 5 additions & 1 deletion include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,12 +269,13 @@ typedef struct l2arc_log_ent_phys {
*/
uint64_t le_prop;
uint64_t le_daddr; /* buf location on l2dev */
uint64_t le_complevel;
/*
* We pad the size of each entry to a power of 2 so that the size of
* l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
* because of the L2ARC_SET_*SIZE macros.
*/
const uint64_t le_pad[3]; /* pad to 64 bytes */
const uint64_t le_pad[2]; /* pad to 64 bytes */
} l2arc_log_ent_phys_t;

#define L2ARC_LOG_BLK_MAX_ENTRIES (1022)
Expand Down Expand Up @@ -460,6 +461,9 @@ struct arc_buf_hdr {
uint64_t b_birth;

arc_buf_contents_t b_type;
uint8_t b_complevel;
uint8_t b_reserved1; /* used for 4 byte alignment */
uint16_t b_reserved2; /* used for 4 byte alignment */
arc_buf_hdr_t *b_hash_next;
arc_flags_t b_flags;

Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ struct objset {
uint64_t os_dnodesize; /* default dnode size for new objects */
enum zio_checksum os_checksum;
enum zio_compress os_compress;
uint8_t os_complevel;
uint8_t os_copies;
enum zio_checksum os_dedup_checksum;
boolean_t os_dedup_verify;
Expand Down
2 changes: 2 additions & 0 deletions include/sys/dsl_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
uint64_t quota);
int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
uint64_t reservation);
int dsl_dataset_set_compression(const char *dsname, zprop_source_t source,
uint64_t compression);

boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
uint64_t earlier_txg);
Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2019, Klara Inc.
*/

#ifndef _SYS_SPA_H
Expand Down Expand Up @@ -120,6 +122,7 @@ struct dsl_crypto_params;

#define SPA_COMPRESSBITS 7
#define SPA_VDEVBITS 24
#define SPA_COMPRESSMASK ((1U << SPA_COMPRESSBITS) - 1)

/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
Expand Down
Loading