Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce ZSTD compression to ZFS #10278

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ CONTRIBUTORS:
Keith M Wesolowski <wesolows@foobazco.org>
Kevin Tanguy <kevin.tanguy@ovh.net>
KireinaHoro <i@jsteward.moe>
Kjeld Schouten-Lebbing <kjeld@schouten-lebbing.nl>
Kohsuke Kawaguchi <kk@kohsuke.org>
Kyle Blatter <kyleblatter@llnl.gov>
Kyle Fuller <inbox@kylefuller.co.uk>
Expand Down Expand Up @@ -210,6 +211,7 @@ CONTRIBUTORS:
Michael Gebetsroither <michael@mgeb.org>
Michael Kjorling <michael@kjorling.se>
Michael Martin <mgmartin.mgm@gmail.com>
Michael Niewöhner <foss@mniewoehner.de>
Mike Gerdts <mike.gerdts@joyent.com>
Mike Harsch <mike@harschsystems.com>
Mike Leddy <mike.leddy@gmail.com>
Expand Down Expand Up @@ -258,6 +260,7 @@ CONTRIBUTORS:
Saso Kiselkov <saso.kiselkov@nexenta.com>
Scot W. Stevenson <scot.stevenson@gmail.com>
Sean Eric Fagan <sef@ixsystems.com>
Sebastian Gottschall <s.gottschall@dd-wrt.com>
Sen Haerens <sen@senhaerens.be>
Serapheim Dimitropoulos <serapheim@delphix.com>
Seth Forshee <seth.forshee@canonical.com>
Expand Down
4 changes: 3 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,11 @@ commitcheck:

PHONY += cstyle
cstyle:
@find ${top_srcdir} -name build -prune -o -type f -name '*.[hc]' \
@find ${top_srcdir} -name build -prune \
-o -type f -name '*.[hc]' \
! -name 'zfs_config.*' ! -name '*.mod.c' \
! -name 'opt_global.h' ! -name '*_if*.h' \
! -path './module/zstd/lib/*' \
-exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+

filter_executable = -exec test -x '{}' \; -print
Expand Down
2 changes: 1 addition & 1 deletion cmd/dbufstat/dbufstat.in
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ def get_compstring(c):
"ZIO_COMPRESS_GZIP_6", "ZIO_COMPRESS_GZIP_7",
"ZIO_COMPRESS_GZIP_8", "ZIO_COMPRESS_GZIP_9",
"ZIO_COMPRESS_ZLE", "ZIO_COMPRESS_LZ4",
"ZIO_COMPRESS_FUNCTION"]
"ZIO_COMPRESS_ZSTD", "ZIO_COMPRESS_FUNCTION"]

# If "-rr" option is used, don't convert to string representation
if raw > 1:
Expand Down
106 changes: 96 additions & 10 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
* Copyright (c) 2015, 2017, Intel Corporation.
* Copyright (c) 2020 Datto Inc.
* Copyright (c) 2020, The FreeBSD Foundation [1]
*
* [1] Portions of this software were developed by Allan Jude
* under sponsorship from the FreeBSD Foundation.
*/

#include <stdio.h>
Expand Down Expand Up @@ -71,6 +75,7 @@
#include <sys/dsl_scan.h>
#include <sys/btree.h>
#include <zfs_comutil.h>
#include <sys/zstd/zstd.h>

#include <libnvpair.h>
#include <libzutil.h>
Expand Down Expand Up @@ -834,6 +839,7 @@ usage(void)
"work with dataset)\n");
(void) fprintf(stderr, " -Y attempt all reconstruction "
"combinations for split blocks\n");
(void) fprintf(stderr, " -Z show ZSTD headers \n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
Expand Down Expand Up @@ -2134,6 +2140,65 @@ blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
}

static void
snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
const blkptr_t *bp)
{
abd_t *pabd;
void *buf;
zio_t *zio;
zfs_zstdhdr_t zstd_hdr;
int error;

if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_ZSTD)
return;

if (BP_IS_HOLE(bp))
return;

if (BP_IS_EMBEDDED(bp)) {
buf = malloc(SPA_MAXBLOCKSIZE);
if (buf == NULL) {
(void) fprintf(stderr, "out of memory\n");
exit(1);
}
decode_embedded_bp_compressed(bp, buf);
memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
free(buf);
zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
return;
}

pabd = abd_alloc_for_io(SPA_MAXBLOCKSIZE, B_FALSE);
zio = zio_root(spa, NULL, NULL, 0);

/* Decrypt but don't decompress so we can read the compression header */
zio_nowait(zio_read(zio, spa, bp, pabd, BP_GET_PSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW_COMPRESS,
NULL));
error = zio_wait(zio);
if (error) {
(void) fprintf(stderr, "read failed: %d\n", error);
return;
}
buf = abd_borrow_buf_copy(pabd, BP_GET_LSIZE(bp));
memcpy(&zstd_hdr, buf, sizeof (zstd_hdr));
zstd_hdr.c_len = BE_32(zstd_hdr.c_len);
zstd_hdr.raw_version_level = BE_32(zstd_hdr.raw_version_level);

(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:NORMAL",
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);

abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
}

static void
snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
boolean_t bp_freed)
Expand Down Expand Up @@ -2198,7 +2263,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
}

static void
print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
print_indirect(spa_t *spa, blkptr_t *bp, const zbookmark_phys_t *zb,
const dnode_phys_t *dnp)
{
char blkbuf[BP_SPRINTF_LEN];
Expand All @@ -2222,6 +2287,8 @@ print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
}

snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp, B_FALSE);
if (dump_opt['Z'] && BP_GET_COMPRESS(bp) == ZIO_COMPRESS_ZSTD)
snprintf_zstd_header(spa, blkbuf, sizeof (blkbuf), bp);
(void) printf("%s\n", blkbuf);
}

Expand All @@ -2234,7 +2301,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
if (bp->blk_birth == 0)
return (0);

print_indirect(bp, zb, dnp);
print_indirect(spa, bp, zb, dnp);

if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
arc_flags_t flags = ARC_FLAG_WAIT;
Expand Down Expand Up @@ -3310,7 +3377,25 @@ dump_object(objset_t *os, uint64_t object, int verbosity,
" (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum));
}

if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
if (doi.doi_compress == ZIO_COMPRESS_INHERIT &&
ZIO_COMPRESS_HASLEVEL(os->os_compress) && verbosity >= 6) {
const char *compname = NULL;
if (zfs_prop_index_to_string(ZFS_PROP_COMPRESSION,
ZIO_COMPRESS_RAW(os->os_compress, os->os_complevel),
&compname) == 0) {
(void) snprintf(aux + strlen(aux),
sizeof (aux) - strlen(aux), " (Z=inherit=%s)",
compname);
} else {
(void) snprintf(aux + strlen(aux),
sizeof (aux) - strlen(aux),
" (Z=inherit=%s-unknown)",
ZDB_COMPRESS_NAME(os->os_compress));
}
} else if (doi.doi_compress == ZIO_COMPRESS_INHERIT && verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
" (Z=inherit=%s)", ZDB_COMPRESS_NAME(os->os_compress));
} else if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
" (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress));
}
Expand Down Expand Up @@ -4093,6 +4178,8 @@ dump_l2arc_log_entries(uint64_t log_entries,
(u_longlong_t)L2BLK_GET_PSIZE((&le[j])->le_prop));
(void) printf("|\t\t\t\tcompr: %llu\n",
(u_longlong_t)L2BLK_GET_COMPRESS((&le[j])->le_prop));
(void) printf("|\t\t\t\tcomplevel: %llu\n",
(u_longlong_t)(&le[j])->le_complevel);
(void) printf("|\t\t\t\ttype: %llu\n",
(u_longlong_t)L2BLK_GET_TYPE((&le[j])->le_prop));
(void) printf("|\t\t\t\tprotected: %llu\n",
Expand Down Expand Up @@ -4186,16 +4273,14 @@ dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr,
switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) {
case ZIO_COMPRESS_OFF:
break;
case ZIO_COMPRESS_LZ4:
default:
abd = abd_alloc_for_io(asize, B_TRUE);
abd_copy_from_buf_off(abd, &this_lb, 0, asize);
zio_decompress_data(L2BLK_GET_COMPRESS(
(&lbps[0])->lbp_prop), abd, &this_lb,
asize, sizeof (this_lb));
asize, sizeof (this_lb), NULL);
abd_free(abd);
break;
default:
break;
}

if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
Expand Down Expand Up @@ -7684,9 +7769,9 @@ zdb_decompress_block(abd_t *pabd, void *buf, void *lbuf, uint64_t lsize,
VERIFY0(random_get_pseudo_bytes(lbuf2, lsize));

if (zio_decompress_data(*cfuncp, pabd,
lbuf, psize, lsize) == 0 &&
lbuf, psize, lsize, NULL) == 0 &&
zio_decompress_data(*cfuncp, pabd,
lbuf2, psize, lsize) == 0 &&
lbuf2, psize, lsize, NULL) == 0 &&
bcmp(lbuf, lbuf2, lsize) == 0)
break;
}
Expand Down Expand Up @@ -8078,7 +8163,7 @@ main(int argc, char **argv)
zfs_btree_verify_intensity = 3;

while ((c = getopt(argc, argv,
"AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XYy")) != -1) {
"AbcCdDeEFGhiI:klLmMo:Op:PqRsSt:uU:vVx:XYyZ")) != -1) {
switch (c) {
case 'b':
case 'c':
Expand All @@ -8098,6 +8183,7 @@ main(int argc, char **argv)
case 'S':
case 'u':
case 'y':
case 'Z':
dump_opt[c]++;
dump_all = 0;
break;
Expand Down
3 changes: 2 additions & 1 deletion config/ax_code_coverage.m4
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ CODE_COVERAGE_GENHTML_OPTIONS ?= $(CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT)

# Add any folders you want to ignore here
# Ignore tmp and tests themselves
CODE_COVERAGE_IGNORE_PATTERN ?= "/tmp/*" "*/tests/*"
CODE_COVERAGE_IGNORE_PATTERN ?= "/tmp/*" "*/tests/*"
CODE_COVERAGE_IGNORE_PATTERN += "*/module/zstd/lib/*"

GITIGNOREFILES ?=
GITIGNOREFILES += $(CODE_COVERAGE_OUTPUT_FILE) $(CODE_COVERAGE_OUTPUT_DIRECTORY)
Expand Down
3 changes: 3 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ AC_CONFIG_FILES([
include/sys/fs/Makefile
include/sys/lua/Makefile
include/sys/sysevent/Makefile
include/sys/zstd/Makefile
lib/Makefile
lib/libavl/Makefile
lib/libefi/Makefile
Expand Down Expand Up @@ -163,6 +164,7 @@ AC_CONFIG_FILES([
lib/libzfs_core/Makefile
lib/libzfs_core/libzfs_core.pc
lib/libzpool/Makefile
lib/libzstd/Makefile
lib/libzutil/Makefile
man/Makefile
man/man1/Makefile
Expand All @@ -180,6 +182,7 @@ AC_CONFIG_FILES([
module/unicode/Makefile
module/zcommon/Makefile
module/zfs/Makefile
module/zstd/Makefile
rpm/Makefile
rpm/generic/Makefile
rpm/generic/zfs-dkms.spec
Expand Down
2 changes: 2 additions & 0 deletions cppcheck-suppressions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ uninitvar:module/os/freebsd/zfs/vdev_geom.c
uninitvar:module/os/freebsd/zfs/zfs_vfsops.c
uninitvar:module/os/freebsd/spl/spl_zone.c
uninitvar:lib/libzutil/os/freebsd/zutil_import_os.c
*:module/zstd/lib/zstd.c
*:module/zstd/lib/zstd.h
2 changes: 1 addition & 1 deletion include/sys/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SUBDIRS = fm fs crypto lua sysevent
SUBDIRS = fm fs crypto lua sysevent zstd

COMMON_H = \
abd.h \
Expand Down
12 changes: 8 additions & 4 deletions include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2019, Klara Inc.
*/

#ifndef _SYS_ARC_H
Expand Down Expand Up @@ -252,18 +254,20 @@ void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
uint64_t psize, uint64_t lsize, enum zio_compress compression_type,
uint8_t complevel);
arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
enum zio_compress compression_type, uint8_t complevel);
uint8_t arc_get_complevel(arc_buf_t *buf);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
enum zio_compress compression_type, uint8_t complevel);
arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
enum zio_compress compression_type, uint8_t complevel);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
Expand Down
6 changes: 5 additions & 1 deletion include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,12 +269,13 @@ typedef struct l2arc_log_ent_phys {
*/
uint64_t le_prop;
uint64_t le_daddr; /* buf location on l2dev */
uint64_t le_complevel;
/*
* We pad the size of each entry to a power of 2 so that the size of
* l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
* because of the L2ARC_SET_*SIZE macros.
*/
const uint64_t le_pad[3]; /* pad to 64 bytes */
const uint64_t le_pad[2]; /* pad to 64 bytes */
} l2arc_log_ent_phys_t;

#define L2ARC_LOG_BLK_MAX_ENTRIES (1022)
Expand Down Expand Up @@ -460,6 +461,9 @@ struct arc_buf_hdr {
uint64_t b_birth;

arc_buf_contents_t b_type;
uint8_t b_complevel;
uint8_t b_reserved1; /* used for 4 byte alignment */
uint16_t b_reserved2; /* used for 4 byte alignment */
arc_buf_hdr_t *b_hash_next;
arc_flags_t b_flags;

Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ struct objset {
uint64_t os_dnodesize; /* default dnode size for new objects */
enum zio_checksum os_checksum;
enum zio_compress os_compress;
uint8_t os_complevel;
uint8_t os_copies;
enum zio_checksum os_dedup_checksum;
boolean_t os_dedup_verify;
Expand Down
2 changes: 2 additions & 0 deletions include/sys/dsl_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
uint64_t quota);
int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
uint64_t reservation);
int dsl_dataset_set_compression(const char *dsname, zprop_source_t source,
uint64_t compression);

boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
uint64_t earlier_txg);
Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2019, Klara Inc.
*/

#ifndef _SYS_SPA_H
Expand Down Expand Up @@ -120,6 +122,7 @@ struct dsl_crypto_params;

#define SPA_COMPRESSBITS 7
#define SPA_VDEVBITS 24
#define SPA_COMPRESSMASK ((1U << SPA_COMPRESSBITS) - 1)

/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
Expand Down
Loading