Skip to content

Commit

Permalink
Add zstream redup command to convert deduplicated send streams
Browse files Browse the repository at this point in the history
Deduplicated send and receive is deprecated.  To ease migration to a
world without deduplicated send, this commit adds a `zstream redup`
utility to convert deduplicated send streams to normal streams, so that
they can continue to be received indefinitely.

The new `zstream` command also replaces the functionality of
`zstreamdump`, by way of the `zstream dump` subcommand.  The
`zstreamdump` command is replaced by a shell script which invokes
`zstream dump`.

The way that `zstream redup` works under the hood is that as we read the
send stream, we build up a hash table which maps from `<GUID, object,
offset> -> <file_offset>`.

Whenever we see a WRITE record, we add a new entry to the hash table,
which indicates where in the stream file to find the WRITE record for
this block. (The key is `drr_toguid, drr_object, drr_offset`.)

Records other than WRITE_BYREF are passed through unchanged
(except for the running checksum, which is recalculated).

For WRITE_BYREF records, we change them to WRITE records.  We find the
referenced WRITE record by looking in the hash table (for the record
with key `drr_refguid, drr_refobject, drr_refoffset`), and then reading
the record header and payload from the specified offset in the stream
file.  This is why the stream can not be a pipe.  The found WRITE record
replaces the WRITE_BYREF record, with its `drr_toguid`, `drr_object`,
and `drr_offset` fields changed to be the same as the WRITE_BYREF's
(i.e. we are writing the same logical block, but with the data supplied
by the previous WRITE record).

This algorithm requires memory proportional to the number of WRITE
records (same as `zfs send -D`), but the size per WRITE record is
relatively low (40 bytes, vs. 72 for `zfs send -D`).  A 1TB send stream
with 8KB blocks (`recordsize=8k`) would use around 5GB of RAM to
"redup".

Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes openzfs#10124 
Closes openzfs#10156
  • Loading branch information
ahrens authored and jsai20 committed Mar 30, 2021
1 parent eb3ffe4 commit a3bb1aa
Show file tree
Hide file tree
Showing 16 changed files with 728 additions and 36 deletions.
2 changes: 1 addition & 1 deletion cmd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest
SUBDIRS = zfs zpool zdb zhack zinject zstream zstreamdump ztest
SUBDIRS += fsck_zfs vdev_id raidz_test zgenhostid

if USING_PYTHON
Expand Down
1 change: 1 addition & 0 deletions cmd/zstream/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
zstream
13 changes: 13 additions & 0 deletions cmd/zstream/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
include $(top_srcdir)/config/Rules.am

sbin_PROGRAMS = zstream

zstream_SOURCES = \
zstream.c \
zstream.h \
zstream_dump.c \
zstream_redup.c

zstream_LDADD = \
$(top_builddir)/lib/libnvpair/libnvpair.la \
$(top_builddir)/lib/libzfs/libzfs.la
61 changes: 61 additions & 0 deletions cmd/zstream/zstream.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2020 by Delphix. All rights reserved.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <libintl.h>
#include <stddef.h>
#include <libzfs.h>
#include "zstream.h"

/*
 * Print the zstream usage summary (the supported subcommands and their
 * arguments) to stderr, then terminate the process with exit status 1.
 * This function does not return to its caller.
 */
void
zstream_usage(void)
{
	static const char usage_text[] =
	    "usage: zstream command args ...\n"
	    "Available commands are:\n"
	    "\n"
	    "\tzstream dump [-vCd] FILE\n"
	    "\t... | zstream dump [-vCd]\n"
	    "\n"
	    "\tzstream redup [-v] FILE | ...\n";

	/* The text contains no conversion specifiers, so emit it verbatim. */
	(void) fputs(usage_text, stderr);
	exit(1);
}

/*
 * Entry point: dispatch to the subcommand named by argv[1].
 *
 * Each handler is invoked with the argument vector shifted by one, so
 * that the subcommand name occupies argv[0] from the handler's point of
 * view.  A missing or unrecognized subcommand prints the usage message
 * via zstream_usage(), which exits with status 1.
 */
int
main(int argc, char *argv[])
{
	const char *cmd;

	if (argc < 2)
		zstream_usage();

	cmd = argv[1];

	if (strcmp(cmd, "dump") == 0)
		return (zstream_do_dump(argc - 1, argv + 1));

	if (strcmp(cmd, "redup") == 0)
		return (zstream_do_redup(argc - 1, argv + 1));

	/* Unknown subcommand. */
	zstream_usage();

	/* Not reached: zstream_usage() exits. */
	return (1);
}
35 changes: 35 additions & 0 deletions cmd/zstream/zstream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2020 by Delphix. All rights reserved.
*/

#ifndef _ZSTREAM_H
#define _ZSTREAM_H

#ifdef __cplusplus
extern "C" {
#endif

extern int zstream_do_redup(int, char *[]);
extern int zstream_do_dump(int, char *[]);
extern void zstream_usage(void);

#ifdef __cplusplus
}
#endif

#endif /* _ZSTREAM_H */
45 changes: 24 additions & 21 deletions cmd/zstreamdump/zstreamdump.c → cmd/zstream/zstream_dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/zio.h>
#include <zfs_fletcher.h>
#include "zstream.h"

/*
* If dump mode is enabled, the number of bytes to print per line
Expand All @@ -58,17 +59,6 @@ FILE *send_stream = 0;
boolean_t do_byteswap = B_FALSE;
boolean_t do_cksum = B_TRUE;

/*
 * Print the zstreamdump usage line and its option summary to stderr,
 * then exit with status 1.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    "usage: zstreamdump [-v] [-C] [-d] < file\n"
	    "\t -v -- verbose\n"
	    "\t -C -- suppress checksum verification\n"
	    "\t -d -- dump contents of blocks modified, "
	    "implies verbose\n");
	exit(1);
}

static void *
safe_malloc(size_t size)
{
Expand Down Expand Up @@ -215,7 +205,7 @@ sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
}

int
main(int argc, char *argv[])
zstream_do_dump(int argc, char *argv[])
{
char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
Expand Down Expand Up @@ -273,26 +263,39 @@ main(int argc, char *argv[])
case ':':
(void) fprintf(stderr,
"missing argument for '%c' option\n", optopt);
usage();
zstream_usage();
break;
case '?':
(void) fprintf(stderr, "invalid option '%c'\n",
optopt);
usage();
zstream_usage();
break;
}
}

if (isatty(STDIN_FILENO)) {
(void) fprintf(stderr,
"Error: Backup stream can not be read "
"from a terminal.\n"
"You must redirect standard input.\n");
exit(1);
if (argc > optind) {
const char *filename = argv[optind];
send_stream = fopen(filename, "r");
if (send_stream == NULL) {
(void) fprintf(stderr,
"Error while opening file '%s': %s\n",
filename, strerror(errno));
exit(1);
}
} else {
if (isatty(STDIN_FILENO)) {
(void) fprintf(stderr,
"Error: The send stream is a binary format "
"and can not be read from a\n"
"terminal. Standard input must be redirected, "
"or a file must be\n"
"specified as a command-line argument.\n");
exit(1);
}
send_stream = stdin;
}

fletcher_4_init();
send_stream = stdin;
while (read_hdr(drr, &zc)) {

/*
Expand Down
Loading

0 comments on commit a3bb1aa

Please sign in to comment.