From 366a45630cb657d23d909a33891205955a0e0146 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Thu, 24 Mar 2022 15:15:53 +0000 Subject: [PATCH] Add a "zstream decompress" subcommand It can be used to repair a ZFS file system corrupted by ZFS bug 12762. Use it like this: zfs send -LRec | zstream decompress , ... | zfs recv -s Workaround for #12762 Sponsored-by: Axcient Signed-off-by: Alan Somers --- cmd/zstream/Makefile.am | 1 + cmd/zstream/zstream.c | 4 + cmd/zstream/zstream.h | 4 + cmd/zstream/zstream_decompress.c | 295 +++++++++++++++++++++++++++++++ cmd/zstream/zstream_dump.c | 2 +- cmd/zstream/zstream_redup.c | 4 +- man/man8/zstream.8 | 20 ++- 7 files changed, 326 insertions(+), 4 deletions(-) create mode 100644 cmd/zstream/zstream_decompress.c diff --git a/cmd/zstream/Makefile.am b/cmd/zstream/Makefile.am index 8e813027fa3d..1ecdc6b7c1ff 100644 --- a/cmd/zstream/Makefile.am +++ b/cmd/zstream/Makefile.am @@ -6,6 +6,7 @@ zstream_SOURCES = \ zstream.c \ zstream.h \ zstream_dump.c \ + zstream_decompress.c \ zstream_redup.c \ zstream_token.c diff --git a/cmd/zstream/zstream.c b/cmd/zstream/zstream.c index a228f45fad79..6a70dda501f6 100644 --- a/cmd/zstream/zstream.c +++ b/cmd/zstream/zstream.c @@ -40,6 +40,8 @@ zstream_usage(void) "\tzstream dump [-vCd] FILE\n" "\t... | zstream dump [-vCd]\n" "\n" + "\tzstream decompress [INODE,OFFSET ...]\n" + "\n" "\tzstream token resume_token\n" "\n" "\tzstream redup [-v] FILE | ...\n"); @@ -61,6 +63,8 @@ main(int argc, char *argv[]) if (strcmp(subcommand, "dump") == 0) { return (zstream_do_dump(argc - 1, argv + 1)); + } else if (strcmp(subcommand, "decompress") == 0) { + return (zstream_do_decompress(argc - 1, argv + 1)); } else if (strcmp(subcommand, "token") == 0) { return (zstream_do_token(argc - 1, argv + 1)); } else if (strcmp(subcommand, "redup") == 0) { diff --git a/cmd/zstream/zstream.h b/cmd/zstream/zstream.h index 319fecb2876b..28850a315a16 100644 --- a/cmd/zstream/zstream.h +++ b/cmd/zstream/zstream.h @@ -24,8 +24,12 @@ extern "C" { #endif +extern void * safe_calloc(size_t n); +extern int sfread(void *buf, size_t size, FILE *fp); +extern void * safe_malloc(size_t size); extern int zstream_do_redup(int, char *[]); extern int zstream_do_dump(int, char *[]); +extern int zstream_do_decompress(int argc, char *argv[]); extern int zstream_do_token(int, char *[]); extern void zstream_usage(void); diff --git a/cmd/zstream/zstream_decompress.c b/cmd/zstream/zstream_decompress.c new file mode 100644 index 000000000000..2bda34f7e4c7 --- /dev/null +++ b/cmd/zstream/zstream_decompress.c @@ -0,0 +1,295 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2022 Axcient. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "zfs_fletcher.h" +#include "zstream.h" + +/* + * ssread - send stream read. + * + * Read while computing incremental checksum + */ +static size_t +ssread(void *buf, size_t len, zio_cksum_t *cksum) +{ + size_t outlen; + + if ((outlen = fread(buf, len, 1, stdin)) == 0) + return (0); + + return (outlen); +} + +static size_t +read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) +{ + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + r = ssread(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + return (sizeof (*drr)); +} + +static int +dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, + zio_cksum_t *zc, int outfd) +{ + assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum) + == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + fletcher_4_incremental_native(drr, + offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); + if (drr->drr_type != DRR_BEGIN) { + assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. + drr_checksum.drr_checksum)); + drr->drr_u.drr_checksum.drr_checksum = *zc; + } + fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), zc); + if (write(outfd, drr, sizeof (*drr)) == -1) + return (errno); + if (payload_len != 0) { + fletcher_4_incremental_native(payload, payload_len, zc); + if (write(outfd, payload, payload_len) == -1) + return (errno); + } + return (0); +} + +int +zstream_do_decompress(int argc, char *argv[]) +{ + int bufsz = SPA_MAXBLOCKSIZE; + char *buf = safe_malloc(bufsz); + dmu_replay_record_t thedrr; + dmu_replay_record_t *drr = &thedrr; + zio_cksum_t stream_cksum; + int i; + + if (hcreate(argc) == 0) + errx(1, "hcreate"); + for(i = 0; i < argc; i++) { + ENTRY e = {.key = argv[i]}; + ENTRY *p; + + p = hsearch(e, ENTER); + if (p == NULL) + errx(1, "hsearch"); + } + + if (isatty(STDIN_FILENO)) { + (void) fprintf(stderr, + "Error: The send stream is a binary format " + "and can not be read from a\n" + "terminal. Standard input must be redirected, " + "or a file must be\n" + "specified as a command-line argument.\n"); + exit(1); + } + + fletcher_4_init(); + while (sfread(drr, sizeof (*drr), stdin) != 0) { + struct drr_write *drrw; + uint64_t payload_size = 0; + + /* + * We need to regenerate the checksum. + */ + if (drr->drr_type != DRR_BEGIN) { + bzero(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (drr->drr_u.drr_checksum.drr_checksum)); + } + + switch (drr->drr_type) { + case DRR_BEGIN: + { + ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + + int sz = drr->drr_payloadlen; + if (sz != 0) { + if (sz > bufsz) { + free(buf); + buf = safe_calloc(sz); + bufsz = sz; + } + (void) sfread(buf, sz, stdin); + } + payload_size = sz; + break; + } + case DRR_END: + { + struct drr_end *drre = &drr->drr_u.drr_end; + /* + * Use the recalculated checksum, unless this is + * the END record of a stream package, which has + * no checksum. + */ + if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum)) + drre->drr_checksum = stream_cksum; + break; + } + + case DRR_OBJECT: + { + struct drr_object *drro = &drr->drr_u.drr_object; + + if (drro->drr_bonuslen > 0) { + payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); + (void) sfread(buf, payload_size, stdin); + } + break; + } + + case DRR_SPILL: + { + struct drr_spill *drrs = &drr->drr_u.drr_spill; + payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); + (void) sfread(buf, payload_size, stdin); + break; + } + + case DRR_WRITE_BYREF: + fprintf(stderr, "Dedupliated streams are not supported\n"); + exit(1); + break; + + case DRR_WRITE: + { + const int KEYSIZE = 64; + drrw = &thedrr.drr_u.drr_write; + payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); + char key[KEYSIZE]; + + snprintf(key, KEYSIZE, "%llu,%llu", + (u_longlong_t)drrw->drr_object, + (u_longlong_t)drrw->drr_offset); + ENTRY e = {.key = key}; + + if (hsearch(e, FIND) != NULL) { + /* + * Read and decompress the block + */ + char* lzbuf = calloc(1, payload_size); + (void) sfread(lzbuf, payload_size, stdin); + if (0 != lz4_decompress_zfs(lzbuf, buf, + payload_size, payload_size, 0)) + { + /* + * The block must not be compressed, + * possibly because it gets written + * multiple times in this stream. + */ + warnx("lz4_decompress_zfs failed for ino %lu offset %ld", + drrw->drr_object, drrw->drr_offset); + memcpy(buf, lzbuf, payload_size); + } else { + fprintf(stderr, "successfully decompressed ino %lu offset %ld.\n", + drrw->drr_object, drrw->drr_offset); + } + free(lzbuf); + } else { + /* + * Read the contents of the block unaltered + */ + (void) sfread(buf, payload_size, stdin); + } + break; + } + + case DRR_WRITE_EMBEDDED: + { + struct drr_write_embedded *drrwe = + &drr->drr_u.drr_write_embedded; + payload_size = + P2ROUNDUP((uint64_t)drrwe->drr_psize, 8); + (void) sfread(buf, payload_size, stdin); + break; + } + + case DRR_FREEOBJECTS: + case DRR_FREE: + case DRR_OBJECT_RANGE: + break; + + default: + (void) fprintf(stderr, "INVALID record type 0x%x\n", + drr->drr_type); + /* should never happen, so assert */ + assert(B_FALSE); + } + + if (feof(stdout)) { + fprintf(stderr, "Error: unexpected end-of-file\n"); + exit(1); + } + if (ferror(stdout)) { + fprintf(stderr, "Error while reading file: %s\n", + strerror(errno)); + exit(1); + } + + /* + * We need to recalculate the checksum, and it needs to be + * initially zero to do that. BEGIN records don't have + * a checksum. + */ + if (drr->drr_type != DRR_BEGIN) { + bzero(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (drr->drr_u.drr_checksum.drr_checksum)); + } + if (dump_record(drr, buf, payload_size, + &stream_cksum, STDOUT_FILENO) != 0) + break; + if (drr->drr_type == DRR_END) { + /* + * Typically the END record is either the last + * thing in the stream, or it is followed + * by a BEGIN record (which also zeros the checksum). + * However, a stream package ends with two END + * records. The last END record's checksum starts + * from zero. + */ + ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); + } + } + free(buf); + fletcher_4_fini(); + hdestroy(); + + return (0); +} diff --git a/cmd/zstream/zstream_dump.c b/cmd/zstream/zstream_dump.c index 977256cae400..170d84fed092 100644 --- a/cmd/zstream/zstream_dump.c +++ b/cmd/zstream/zstream_dump.c @@ -59,7 +59,7 @@ FILE *send_stream = 0; boolean_t do_byteswap = B_FALSE; boolean_t do_cksum = B_TRUE; -static void * +void * safe_malloc(size_t size) { void *rv = malloc(size); diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c index 20aff17ae652..5807fabcecb5 100644 --- a/cmd/zstream/zstream_redup.c +++ b/cmd/zstream/zstream_redup.c @@ -65,7 +65,7 @@ highbit64(uint64_t i) return (NBBY * sizeof (uint64_t) - __builtin_clzll(i)); } -static void * +void * safe_calloc(size_t n) { void *rv = calloc(1, n); @@ -81,7 +81,7 @@ safe_calloc(size_t n) /* * Safe version of fread(), exits on error. */ -static int +int sfread(void *buf, size_t size, FILE *fp) { int rv = fread(buf, size, 1, fp); diff --git a/man/man8/zstream.8 b/man/man8/zstream.8 index c0322ee3ace0..1bc42a5ffdd7 100644 --- a/man/man8/zstream.8 +++ b/man/man8/zstream.8 @@ -20,7 +20,7 @@ .\" .\" Copyright (c) 2020 by Delphix. All rights reserved. .\" -.Dd May 8, 2021 +.Dd March 24, 2022 .Dt ZSTREAM 8 .Os . @@ -33,6 +33,9 @@ .Op Fl Cvd .Op Ar file .Nm +.Cm decompress +.Op Ar inode,offset ... +.Nm .Cm redup .Op Fl v .Ar file @@ -82,6 +85,21 @@ alias is provided for compatibility and is equivalent to running Dumps zfs resume token information .It Xo .Nm +.Cm decompress +.Op Ar inode,offset ... +.Xc +Decompress selected records in a ZFS send stream provided on standard input. +This allows recovering a ZFS dataset that was corrupted by OpenZFS bug +.Lk https://github.com/openzfs/zfs/issues/12762 12762 . +Specify the inode and byte offset of each record that you wish to decompress. +Every record for that inode beginning at that offset will be decompressed, if +possible. +It may not be possible, because the record may be corrupted in some but not +all of the stream's snapshots. +The repaired stream will be written to standard output. +Currently only LZ4 compression is supported. +.It Xo +.Nm .Cm redup .Op Fl v .Ar file