Skip to content

Commit

Permalink
implement corruption correcting recv
Browse files Browse the repository at this point in the history
This patch implements a new type of zfs receive: corrective receive
(-c). This type of recv is used to heal corrupted data when a replica
of the data already exists (in the form of a sendfile for example).
Metadata can not be healed using a corrective receive.

Signed-off-by: Alek Pinchuk <apinchuk@datto.com>
  • Loading branch information
alek-p committed Sep 18, 2019
1 parent fcd37b6 commit 90a196a
Show file tree
Hide file tree
Showing 18 changed files with 603 additions and 53 deletions.
5 changes: 4 additions & 1 deletion cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4487,7 +4487,7 @@ zfs_do_receive(int argc, char **argv)
nomem();

/* check options */
while ((c = getopt(argc, argv, ":o:x:dehnuvFsA")) != -1) {
while ((c = getopt(argc, argv, ":o:x:dehnuvFsAc")) != -1) {
switch (c) {
case 'o':
if (!parseprop(props, optarg)) {
Expand Down Expand Up @@ -4540,6 +4540,9 @@ zfs_do_receive(int argc, char **argv)
case 'A':
abort_resumable = B_TRUE;
break;
case 'c':
flags.heal = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,9 @@ typedef struct recvflags {

/* skip receive of snapshot holds */
boolean_t skipholds;

/* use this recv to check (and heal if needed) an existing snapshot */
boolean_t heal;
} recvflags_t;

extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
Expand Down
10 changes: 7 additions & 3 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@

/*
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _LIBZFS_CORE_H
Expand Down Expand Up @@ -102,8 +102,12 @@ int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, int,
const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t,
int, const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint64_t *, nvlist_t **);
int lzc_receive_with_heal(const char *, nvlist_t *, nvlist_t *,
uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, boolean_t,
int, const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint64_t *, nvlist_t **);
int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
int lzc_send_space_resume_redacted(const char *, const char *,
Expand Down
4 changes: 3 additions & 1 deletion include/sys/dmu_recv.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _DMU_RECV_H
Expand All @@ -48,6 +49,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap;
uint64_t drc_featureflags;
boolean_t drc_force;
boolean_t drc_heal;
boolean_t drc_resumable;
boolean_t drc_raw;
boolean_t drc_clone;
Expand Down Expand Up @@ -80,7 +82,7 @@ typedef struct dmu_recv_cookie {
} dmu_recv_cookie_t;

int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
boolean_t force, boolean_t resumable, nvlist_t *localprops,
boolean_t force, boolean_t heal, boolean_t resumable, nvlist_t *localprops,
nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc,
vnode_t *vp, offset_t *voffp);
int dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd,
Expand Down
5 changes: 4 additions & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _SYS_SPA_H
Expand Down Expand Up @@ -1164,6 +1164,9 @@ extern const char *spa_state_to_name(spa_t *spa);
/* error handling */
struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb);
extern void spa_remove_healed_errors(spa_t *spa, avl_tree_t *, avl_tree_t *,
dmu_tx_t *tx);
extern int zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset,
uint64_t length);
Expand Down
3 changes: 2 additions & 1 deletion include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _SYS_SPA_IMPL_H
Expand Down Expand Up @@ -341,6 +341,7 @@ struct spa {
kmutex_t spa_errlist_lock; /* error list/ereport lock */
avl_tree_t spa_errlist_last; /* last error list */
avl_tree_t spa_errlist_scrub; /* scrub error list */
avl_tree_t spa_errlist_healed; /* list of healed blocks */
uint64_t spa_deflate; /* should we deflate? */
uint64_t spa_history; /* history object */
kmutex_t spa_history_lock; /* history lock */
Expand Down
71 changes: 64 additions & 7 deletions lib/libzfs/libzfs_sendrecv.c
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,29 @@ send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
}
}

/*
 * Look up the guid of the snapshot named "<fs>@<snap>".
 *
 * Returns the snapshot's ZFS_PROP_GUID value, or 0 when either name
 * component is NULL or empty, or when the snapshot cannot be opened
 * (e.g. it does not exist).
 */
static uint64_t
get_snap_guid(libzfs_handle_t *hdl, const char *fs, const char *snap)
{
	char snapname[MAXPATHLEN + 1];
	zfs_handle_t *snap_hdl;
	uint64_t snap_guid;

	/* Both halves of the name are required to build "fs@snap". */
	if (fs == NULL || *fs == '\0')
		return (0);
	if (snap == NULL || *snap == '\0')
		return (0);

	(void) snprintf(snapname, sizeof (snapname), "%s@%s", fs, snap);

	snap_hdl = zfs_open(hdl, snapname, ZFS_TYPE_SNAPSHOT);
	if (snap_hdl == NULL)
		return (0);

	snap_guid = zfs_prop_get_int(snap_hdl, ZFS_PROP_GUID);
	zfs_close(snap_hdl);

	return (snap_guid);
}

/*
* returns snapshot creation txg
* and returns 0 if the snapshot does not exist
Expand Down Expand Up @@ -4650,9 +4673,34 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_REDACTED;

if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
if (flags->heal) {
if (flags->isprefix || flags->istail || flags->force ||
flags->canmountoff || flags->resumable || flags->nomount ||
flags->skipholds) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"corrective recv can not be used when combined with"
" this flag"));
err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
goto out;
}
uint64_t guid =
get_snap_guid(hdl, name, strchr(destsnap, '@') + 1);
if (guid == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"corrective recv must specify an existing snapshot"
" to heal"));
err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
goto out;
} else if (guid != drrb->drr_toguid) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"local snapshot doesn't match the snapshot"
" in the provided stream"));
err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
goto out;
}
} else if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
zfs_cmd_t zc = {"\0"};
zfs_handle_t *zhp;
zfs_handle_t *zhp = NULL;
boolean_t encrypted;

(void) strcpy(zc.zc_name, name);
Expand Down Expand Up @@ -4845,8 +4893,9 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}

if (flags->verbose) {
(void) printf("%s %s stream of %s into %s\n",
(void) printf("%s %s%s stream of %s into %s\n",
flags->dryrun ? "would receive" : "receiving",
flags->heal ? " corrective" : "",
drrb->drr_fromguid ? "incremental" : "full",
drrb->drr_toname, destsnap);
(void) fflush(stdout);
Expand Down Expand Up @@ -4907,10 +4956,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF);
}

err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable,
raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags,
action_handlep, &prop_errors);
if (flags->heal) {
err = ioctl_err = lzc_receive_with_heal(destsnap, rcvprops,
oxprops, wkeydata, wkeylen, origin, flags->force,
flags->heal, flags->resumable, raw, infd, drr_noswap,
cleanup_fd, &read_bytes, &errflags, action_handlep,
&prop_errors);
} else {
err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
oxprops, wkeydata, wkeylen, origin, flags->force,
flags->resumable, raw, infd, drr_noswap, cleanup_fd,
&read_bytes, &errflags, action_handlep, &prop_errors);
}
ioctl_errno = ioctl_err;
prop_errflags = errflags;

Expand Down
45 changes: 34 additions & 11 deletions lib/libzfs_core/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
/*
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
*/

/*
Expand Down Expand Up @@ -778,7 +778,7 @@ recv_read(int fd, void *buf, int ilen)
static int
recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
boolean_t resumable, boolean_t raw, int input_fd,
boolean_t heal, boolean_t resumable, boolean_t raw, int input_fd,
const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
Expand Down Expand Up @@ -824,7 +824,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
/*
* All receives with a payload should use the new interface.
*/
if (resumable || raw || wkeydata != NULL || payload) {
if (resumable || heal || raw || wkeydata != NULL || payload) {
nvlist_t *outnvl = NULL;
nvlist_t *innvl = fnvlist_alloc();

Expand Down Expand Up @@ -864,6 +864,9 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
if (resumable)
fnvlist_add_boolean(innvl, "resumable");

if (heal)
fnvlist_add_boolean(innvl, "heal");

if (cleanup_fd >= 0)
fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);

Expand Down Expand Up @@ -982,7 +985,7 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, boolean_t raw, int fd)
{
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
B_FALSE, B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
}

/*
Expand All @@ -996,7 +999,7 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, boolean_t raw, int fd)
{
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
B_FALSE, B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
}

/*
Expand All @@ -1019,7 +1022,8 @@ lzc_receive_with_header(const char *snapname, nvlist_t *props,
return (EINVAL);

return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, NULL));
B_FALSE, resumable, raw, fd, begin_record, -1, NULL, NULL, NULL,
NULL));
}

/*
Expand Down Expand Up @@ -1049,8 +1053,8 @@ int lzc_receive_one(const char *snapname, nvlist_t *props,
nvlist_t **errors)
{
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes,
errflags, action_handle, errors));
B_FALSE, resumable, raw, input_fd, begin_record, cleanup_fd,
read_bytes, errflags, action_handle, errors));
}

/*
Expand All @@ -1063,13 +1067,32 @@ int lzc_receive_one(const char *snapname, nvlist_t *props,
*/
int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
const dmu_replay_record_t *begin_record, int cleanup_fd,
boolean_t force, boolean_t resumable, boolean_t raw,
int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
{
return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
force, B_FALSE, resumable, raw, input_fd, begin_record, cleanup_fd,
read_bytes, errflags, action_handle, errors));
}

/*
* Like lzc_receive_with_cmdprops, but allows the caller to pass an additional
* 'heal' argument.
*
* The heal argument tells us to heal the provided snapshot using the provided
* send stream.
*/
int lzc_receive_with_heal(const char *snapname, nvlist_t *props,
nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
boolean_t force, boolean_t heal, boolean_t resumable, boolean_t raw,
int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
{
return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
force, resumable, raw, input_fd, begin_record, cleanup_fd,
force, heal, resumable, raw, input_fd, begin_record, cleanup_fd,
read_bytes, errflags, action_handle, errors));
}

Expand Down
13 changes: 13 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -2846,6 +2846,19 @@ estimates.
Default value: \fB0\fR.
.RE

.sp
.ne 2
.na
\fBzfs_recv_best_effort_corrective\fR (int)
.ad
.RS 12n
When this variable is set to non-zero a corrective receive will not stop healing
and will continue going through the provided stream if a healing error is
encountered.
.sp
Default value: \fB0\fR.
.RE

.sp
.ne 2
.na
Expand Down
24 changes: 23 additions & 1 deletion man/man8/zfs.8
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@
.\" Copyright 2019 Richard Laager. All rights reserved.
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\" Copyright 2019 Datto Inc.
.\"
.Dd June 30, 2019
.Dd September 9, 2019
.Dt ZFS 8 SMM
.Os Linux
.Sh NAME
Expand Down Expand Up @@ -234,6 +235,11 @@
.Fl A
.Ar filesystem Ns | Ns Ar volume
.Nm
.Cm receive
.Fl c
.Op Fl vn
.Ar snapshot
.Nm
.Cm redact
.Ar snapshot redaction_bookmark
.Ar redaction_snapshot Ns ...
Expand Down Expand Up @@ -4360,6 +4366,22 @@ Abort an interrupted
deleting its saved partially received state.
.It Xo
.Nm
.Cm receive
.Fl c
.Op Fl vn
.Ar snapshot
.Xc
Attempt to correct data corruption in the specified
.Nm snapshot,
by using the provided stream as the source of healthy data. This method of
healing can only heal data blocks present in the stream. Metadata is not
able to be healed by corrective receive. Running a scrub is recommended post
healing to ensure all corruption has been healed. It's important to consider
why corruption has happened in the first place: if you have slowly failing
hardware, periodically healing the data is not going to save you from data loss
later on when the hardware fails completely.
.It Xo
.Nm
.Cm redact
.Ar snapshot redaction_bookmark
.Ar redaction_snapshot Ns ...
Expand Down
Loading

0 comments on commit 90a196a

Please sign in to comment.