Skip to content

Commit

Permalink
implement corruption correcting recv
Browse files Browse the repository at this point in the history
This patch implements a new type of zfs receive: corrective receive
(-c). Thistype of recv is used to heal corrupted data when a replica
of the data already exists (in the form of a sendfile for example).
Metadata can not be healed using a corrective receive.

Signed-off-by: Alek Pinchuk <apinchuk@axcient.com>
  • Loading branch information
alek-p committed Nov 5, 2019
1 parent 936e2d6 commit c17d25c
Show file tree
Hide file tree
Showing 21 changed files with 898 additions and 79 deletions.
5 changes: 4 additions & 1 deletion cmd/zfs/zfs_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4487,7 +4487,7 @@ zfs_do_receive(int argc, char **argv)
nomem();

/* check options */
while ((c = getopt(argc, argv, ":o:x:dehnuvFsA")) != -1) {
while ((c = getopt(argc, argv, ":o:x:dehnuvFsAc")) != -1) {
switch (c) {
case 'o':
if (!parseprop(props, optarg)) {
Expand Down Expand Up @@ -4540,6 +4540,9 @@ zfs_do_receive(int argc, char **argv)
case 'A':
abort_resumable = B_TRUE;
break;
case 'c':
flags.heal = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,9 @@ typedef struct recvflags {

/* mount the filesystem unless nomount is specified */
boolean_t domount;

/* use this recv to check (and heal if needed) an existing snapshot */
boolean_t heal;
} recvflags_t;

extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
Expand Down
6 changes: 5 additions & 1 deletion include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@

/*
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _LIBZFS_CORE_H
Expand Down Expand Up @@ -105,6 +105,10 @@ int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, int,
const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint64_t *, nvlist_t **);
int lzc_receive_with_heal(const char *, nvlist_t *, nvlist_t *,
uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, boolean_t,
int, const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint64_t *, nvlist_t **);
int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
int lzc_send_space_resume_redacted(const char *, const char *,
enum lzc_send_flags, uint64_t, uint64_t, uint64_t, const char *,
Expand Down
4 changes: 3 additions & 1 deletion include/sys/dmu_recv.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _DMU_RECV_H
Expand All @@ -48,6 +49,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap;
uint64_t drc_featureflags;
boolean_t drc_force;
boolean_t drc_heal;
boolean_t drc_resumable;
boolean_t drc_raw;
boolean_t drc_clone;
Expand Down Expand Up @@ -80,7 +82,7 @@ typedef struct dmu_recv_cookie {
} dmu_recv_cookie_t;

int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
boolean_t force, boolean_t resumable, nvlist_t *localprops,
boolean_t force, boolean_t heal, boolean_t resumable, nvlist_t *localprops,
nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc,
vnode_t *vp, offset_t *voffp);
int dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd,
Expand Down
3 changes: 2 additions & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _SYS_SPA_H
Expand Down Expand Up @@ -1123,6 +1123,7 @@ extern const char *spa_state_to_name(spa_t *spa);
/* error handling */
struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb);
extern int zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset,
uint64_t length);
Expand Down
3 changes: 2 additions & 1 deletion include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019 Datto Inc.
*/

#ifndef _SYS_SPA_IMPL_H
Expand Down Expand Up @@ -341,6 +341,7 @@ struct spa {
kmutex_t spa_errlist_lock; /* error list/ereport lock */
avl_tree_t spa_errlist_last; /* last error list */
avl_tree_t spa_errlist_scrub; /* scrub error list */
avl_tree_t spa_errlist_healed; /* list of healed blocks */
uint64_t spa_deflate; /* should we deflate? */
uint64_t spa_history; /* history object */
kmutex_t spa_history_lock; /* history lock */
Expand Down
2 changes: 2 additions & 0 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,8 @@ extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);

extern void zio_destroy(zio_t *zio);

extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
struct abd *data, uint64_t lsize, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
Expand Down
95 changes: 84 additions & 11 deletions lib/libzfs/libzfs_sendrecv.c
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,29 @@ send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
}
}

/*
* returns snapshot guid
* and returns 0 if the snapshot does not exist
*/
static uint64_t
get_snap_guid(libzfs_handle_t *hdl, const char *fs, const char *snap)
{
char name[MAXPATHLEN + 1];
uint64_t guid = 0;

if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
return (guid);

(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
if (zhp != NULL) {
guid = zfs_prop_get_int(zhp, ZFS_PROP_GUID);
zfs_close(zhp);
}

return (guid);
}

/*
* returns snapshot creation txg
* and returns 0 if the snapshot does not exist
Expand Down Expand Up @@ -4655,9 +4678,34 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_REDACTED;

if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
if (flags->heal) {
if (flags->isprefix || flags->istail || flags->force ||
flags->canmountoff || flags->resumable || flags->nomount ||
flags->skipholds) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"corrective recv can not be used when combined with"
" this flag"));
err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
goto out;
}
uint64_t guid =
get_snap_guid(hdl, name, strchr(destsnap, '@') + 1);
if (guid == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"corrective recv must specify an existing snapshot"
" to heal"));
err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
goto out;
} else if (guid != drrb->drr_toguid) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"local snapshot doesn't match the snapshot"
" in the provided stream"));
err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
goto out;
}
} else if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
zfs_cmd_t zc = {"\0"};
zfs_handle_t *zhp;
zfs_handle_t *zhp = NULL;
boolean_t encrypted;

(void) strcpy(zc.zc_name, name);
Expand Down Expand Up @@ -4850,8 +4898,9 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}

if (flags->verbose) {
(void) printf("%s %s stream of %s into %s\n",
(void) printf("%s %s%s stream of %s into %s\n",
flags->dryrun ? "would receive" : "receiving",
flags->heal ? " corrective" : "",
drrb->drr_fromguid ? "incremental" : "full",
drrb->drr_toname, destsnap);
(void) fflush(stdout);
Expand Down Expand Up @@ -4921,10 +4970,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF);
}

err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable,
raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags,
action_handlep, &prop_errors);
if (flags->heal) {
err = ioctl_err = lzc_receive_with_heal(destsnap, rcvprops,
oxprops, wkeydata, wkeylen, origin, flags->force,
flags->heal, flags->resumable, raw, infd, drr_noswap,
cleanup_fd, &read_bytes, &errflags, action_handlep,
&prop_errors);
} else {
err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
oxprops, wkeydata, wkeylen, origin, flags->force,
flags->resumable, raw, infd, drr_noswap, cleanup_fd,
&read_bytes, &errflags, action_handlep, &prop_errors);
}
ioctl_errno = ioctl_err;
prop_errflags = errflags;

Expand Down Expand Up @@ -5048,7 +5105,11 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
break;
case EACCES:
if (raw && stream_wantsnewfs) {
if (flags->heal) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"key must be loaded to do a non-raw correc"
"tive recv on an encrypted dataset."));
} else if (raw && stream_wantsnewfs) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"failed to create encryption key"));
} else if (raw && !stream_wantsnewfs) {
Expand Down Expand Up @@ -5087,12 +5148,24 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
case ECKSUM:
recv_ecksum_set_aux(hdl, destsnap, flags->resumable);
if (flags->heal)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"corrective receive was not able to recon"
"struct the data needed for healing."));
else
recv_ecksum_set_aux(hdl, destsnap,
flags->resumable);
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
break;
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded to receive this stream."));
if (flags->heal)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"stream is not compatible with the "
"data in the pool."));
else
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded to receive this "
"stream."));
(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
break;
case EDQUOT:
Expand Down
45 changes: 34 additions & 11 deletions lib/libzfs_core/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
/*
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
*/

/*
Expand Down Expand Up @@ -778,7 +778,7 @@ recv_read(int fd, void *buf, int ilen)
static int
recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
boolean_t resumable, boolean_t raw, int input_fd,
boolean_t heal, boolean_t resumable, boolean_t raw, int input_fd,
const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
Expand Down Expand Up @@ -824,7 +824,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
/*
* All receives with a payload should use the new interface.
*/
if (resumable || raw || wkeydata != NULL || payload) {
if (resumable || heal || raw || wkeydata != NULL || payload) {
nvlist_t *outnvl = NULL;
nvlist_t *innvl = fnvlist_alloc();

Expand Down Expand Up @@ -864,6 +864,9 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
if (resumable)
fnvlist_add_boolean(innvl, "resumable");

if (heal)
fnvlist_add_boolean(innvl, "heal");

if (cleanup_fd >= 0)
fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);

Expand Down Expand Up @@ -982,7 +985,7 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, boolean_t raw, int fd)
{
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
B_FALSE, B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
}

/*
Expand All @@ -996,7 +999,7 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, boolean_t raw, int fd)
{
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
B_FALSE, B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL));
}

/*
Expand All @@ -1019,7 +1022,8 @@ lzc_receive_with_header(const char *snapname, nvlist_t *props,
return (EINVAL);

return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, NULL));
B_FALSE, resumable, raw, fd, begin_record, -1, NULL, NULL, NULL,
NULL));
}

/*
Expand Down Expand Up @@ -1049,8 +1053,8 @@ int lzc_receive_one(const char *snapname, nvlist_t *props,
nvlist_t **errors)
{
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes,
errflags, action_handle, errors));
B_FALSE, resumable, raw, input_fd, begin_record, cleanup_fd,
read_bytes, errflags, action_handle, errors));
}

/*
Expand All @@ -1063,13 +1067,32 @@ int lzc_receive_one(const char *snapname, nvlist_t *props,
*/
int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
const dmu_replay_record_t *begin_record, int cleanup_fd,
boolean_t force, boolean_t resumable, boolean_t raw,
int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
{
return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
force, B_FALSE, resumable, raw, input_fd, begin_record, cleanup_fd,
read_bytes, errflags, action_handle, errors));
}

/*
* Like lzc_receive_with_cmdprops, but allows the caller to pass an additional
* 'heal' argument.
*
* The heal arguments tells us to heal the provided snapshot using the provided
* send stream
*/
int lzc_receive_with_heal(const char *snapname, nvlist_t *props,
nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
boolean_t force, boolean_t heal, boolean_t resumable, boolean_t raw,
int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
{
return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
force, resumable, raw, input_fd, begin_record, cleanup_fd,
force, heal, resumable, raw, input_fd, begin_record, cleanup_fd,
read_bytes, errflags, action_handle, errors));
}

Expand Down
13 changes: 13 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -2846,6 +2846,19 @@ estimates.
Default value: \fB0\fR.
.RE

.sp
.ne 2
.na
\fBzfs_recv_best_effort_corrective\fR (int)
.ad
.RS 12n
When this variable is set to non-zero a corrective receive will not stop healing
and will continue going through the provided stream if a healing error is
encountered.
.sp
Default value: \fB0\fR.
.RE

.sp
.ne 2
.na
Expand Down
Loading

0 comments on commit c17d25c

Please sign in to comment.