diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index e6b5978259d6..ade4f6e6f30c 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -4487,7 +4487,7 @@ zfs_do_receive(int argc, char **argv) nomem(); /* check options */ - while ((c = getopt(argc, argv, ":o:x:dehnuvFsA")) != -1) { + while ((c = getopt(argc, argv, ":o:x:dehnuvFsAc")) != -1) { switch (c) { case 'o': if (!parseprop(props, optarg)) { @@ -4540,6 +4540,9 @@ zfs_do_receive(int argc, char **argv) case 'A': abort_resumable = B_TRUE; break; + case 'c': + flags.heal = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); diff --git a/contrib/pyzfs/libzfs_core/_libzfs_core.py b/contrib/pyzfs/libzfs_core/_libzfs_core.py index 06797b0f36d5..87dd0921714d 100644 --- a/contrib/pyzfs/libzfs_core/_libzfs_core.py +++ b/contrib/pyzfs/libzfs_core/_libzfs_core.py @@ -1306,8 +1306,8 @@ def lzc_receive_one( @_uncommitted() def lzc_receive_with_cmdprops( - snapname, fd, begin_record, force=False, resumable=False, raw=False, - origin=None, props=None, cmdprops=None, key=None, cleanup_fd=-1, + snapname, fd, begin_record, force=False, heal=False, resumable=False, + raw=False, origin=None, props=None, cmdprops=None, key=None, cleanup_fd=-1, action_handle=0 ): ''' @@ -1322,6 +1322,8 @@ def lzc_receive_with_cmdprops( structure. :param bool force: whether to roll back or destroy the target filesystem if that is required to receive the stream. + :param bool heal: True when we want to heal the snapshot (snapname) L0 data + using the data from the provided stream. :param bool resumable: whether this stream should be treated as resumable. If the receive fails due to premature stream termination, the intermediate state will be preserved on disk and may subsequently be @@ -1417,8 +1419,8 @@ def lzc_receive_with_cmdprops( properrs = {} with nvlist_out(properrs) as c_errors: ret = _lib.lzc_receive_with_cmdprops( - snapname, nvlist, cmdnvlist, key, len(key), c_origin, - force, resumable, raw, fd, begin_record, cleanup_fd, c_read_bytes, + snapname, nvlist, cmdnvlist, key, len(key), c_origin, force, + heal, resumable, raw, fd, begin_record, cleanup_fd, c_read_bytes, c_errflags, c_action_handle, c_errors) errors.lzc_receive_translate_errors( ret, snapname, fd, force, raw, False, False, origin, properrs) diff --git a/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py b/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py index 1b46a0891944..1a2d2ba7c57d 100644 --- a/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py +++ b/contrib/pyzfs/libzfs_core/bindings/libzfs_core.py @@ -109,7 +109,7 @@ int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t, boolean_t, int); int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *, - uint8_t *, uint_t, const char *, boolean_t, boolean_t, + uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, boolean_t, int, const dmu_replay_record_t *, int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **); int lzc_receive_with_header(const char *, nvlist_t *, const char *, diff --git a/include/libzfs.h b/include/libzfs.h index 22cb0408e620..3d299a8fd873 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -740,6 +740,9 @@ typedef struct recvflags { /* skip receive of snapshot holds */ boolean_t skipholds; + + /* use this recv to check (and heal if needed) an existing snapshot */ + boolean_t heal; } recvflags_t; extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, diff --git a/include/libzfs_core.h b/include/libzfs_core.h index 
04cc47781359..24a82ec6868a 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -21,9 +21,9 @@ /* * Copyright (c) 2012, 2018 by Delphix. All rights reserved. - * Copyright (c) 2017 Datto Inc. * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. + * Copyright (c) 2019 Datto Inc. */ #ifndef _LIBZFS_CORE_H @@ -102,8 +102,8 @@ int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t, boolean_t, boolean_t, int, const struct dmu_replay_record *, int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **); int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *, - uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, int, - const struct dmu_replay_record *, int, uint64_t *, uint64_t *, + uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, boolean_t, + int, const struct dmu_replay_record *, int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **); int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *); int lzc_send_space_resume_redacted(const char *, const char *, diff --git a/include/sys/dmu_recv.h b/include/sys/dmu_recv.h index 1a7347d66e8f..36c2037a0d36 100644 --- a/include/sys/dmu_recv.h +++ b/include/sys/dmu_recv.h @@ -24,6 +24,7 @@ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2019 Datto Inc. */ #ifndef _DMU_RECV_H @@ -48,6 +49,7 @@ typedef struct dmu_recv_cookie { boolean_t drc_byteswap; uint64_t drc_featureflags; boolean_t drc_force; + boolean_t drc_heal; boolean_t drc_resumable; boolean_t drc_raw; boolean_t drc_clone; @@ -80,7 +82,7 @@ typedef struct dmu_recv_cookie { } dmu_recv_cookie_t; int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, - boolean_t force, boolean_t resumable, nvlist_t *localprops, + boolean_t force, boolean_t heal, boolean_t resumable, nvlist_t *localprops, nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp); int dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd, diff --git a/include/sys/spa.h b/include/sys/spa.h index 51e4c0f77bb5..c9a6a962ab04 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -26,8 +26,8 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019 Datto Inc. */ #ifndef _SYS_SPA_H @@ -1164,6 +1164,8 @@ extern const char *spa_state_to_name(spa_t *spa); /* error handling */ struct zbookmark_phys; extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb); +extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb); +extern void spa_remove_healed_errors(spa_t *spa, dmu_tx_t *tx); extern int zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t length); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 71b07405cd58..aad6b9beddc2 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -25,8 +25,8 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. - * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019 Datto Inc. 
*/ #ifndef _SYS_SPA_IMPL_H @@ -341,6 +341,7 @@ struct spa { kmutex_t spa_errlist_lock; /* error list/ereport lock */ avl_tree_t spa_errlist_last; /* last error list */ avl_tree_t spa_errlist_scrub; /* scrub error list */ + avl_tree_t spa_errlist_healed; /* list of healed blocks */ uint64_t spa_deflate; /* should we deflate? */ uint64_t spa_history; /* history object */ kmutex_t spa_history_lock; /* history lock */ diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 33d3eb6a5345..60031a3957ba 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -816,6 +816,29 @@ send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv) } } +/* + * returns snapshot guid + * and returns 0 if the snapshot does not exist + */ +static uint64_t +get_snap_guid(libzfs_handle_t *hdl, const char *fs, const char *snap) +{ + char name[MAXPATHLEN + 1]; + uint64_t guid = 0; + + if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0') + return (guid); + + (void) snprintf(name, sizeof (name), "%s@%s", fs, snap); + zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT); + if (zhp != NULL) { + guid = zfs_prop_get_int(zhp, ZFS_PROP_GUID); + zfs_close(zhp); + } + + return (guid); +} + /* * returns snapshot creation txg * and returns 0 if the snapshot does not exist @@ -4650,9 +4673,34 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED; - if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) { + if (flags->heal) { + if (flags->isprefix || flags->istail || flags->force || + flags->canmountoff || flags->resumable || flags->nomount || + flags->skipholds) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "corrective recv can not be used when combined with" + " this flag")); + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; + } + uint64_t guid = + get_snap_guid(hdl, name, strchr(destsnap, '@') + 1); + if (guid == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "corrective recv must specify an existing snapshot" + " to heal")); + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; + } else if (guid != drrb->drr_toguid) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "local snapshot doesn't match the snapshot" + " in the provided stream")); + err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf); + goto out; + } + } else if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) { zfs_cmd_t zc = {"\0"}; - zfs_handle_t *zhp; + zfs_handle_t *zhp = NULL; boolean_t encrypted; (void) strcpy(zc.zc_name, name); @@ -4845,8 +4893,9 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } if (flags->verbose) { - (void) printf("%s %s stream of %s into %s\n", + (void) printf("%s %s%s stream of %s into %s\n", flags->dryrun ? "would receive" : "receiving", + flags->heal ? " corrective" : "", drrb->drr_fromguid ? 
"incremental" : "full", drrb->drr_toname, destsnap); (void) fflush(stdout); @@ -4908,9 +4957,9 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, - oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable, - raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags, - action_handlep, &prop_errors); + oxprops, wkeydata, wkeylen, origin, flags->force, flags->heal, + flags->resumable, raw, infd, drr_noswap, cleanup_fd, &read_bytes, + &errflags, action_handlep, &prop_errors); ioctl_errno = ioctl_err; prop_errflags = errflags; diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index a3dc70f9e486..9aaf8d85c29b 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -22,9 +22,9 @@ /* * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. - * Copyright (c) 2017 Datto Inc. * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. + * Copyright (c) 2019 Datto Inc. */ /* @@ -778,7 +778,7 @@ recv_read(int fd, void *buf, int ilen) static int recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force, - boolean_t resumable, boolean_t raw, int input_fd, + boolean_t heal, boolean_t resumable, boolean_t raw, int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors) @@ -824,7 +824,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, /* * All receives with a payload should use the new interface. */ - if (resumable || raw || wkeydata != NULL || payload) { + if (resumable || heal || raw || wkeydata != NULL || payload) { nvlist_t *outnvl = NULL; nvlist_t *innvl = fnvlist_alloc(); @@ -864,6 +864,9 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops, if (resumable) fnvlist_add_boolean(innvl, "resumable"); + if (heal) + fnvlist_add_boolean(innvl, "heal"); + if (cleanup_fd >= 0) fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd); @@ -982,7 +985,7 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, boolean_t force, boolean_t raw, int fd) { return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, - B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL)); + B_FALSE, B_FALSE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL)); } /* @@ -996,7 +999,7 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, boolean_t force, boolean_t raw, int fd) { return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, - B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL)); + B_FALSE, B_TRUE, raw, fd, NULL, -1, NULL, NULL, NULL, NULL)); } /* @@ -1019,7 +1022,8 @@ lzc_receive_with_header(const char *snapname, nvlist_t *props, return (EINVAL); return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, - resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, NULL)); + B_FALSE, resumable, raw, fd, begin_record, -1, NULL, NULL, NULL, + NULL)); } /* @@ -1049,8 +1053,8 @@ int lzc_receive_one(const char *snapname, nvlist_t *props, nvlist_t **errors) { return (recv_impl(snapname, props, NULL, NULL, 0, origin, force, - resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes, - errflags, action_handle, errors)); + B_FALSE, resumable, raw, input_fd, begin_record, cleanup_fd, + read_bytes, errflags, 
action_handle, errors)); } /* @@ -1063,13 +1067,13 @@ int lzc_receive_one(const char *snapname, nvlist_t *props, */ int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props, nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin, - boolean_t force, boolean_t resumable, boolean_t raw, int input_fd, - const dmu_replay_record_t *begin_record, int cleanup_fd, + boolean_t force, boolean_t heal, boolean_t resumable, boolean_t raw, + int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors) { return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin, - force, resumable, raw, input_fd, begin_record, cleanup_fd, + force, heal, resumable, raw, input_fd, begin_record, cleanup_fd, read_bytes, errflags, action_handle, errors)); } diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 22e6ab44ec2c..75982f7ef6bc 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2834,6 +2834,19 @@ estimates. Default value: \fB0\fR. .RE +.sp +.ne 2 +.na +\fBzfs_recv_best_effort_corrective\fR (int) +.ad +.RS 12n +When this variable is set to non-zero, a corrective receive will not stop when +a healing error is encountered; instead it will continue processing the +remainder of the provided stream. +.sp +Default value: \fB0\fR. +.RE + .sp .ne 2 .na diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 09fa2831be96..59ea8434c20b 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -29,8 +29,9 @@ .\" Copyright 2019 Richard Laager. All rights reserved. .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. +.\" Copyright 2019 Datto Inc. .\" -.Dd June 30, 2019 +.Dd September 9, 2019 .Dt ZFS 8 SMM .Os Linux .Sh NAME @@ -234,6 +235,11 @@ .Fl A .Ar filesystem Ns | Ns Ar volume .Nm +.Cm receive +.Fl c +.Op Fl vn +.Ar snapshot +.Nm .Cm redact .Ar snapshot redaction_bookmark .Ar redaction_snapshot Ns ... @@ -4360,6 +4366,22 @@ Abort an interrupted deleting its saved partially received state. .It Xo .Nm +.Cm receive +.Fl c +.Op Fl vn +.Ar snapshot +.Xc +Attempt to correct data corruption in the specified +.Ar snapshot +by using the provided stream as the source of healthy data. This method of +healing can only heal data blocks present in the stream; metadata cannot be +healed by a corrective receive. Running a scrub after healing is recommended +to verify that all corruption has been repaired. Consider why the corruption +happened in the first place, since periodically healing data from slowly +failing hardware will not prevent data loss later on when that hardware +fails completely. +.It Xo +.Nm .Cm redact .Ar snapshot redaction_bookmark .Ar redaction_snapshot Ns ... diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index 6249e165fa5d..7ab7ab654e5c 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -25,6 +25,7 @@ * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved. * Copyright (c) 2018, loli10K . All rights reserved. + * Copyright (c) 2019 Datto Inc. */ #include @@ -64,6 +65,7 @@ int zfs_recv_queue_length = SPA_MAXBLOCKSIZE; int zfs_recv_queue_ff = 20; +int zfs_recv_best_effort_corrective = 0; static char *dmu_recv_tag = "dmu_recv_tag"; const char *recv_clone_name = "%recv"; @@ -102,6 +104,7 @@ struct receive_writer_arg { /* A map from guid to dataset to help handle dedup'd streams.
*/ avl_tree_t *guid_to_ds_map; boolean_t resumable; + boolean_t heal; boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */ boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */ uint64_t last_object; @@ -345,6 +348,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, uint64_t val; uint64_t children; int error; + dsl_dataset_t *snap; dsl_pool_t *dp = ds->ds_dir->dd_pool; boolean_t encrypted = ds->ds_dir->dd_crypto_obj != 0; boolean_t raw = (featureflags & DMU_BACKUP_FEATURE_RAW) != 0; @@ -361,12 +365,16 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, if (dsl_dataset_has_resume_receive_state(ds)) return (SET_ERROR(EBUSY)); - /* New snapshot name must not exist. */ + /* New snapshot name must not exist if we're not healing it */ error = zap_lookup(dp->dp_meta_objset, dsl_dataset_phys(ds)->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, 8, 1, &val); - if (error != ENOENT) + if (drba->drba_cookie->drc_heal) { + if (error != 0) + return (error); + } else if (error != ENOENT) { return (error == 0 ? SET_ERROR(EEXIST) : error); + } /* Must not have children if receiving a ZVOL. */ error = zap_count(dp->dp_meta_objset, @@ -391,8 +399,21 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, if (error != 0) return (error); - if (fromguid != 0) { - dsl_dataset_t *snap; + if (drba->drba_cookie->drc_heal) { + /* + * Healing can only be done if the send stream is for the same + * snapshot as the one we are trying to heal. + */ + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + error = dsl_dataset_hold_obj(dp, val, FTAG, &snap); + if (error == 0 && + drrb->drr_toguid != dsl_dataset_phys(snap)->ds_guid) { + dsl_dataset_rele(snap, FTAG); + return (SET_ERROR(EINVAL)); + } + dsl_dataset_rele(snap, FTAG); + } else if (fromguid != 0) { + /* Sanity check the incremental recv */ uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; /* Can't perform a raw receive on top of a non-raw receive */ @@ -452,7 +473,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, dsl_dataset_rele(snap, FTAG); } else { - /* if full, then must be forced */ + /* If full and not healing then must be forced */ if (!drba->drba_cookie->drc_force) return (SET_ERROR(EEXIST)); @@ -780,7 +801,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error == 0) { - /* create temporary clone */ + /* Create temporary clone */ dsl_dataset_t *snap = NULL; if (drba->drba_cookie->drc_fromsnapobj != 0) { @@ -788,8 +809,15 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) drba->drba_cookie->drc_fromsnapobj, FTAG, &snap)); ASSERT3P(dcp, ==, NULL); } - dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, - snap, crflags, drba->drba_cred, dcp, tx); + if (drc->drc_heal) { + /* When healing we want to use the provided snapshot */ + VERIFY0(dsl_dataset_snap_lookup(ds, drc->drc_tosnap, + &dsobj)); + } else { + dsobj = dsl_dataset_create_sync(ds->ds_dir, + recv_clone_name, snap, crflags, drba->drba_cred, + dcp, tx); + } if (drba->drba_cookie->drc_fromsnapobj != 0) dsl_dataset_rele(snap, FTAG); dsl_dataset_rele_flags(ds, dsflags, FTAG); @@ -907,7 +935,8 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) */ rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG); if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds)) && - (featureflags & DMU_BACKUP_FEATURE_RAW) == 0) { + (featureflags & DMU_BACKUP_FEATURE_RAW) == 0 && + !drc->drc_heal) { (void) dmu_objset_create_impl(dp->dp_spa, newds, 
dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); } @@ -1102,7 +1131,7 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) */ int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, - boolean_t force, boolean_t resumable, nvlist_t *localprops, + boolean_t force, boolean_t heal, boolean_t resumable, nvlist_t *localprops, nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp) { @@ -1115,6 +1144,7 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, drc->drc_tosnap = tosnap; drc->drc_tofs = tofs; drc->drc_force = force; + drc->drc_heal = heal; drc->drc_resumable = resumable; drc->drc_cred = CRED(); drc->drc_clone = (origin != NULL); @@ -2077,7 +2107,8 @@ dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) rrw_exit(&ds->ds_bp_rwlock, FTAG); dsl_dataset_name(ds, name); dsl_dataset_disown(ds, dsflags, dmu_recv_tag); - (void) dsl_destroy_head(name); + if (!drc->drc_heal) + (void) dsl_destroy_head(name); } } @@ -2558,6 +2589,173 @@ receive_process_record(struct receive_writer_arg *rwa, return (err); } +static int +receive_heal_record(struct receive_writer_arg *rwa, + struct receive_record_arg *rrd) +{ + uint64_t size, lsize = 0, csize = 0, blkid = 0, obj = ZB_ROOT_OBJECT; + dmu_buf_t *dbp; + abd_t *abd; + void *buf; + int err = 0; + blkptr_t *bp = NULL; + objset_t *os = rwa->os; + dmu_replay_record_t drr = rrd->header; + enum zio_flag flags = ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL; + + /* Should only get here if we are doing a corrective recv */ + ASSERT(rwa->heal); + /* Processing in order, therefore bytes_read should be increasing. */ + ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read); + + rwa->bytes_read = rrd->bytes_read; + + /* We can only heal write and spill records; other ones get ignored */ + if (drr.drr_type != DRR_WRITE && drr.drr_type != DRR_SPILL) + goto cleanup; + + switch (drr.drr_type) { + case DRR_WRITE: + { + uint64_t offset = drr.drr_u.drr_write.drr_offset; + obj = drr.drr_u.drr_write.drr_object; + lsize = drr.drr_u.drr_write.drr_logical_size; + csize = drr.drr_u.drr_write.drr_compressed_size; + + buf = kmem_alloc(lsize, KM_SLEEP); + /* Try to read the object to see if it needs healing */ + err = dmu_read(os, obj, offset, lsize, buf, + DMU_READ_NO_PREFETCH); + kmem_free(buf, lsize); + if (err != ECKSUM) + goto cleanup; /* no corruption found */ + + err = dmu_buf_hold_noread(os, obj, offset, FTAG, &dbp); + if (err != 0) { + err = SET_ERROR(EBUSY); + goto cleanup; + } + blkid = + dbuf_whichblock(DB_DNODE((dmu_buf_impl_t *)dbp), 0, offset); + /* Get the block pointer for the corrupted block */ + bp = dmu_buf_get_blkptr(dbp); + dmu_buf_rele(dbp, FTAG); + + break; + } + case DRR_SPILL: + { + dnode_t *dn; + dmu_buf_t *dbp_spill; + obj = drr.drr_u.drr_spill.drr_object; + lsize = drr.drr_u.drr_spill.drr_length; + csize = drr.drr_u.drr_spill.drr_compressed_size; + + err = dmu_bonus_hold(os, obj, FTAG, &dbp); + if (err != 0) + goto cleanup; + + /* Try to read the spill block to see if it needs healing */ + err = dmu_spill_hold_existing(dbp, FTAG, &dbp_spill); + if (err != ECKSUM) { + dmu_buf_rele(dbp, FTAG); + if (err == 0) + dmu_buf_rele(dbp_spill, FTAG); + goto cleanup; /* no corruption found */ + } + /* Get the block pointer to the corrupted spill block */ + dn = dmu_buf_dnode_enter(dbp); + bp = DN_SPILL_BLKPTR(dn->dn_phys); + dmu_buf_dnode_exit(dbp); + dmu_buf_rele(dbp, FTAG); + break; + } + default: + ASSERT0(1); + err = SET_ERROR(EINVAL); + goto cleanup; + } + + 
/* Get the good data from the recv record */ + abd = abd_get_from_buf(rrd->arc_buf->b_data, + arc_buf_size(rrd->arc_buf)); + abd_take_ownership_of_buf(abd, B_FALSE); + + if (arc_get_compression(rrd->arc_buf) != BP_GET_COMPRESS(bp)) { + /* + * The compression in the stream doesn't match what we had + * on disk; we need to re-compress the buf into the + * compression type that was written to disk previously. + */ + abd_t *decompressed_abd; + buf = kmem_alloc(lsize, KM_SLEEP); + /* Decompress the stream data */ + err = zio_decompress_data(arc_get_compression(rrd->arc_buf), + abd, buf, csize, lsize); + if (err != 0) { + kmem_free(buf, lsize); + abd_release_ownership_of_buf(abd); + abd_put(abd); + goto cleanup; + } + + decompressed_abd = abd_get_from_buf(buf, lsize); + abd_take_ownership_of_buf(decompressed_abd, B_FALSE); + /* Recompress the stream data */ + VERIFY3U(zio_compress_data(BP_GET_COMPRESS(bp), + decompressed_abd, buf, lsize), ==, csize); + size = csize; + + /* Swap in the newly compressed data into the abd */ + abd_release_ownership_of_buf(decompressed_abd); + abd_put(decompressed_abd); + abd_release_ownership_of_buf(abd); + abd_put(abd); + abd = abd_get_from_buf(buf, csize); + abd_take_ownership_of_buf(abd, B_FALSE); + + kmem_free(buf, lsize); + } else { + size = arc_buf_size(rrd->arc_buf); + } + + /* Correct the corruption in place */ + err = zio_wait(zio_rewrite(NULL, os->os_spa, 0, bp, abd, size, NULL, + NULL, ZIO_PRIORITY_SYNC_WRITE, flags, NULL)); + + if (err == 0) { + abd_t *zio_abd = abd_alloc_for_io(size, B_FALSE); + /* Test if healing worked by re-reading the bp */ + err = zio_wait(zio_read(NULL, os->os_spa, bp, zio_abd, size, + NULL, NULL, ZIO_PRIORITY_SYNC_READ, flags, NULL)); + abd_free(zio_abd); + if (err == 0) { + /* Corruption corrected; update error log if needed */ + zbookmark_phys_t zb; + SET_BOOKMARK(&zb, dmu_objset_id(os), obj, 0, blkid); + spa_remove_error(os->os_spa, &zb); + } else if (zfs_recv_best_effort_corrective != 0) { + err = 0; + } + } else if (zfs_recv_best_effort_corrective != 0) { + err = 0; + } + + abd_release_ownership_of_buf(abd); + abd_put(abd); + +cleanup: + if (rrd->arc_buf != NULL) + dmu_return_arcbuf(rrd->arc_buf); + else if (rrd->payload != NULL) + kmem_free(rrd->payload, rrd->payload_size); + rrd->arc_buf = NULL; + rrd->payload = NULL; + + return (err); +} + /* * dmu_recv_stream's worker thread; pull records off the queue, and then call * receive_process_record When we're done, signal the main thread and exit. @@ -2577,7 +2775,10 @@ receive_writer_thread(void *arg) * can exit. 
*/ if (rwa->err == 0) { - rwa->err = receive_process_record(rwa, rrd); + if (rwa->heal) + rwa->err = receive_heal_record(rwa, rrd); + else + rwa->err = receive_process_record(rwa, rrd); } else if (rrd->arc_buf != NULL) { dmu_return_arcbuf(rrd->arc_buf); rrd->arc_buf = NULL; @@ -2746,6 +2947,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd, mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL); rwa->os = drc->drc_os; rwa->byteswap = drc->drc_byteswap; + rwa->heal = drc->drc_heal; rwa->resumable = drc->drc_resumable; rwa->raw = drc->drc_raw; rwa->spill = drc->drc_spill; @@ -2879,7 +3081,9 @@ dmu_recv_end_check(void *arg, dmu_tx_t *tx) ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag); - if (!drc->drc_newfs) { + if (drc->drc_heal) { + error = 0; + } else if (!drc->drc_newfs) { dsl_dataset_t *origin_head; error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head); @@ -2958,7 +3162,10 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) tx, "snap=%s", drc->drc_tosnap); drc->drc_ds->ds_objset->os_raw_receive = B_FALSE; - if (!drc->drc_newfs) { + if (drc->drc_heal) { + spa_remove_healed_errors(dp->dp_spa, tx); + spa_errlog_sync(dp->dp_spa, dmu_tx_get_txg(tx)); + } else if (!drc->drc_newfs) { dsl_dataset_t *origin_head; VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG, @@ -3207,4 +3414,7 @@ ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, queue_length, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, queue_ff, INT, ZMOD_RW, "Receive queue fill fraction"); + +ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, best_effort_corrective, INT, ZMOD_RW, + "Ignore errors during corrective receive"); /* END CSTYLED */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 8330ab1ce8d3..82545eb2d8ba 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -29,9 +29,9 @@ * Copyright 2016 Toomas Soome * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright 2018 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019 Datto Inc. */ /* @@ -1264,6 +1264,9 @@ spa_activate(spa_t *spa, int mode) avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); + avl_create(&spa->spa_errlist_healed, + spa_error_entry_compare, sizeof (spa_error_entry_t), + offsetof(spa_error_entry_t, se_avl)); spa_keystore_init(&spa->spa_keystore); @@ -1369,6 +1372,7 @@ spa_deactivate(spa_t *spa) spa_errlog_drain(spa); avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); + avl_destroy(&spa->spa_errlist_healed); spa_keystore_fini(&spa->spa_keystore); diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index fa5120eb61b3..3e9d67b69d5e 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2019 Datto Inc. */ /* @@ -54,6 +55,7 @@ #include #include +const int NAME_MAX_LEN = 64; /* * Convert a bookmark to a string. @@ -128,6 +130,103 @@ spa_log_error(spa_t *spa, const zbookmark_phys_t *zb) mutex_exit(&spa->spa_errlist_lock); } +/* + * If this error exists in the given tree remove it. 
+ */ +static void +remove_error_from_list(spa_t *spa, avl_tree_t *t, const zbookmark_phys_t *zb) +{ + spa_error_entry_t search, *found; + avl_index_t where; + + mutex_enter(&spa->spa_errlist_lock); + search.se_bookmark = *zb; + if ((found = avl_find(t, &search, &where)) != NULL) + avl_remove(t, found); + mutex_exit(&spa->spa_errlist_lock); +} + +/* + * If a healed bookmark matches an entry in the error log we stash it in a tree + * so that we can later remove the related log entries in sync context. + */ +static void +spa_add_healed_error(spa_t *spa, uint64_t obj, zbookmark_phys_t *healed_zb) +{ + char name[NAME_MAX_LEN]; + + if (obj == 0) + return; + + mutex_enter(&spa->spa_errlog_lock); + bookmark_to_name(healed_zb, name, sizeof (name)); + if (zap_contains(spa->spa_meta_objset, obj, name) == 0) { + /* + * Found an error matching healed zb, add zb to our + * tree of healed errors + */ + avl_tree_t *tree = &spa->spa_errlist_healed; + spa_error_entry_t search; + spa_error_entry_t *new; + avl_index_t where; + search.se_bookmark = *healed_zb; + mutex_enter(&spa->spa_errlist_lock); + if (avl_find(tree, &search, &where) != NULL) { + mutex_exit(&spa->spa_errlist_lock); + return; + } + new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP); + new->se_bookmark = *healed_zb; + avl_insert(tree, new, where); + mutex_exit(&spa->spa_errlist_lock); + } + mutex_exit(&spa->spa_errlog_lock); +} + +/* + * Removes all of the recv healed errors from both on-disk error logs + */ +void +spa_remove_healed_errors(spa_t *spa, dmu_tx_t *tx) +{ + char name[NAME_MAX_LEN]; + spa_error_entry_t *se; + void *cookie = NULL; + + mutex_enter(&spa->spa_errlist_lock); + if (avl_is_empty(&spa->spa_errlist_healed)) { + mutex_exit(&spa->spa_errlist_lock); + return; + } + while ((se = avl_destroy_nodes(&spa->spa_errlist_healed, + &cookie)) != NULL) { + bookmark_to_name(&se->se_bookmark, name, sizeof (name)); + kmem_free(se, sizeof (spa_error_entry_t)); + (void) zap_remove(spa->spa_meta_objset, + spa->spa_errlog_last, name, tx); + (void) zap_remove(spa->spa_meta_objset, + spa->spa_errlog_scrub, name, tx); + } + mutex_exit(&spa->spa_errlist_lock); +} + +/* + * Remove errors from the in-core error list and stash away bookmarks to remove + * them from the on-disk error log later in spa_remove_healed_errors(). + */ +void +spa_remove_error(spa_t *spa, zbookmark_phys_t *zb) +{ + char name[NAME_MAX_LEN]; + + bookmark_to_name(zb, name, sizeof (name)); + + remove_error_from_list(spa, &spa->spa_errlist_last, zb); + remove_error_from_list(spa, &spa->spa_errlist_scrub, zb); + spa_add_healed_error(spa, spa->spa_errlog_last, zb); + spa_add_healed_error(spa, spa->spa_errlog_scrub, zb); +} + /* * Return the number of errors currently in the error log. 
This is actually the * sum of both the last log and the current log, since we don't know the union @@ -301,7 +400,7 @@ static void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx) { spa_error_entry_t *se; - char buf[64]; + char buf[NAME_MAX_LEN]; void *cookie; if (avl_numnodes(t) != 0) { diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index c5093fd447aa..a468f22cce55 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -4676,9 +4676,9 @@ static boolean_t zfs_ioc_recv_inject_err; static int zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force, - boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record, - int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags, - uint64_t *action_handle, nvlist_t **errors) + boolean_t heal, boolean_t resumable, int input_fd, + dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes, + uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors) { dmu_recv_cookie_t drc; int error = 0; @@ -4701,7 +4701,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, return (SET_ERROR(EBADF)); off = input_fp->f_offset; - error = dmu_recv_begin(tofs, tosnap, begin_record, force, + error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal, resumable, localprops, hidden_args, origin, &drc, input_fp->f_vnode, &off); if (error != 0) @@ -5050,7 +5050,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) begin_record.drr_u.drr_begin = zc->zc_begin_record; error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops, - NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record, + NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record, zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj, &zc->zc_action_handle, &errors); nvlist_free(recvdprops); @@ -5105,6 +5105,7 @@ static const zfs_ioc_key_t zfs_keys_recv_new[] = { {"begin_record", DATA_TYPE_BYTE_ARRAY, 0}, {"input_fd", DATA_TYPE_INT32, 0}, {"force", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, + {"heal", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"resumable", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"cleanup_fd", DATA_TYPE_INT32, ZK_OPTIONAL}, {"action_handle", DATA_TYPE_UINT64, ZK_OPTIONAL}, @@ -5125,6 +5126,7 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) char *tosnap; char tofs[ZFS_MAX_DATASET_NAME_LEN]; boolean_t force; + boolean_t heal; boolean_t resumable; uint64_t action_handle = 0; uint64_t read_bytes = 0; @@ -5156,6 +5158,7 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) input_fd = fnvlist_lookup_int32(innvl, "input_fd"); force = nvlist_exists(innvl, "force"); + heal = nvlist_exists(innvl, "heal"); resumable = nvlist_exists(innvl, "resumable"); error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd); @@ -5180,8 +5183,8 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) return (error); error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops, - hidden_args, force, resumable, input_fd, begin_record, cleanup_fd, - &read_bytes, &errflags, &action_handle, &errors); + hidden_args, force, heal, resumable, input_fd, begin_record, + cleanup_fd, &read_bytes, &errflags, &action_handle, &errors); fnvlist_add_uint64(outnvl, "read_bytes", read_bytes); fnvlist_add_uint64(outnvl, "error_flags", errflags); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 182e6137aae1..422b3f62d771 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ 
-212,7 +212,7 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos', 'receive-o-x_props_override', 'zfs_receive_from_encrypted', 'zfs_receive_to_encrypted', 'zfs_receive_raw', - 'zfs_receive_raw_incremental', 'zfs_receive_-e'] + 'zfs_receive_raw_incremental', 'zfs_receive_-e', 'zfs_receive_corrective'] tags = ['functional', 'cli_root', 'zfs_receive'] [tests/functional/cli_root/zfs_rename] diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am index bf112a77e6a2..46b83f62686b 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am @@ -22,4 +22,5 @@ dist_pkgdata_SCRIPTS = \ zfs_receive_to_encrypted.ksh \ zfs_receive_raw.ksh \ zfs_receive_raw_incremental.ksh \ - zfs_receive_-e.ksh + zfs_receive_-e.ksh \ + zfs_receive_corrective.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh new file mode 100755 index 000000000000..806a52ea8e3e --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_corrective.ksh @@ -0,0 +1,133 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should be able to heal using corrective recv +# +# STRATEGY: +# 1. Create a dataset +# 2. Snapshot the dataset +# 3. Create a file and get its checksum +# 4. Snapshot the dataset +# 5. Recv dataset into a filesystem with different compression +# 6. Corrupt the file +# 7. Heal the corruption using a corrective send and a full sendfile +# 8. Corrupt the file again +# 9. Heal the corruption using a corrective send and an incremental sendfile +# 10. Corrupt the file again +# 11.
Heal the corruption when the target snapshot and the sendfile have +# different compression algorithms +# + +verify_runnable "both" + +backup=$TEST_BASE_DIR/backup +ibackup=$TEST_BASE_DIR/ibackup.$$ +garbage=$TEST_BASE_DIR/garbage.$$ +DISK=${DISKS%% *} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + + for file in $garbage $ibackup $backup; do + [[ -f $file ]] && log_must rm -f $file + done +} + +function corrupt_offset +{ + log_must dd bs=512 count=1 if=$garbage conv=notrunc \ + oflag=sync of=$DEV_RDSKDIR/$DISK seek=$((($1 / 512) + (0x400000 / 512))) +} + +function test_corrective_recv +{ + typeset objid="$(zdb -dd $1 | sed -n '/file/s/ \+/ /gp' | \ + cut -f2 -d' ')" + typeset start_offset="0x$(zdb -ddddd $1 $objid | \ + sed -n '/ 0 L0 0/s/ \+/ /gp' | cut -f2 -d':')" + typeset mid_offset="0x$(zdb -ddddd $1 $objid | \ + sed -n '/ 27e00 L0 0:/s/ \+/ /gp' | cut -f2 -d':')" + + corrupt_offset "$start_offset" + corrupt_offset "$mid_offset" + + log_must zpool scrub $TESTPOOL + log_must sleep 5 # let scrub finish + log_must eval "zpool status -v $TESTPOOL | \ + grep \"Permanent errors have been detected\"" + + # make sure we will read the corruption from disk by flushing the ARC + log_must zinject -a + + log_must eval "zfs recv -c $1 < $2" + + log_mustnot eval "zpool status -v $TESTPOOL | \ + grep \"Permanent errors have been detected\"" + typeset cksum=$(md5digest $file) + [[ "$cksum" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum != $checksum)" +} + +log_onexit cleanup + +log_assert "ZFS corrective receive should be able to heal corruption" + +# we will use this as the source of corruption +log_must dd if=/dev/urandom of=$garbage bs=512 count=1 oflag=sync + +typeset snap1="$TESTPOOL/$TESTFS1@snap1" +typeset snap2="$TESTPOOL/$TESTFS1@snap2" +typeset file="/$TESTPOOL/$TESTFS1/$TESTFILE0" + +log_must zfs create -o primarycache=none -o recordsize=512 \ + -o compression=lz4 $TESTPOOL/$TESTFS1 + +log_must zfs snapshot $snap1 + +log_must dd if=/dev/urandom of=$file bs=1024 count=4096 oflag=sync +typeset checksum=$(md5digest $file) + +log_must zfs snapshot $snap2 + +log_must eval "zfs send $snap2 > $backup" +log_must eval "zfs send -i $snap1 $snap2 > $ibackup" +log_must eval "zfs recv -o compression=gzip -o primarycache=none \ + -o recordsize=512 $TESTPOOL/$TESTFS2 < $backup" + +typeset compr=$(get_prop compression $TESTPOOL/$TESTFS2) +[[ "$compr" == "gzip" ]] || \ + log_fail "Unexpected compression $compr in recved dataset" + +# test healing recv from a full sendfile +test_corrective_recv $snap2 $backup +# test healing recv from an incremental sendfile +test_corrective_recv $snap2 $ibackup +# test healing recv when compression doesn't match between sendfile and on-disk +test_corrective_recv "$TESTPOOL/$TESTFS2@snap2" $backup + +log_pass "ZFS corrective recv works for healing"
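
For reference, a minimal usage sketch of the corrective receive flag added by this change (the pool, dataset, and backup-file names below are hypothetical): given an existing snapshot pool/fs@snap with corrupted data blocks and a previously saved send stream of that same snapshot, the corruption can be healed from the stream and then verified with a scrub, for example:

    zfs receive -c pool/fs@snap < /backup/fs_snap.stream
    zpool scrub pool

An incremental stream works as well, as long as it contains the damaged blocks; blocks that are not present in the stream, and metadata, cannot be healed this way.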