diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 35a59710c05e..df2e6429f03d 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -395,7 +395,7 @@ get_usage(zpool_help_t idx) return (gettext("\tinitialize [-c | -s] [-w] " "[ ...]\n")); case HELP_SCRUB: - return (gettext("\tscrub [-s | -p] [-w] ...\n")); + return (gettext("\tscrub [-s | -p] [-w] [-e] ...\n")); case HELP_RESILVER: return (gettext("\tresilver ...\n")); case HELP_TRIM: @@ -7145,8 +7145,9 @@ wait_callback(zpool_handle_t *zhp, void *data) } /* - * zpool scrub [-s | -p] [-w] ... + * zpool scrub [-s | -p] [-w] [-e] ... * + * -e Only scrub blocks in the error log. * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. * -w Wait. Blocks until scrub has completed. @@ -7162,14 +7163,21 @@ zpool_do_scrub(int argc, char **argv) cb.cb_type = POOL_SCAN_SCRUB; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + boolean_t is_error_scrub = B_FALSE; + boolean_t is_pause = B_FALSE; + boolean_t is_stop = B_FALSE; + /* check options */ - while ((c = getopt(argc, argv, "spw")) != -1) { + while ((c = getopt(argc, argv, "spwe")) != -1) { switch (c) { + case 'e': + is_error_scrub = B_TRUE; + break; case 's': - cb.cb_type = POOL_SCAN_NONE; + is_stop = B_TRUE; break; case 'p': - cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; + is_pause = B_TRUE; break; case 'w': wait = B_TRUE; @@ -7181,11 +7189,27 @@ zpool_do_scrub(int argc, char **argv) } } - if (cb.cb_type == POOL_SCAN_NONE && - cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) { - (void) fprintf(stderr, gettext("invalid option combination: " - "-s and -p are mutually exclusive\n")); + if (is_pause && is_stop) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-s and -p are mutually exclusive\n")); usage(B_FALSE); + } else { + if (is_error_scrub) { + cb.cb_type = POOL_ERRORSCRUB; + if (is_pause) { + cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; + } else if (is_stop) { + cb.cb_scrub_cmd = POOL_ERRORSCRUB_STOP; + } else { + cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + } + } else { + if (is_pause) { + cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; + } else if (is_stop) { + cb.cb_type = POOL_SCAN_NONE; + } + } } if (wait && (cb.cb_type == POOL_SCAN_NONE || @@ -7408,6 +7432,70 @@ secs_to_dhms(uint64_t total, char *buf) } } +/* + * Print out detailed error scrub status. + */ +static void +print_err_scrub_status(pool_scan_stat_t *ps) +{ + time_t start, end, pause; + uint64_t total_secs_left; + uint64_t secs_left, mins_left, hours_left, days_left; + uint64_t examined, to_be_examined; + + if (ps == NULL || ps->pss_error_scrub_func != POOL_ERRORSCRUB) { + return; + } + + (void) printf(gettext(" scrub: ")); + + start = ps->pss_error_scrub_start; + end = ps->pss_error_scrub_end; + pause = ps->pss_pass_error_scrub_pause; + examined = ps->pss_error_scrub_examined; + to_be_examined = ps->pss_error_scrub_to_be_examined; + + assert(ps->pss_error_scrub_func == POOL_ERRORSCRUB); + + if (ps->pss_error_scrub_state == DSS_FINISHED) { + total_secs_left = end - start; + days_left = total_secs_left / 60 / 60 / 24; + hours_left = (total_secs_left / 60 / 60) % 24; + mins_left = (total_secs_left / 60) % 60; + secs_left = (total_secs_left % 60); + + (void) printf(gettext("scrubbed %llu error blocks in %llu days " + "%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined, + (u_longlong_t)days_left, (u_longlong_t)hours_left, + (u_longlong_t)mins_left, (u_longlong_t)secs_left, + ctime(&end)); + + return; + } else if (ps->pss_error_scrub_state == DSS_CANCELED) { + (void) printf(gettext("error scrub canceled on %s"), + ctime(&end)); + return; + } + assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING); + + /* Error scrub is in progress. */ + if (pause == 0) { + (void) printf(gettext("error scrub in progress since %s"), + ctime(&start)); + } else { + (void) printf(gettext("error scrub paused since %s"), + ctime(&pause)); + (void) printf(gettext("\terror scrub started on %s"), + ctime(&start)); + } + + double fraction_done = (double)examined / (to_be_examined + examined); + (void) printf(gettext("\t%.2f%% done, issued I/O for %llu error" + " blocks"), 100 * fraction_done, (u_longlong_t)examined); + + (void) printf("\n"); +} + /* * Print out detailed scrub status. */ @@ -7733,6 +7821,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot) { uint64_t rebuild_end_time = 0, resilver_end_time = 0; boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE; + boolean_t have_errorscrub = B_FALSE; boolean_t active_resilver = B_FALSE; pool_checkpoint_stat_t *pcs = NULL; pool_scan_stat_t *ps = NULL; @@ -7747,6 +7836,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot) have_resilver = (ps->pss_func == POOL_SCAN_RESILVER); have_scrub = (ps->pss_func == POOL_SCAN_SCRUB); + have_errorscrub = (ps->pss_error_scrub_func == POOL_ERRORSCRUB); } boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time); @@ -7755,6 +7845,8 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot) /* Always print the scrub status when available. */ if (have_scrub) print_scan_scrub_resilver_status(ps); + if (have_errorscrub) + print_err_scrub_status(ps); /* * When there is an active resilver or rebuild print its status. @@ -8463,6 +8555,7 @@ status_callback(zpool_handle_t *zhp, void *data) (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c); + print_removal_status(zhp, prs); (void) nvlist_lookup_uint64_array(nvroot, diff --git a/include/libzfs.h b/include/libzfs.h index 9ef280636d4c..ea1ab536b6ec 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -126,11 +126,15 @@ typedef enum zfs_error { EZFS_THREADCREATEFAILED, /* thread create failed */ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ EZFS_SCRUBBING, /* currently scrubbing */ + EZFS_ERRORSCRUBBING, /* currently error scrubbing */ + EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */ EZFS_NO_SCRUB, /* no active scrub */ + EZFS_NO_ERRORSCRUB, /* no active error scrub */ EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ + EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */ EZFS_ACTIVE_POOL, /* pool is imported on a different system */ EZFS_CRYPTOFAILED, /* failed to setup encryption */ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */ diff --git a/include/libzfs_core.h b/include/libzfs_core.h index 83d8211ab615..7071a99730bc 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -143,6 +143,9 @@ _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *); _LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *); _LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **); + +_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **); + #ifdef __cplusplus } #endif diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 10e29a45c89f..60d31c5413ef 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -370,6 +370,7 @@ typedef struct dmu_buf { #define DMU_POOL_DDT_STATS "DDT-statistics" #define DMU_POOL_CREATION_VERSION "creation_version" #define DMU_POOL_SCAN "scan" +#define DMU_POOL_ERRORSCRUB "error_scrub" #define DMU_POOL_FREE_BPOBJ "free_bpobj" #define DMU_POOL_BPTREE_OBJ "bptree_obj" #define DMU_POOL_EMPTY_BPOBJ "empty_bpobj" diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index fb1f1d65bad4..c9a6dc2ce521 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -78,6 +79,21 @@ typedef enum dsl_scan_flags { #define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN) +typedef struct dsl_errorscrub_phys { + uint64_t dep_func; /* pool_scan_func_t */ + uint64_t dep_state; /* dsl_scan_state_t */ + uint64_t dep_cursor; /* serialized zap cursor for tracing progress */ + uint64_t dep_start_time; /* error scrub start time, unix timestamp */ + uint64_t dep_end_time; /* error scrub end time, unix timestamp */ + uint64_t dep_to_examine; /* total error blocks to be scrubbed */ + uint64_t dep_examined; /* blocks scrubbed so far */ + uint64_t dep_errors; /* error scrub I/O error count */ + uint64_t dep_paused_flags; /* flag for paused */ +} dsl_errorscrub_phys_t; + +#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \ + / sizeof (uint64_t)) + /* * Every pool will have one dsl_scan_t and this structure will contain * in-memory information about the scan and a pointer to the on-disk @@ -151,11 +167,16 @@ typedef struct dsl_scan { uint64_t scn_avg_zio_size_this_txg; uint64_t scn_zios_this_txg; + /* zap cursor for tracing error scrub progress */ + zap_cursor_t errorscrub_cursor; /* members needed for syncing scan status to disk */ dsl_scan_phys_t scn_phys; /* on disk representation of scan */ dsl_scan_phys_t scn_phys_cached; avl_tree_t scn_queue; /* queue of datasets to scan */ uint64_t scn_bytes_pending; /* outstanding data to issue */ + + /* members needed for syncing error scrub status to disk */ + dsl_errorscrub_phys_t errorscrub_phys; } dsl_scan_t; typedef struct dsl_scan_io_queue dsl_scan_io_queue_t; @@ -167,12 +188,16 @@ int dsl_scan_setup_check(void *, dmu_tx_t *); void dsl_scan_setup_sync(void *, dmu_tx_t *); void dsl_scan_fini(struct dsl_pool *dp); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); -int dsl_scan_cancel(struct dsl_pool *); +int dsl_scan_cancel(struct dsl_pool *, pool_scan_func_t func); int dsl_scan(struct dsl_pool *, pool_scan_func_t); void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd); boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp); -int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd); +boolean_t dsl_errorscrubbing(const struct dsl_pool *dp); +boolean_t dsl_errorscrub_active(dsl_scan_t *scn); void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg); +int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, + pool_scrub_cmd_t cmd, pool_scan_func_t func); +void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *); boolean_t dsl_scan_resilvering(struct dsl_pool *dp); boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); @@ -184,6 +209,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, struct dmu_tx *tx); boolean_t dsl_scan_active(dsl_scan_t *scn); boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn); +boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn); void dsl_scan_freed(spa_t *spa, const blkptr_t *bp); void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue); void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd); diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 5d43750594cd..a63b1062402f 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -942,6 +942,7 @@ typedef enum pool_scan_func { POOL_SCAN_NONE, POOL_SCAN_SCRUB, POOL_SCAN_RESILVER, + POOL_ERRORSCRUB, POOL_SCAN_FUNCS } pool_scan_func_t; @@ -951,6 +952,7 @@ typedef enum pool_scan_func { typedef enum pool_scrub_cmd { POOL_SCRUB_NORMAL = 0, POOL_SCRUB_PAUSE, + POOL_ERRORSCRUB_STOP, POOL_SCRUB_FLAGS_END } pool_scrub_cmd_t; @@ -1005,6 +1007,20 @@ typedef struct pool_scan_stat { uint64_t pss_pass_scrub_spent_paused; uint64_t pss_pass_issued; /* issued bytes per scan pass */ uint64_t pss_issued; /* total bytes checked by scanner */ + + /* error scrub values stored on disk */ + uint64_t pss_error_scrub_func; /* pool_scan_func_t */ + uint64_t pss_error_scrub_state; /* dsl_scan_state_t */ + uint64_t pss_error_scrub_start; /* error scrub start time */ + uint64_t pss_error_scrub_end; /* error scrub end time */ + uint64_t pss_error_scrub_examined; /* error blocks issued I/O */ + /* error blocks to be issued I/O */ + uint64_t pss_error_scrub_to_be_examined; + + /* error scrub values not stored on disk */ + /* error scrub pause time in milliseconds */ + uint64_t pss_pass_error_scrub_pause; + } pool_scan_stat_t; typedef struct pool_removal_stat { @@ -1026,6 +1042,7 @@ typedef enum dsl_scan_state { DSS_SCANNING, DSS_FINISHED, DSS_CANCELED, + DSS_ERRORSCRUBBING, DSS_NUM_STATES } dsl_scan_state_t; @@ -1357,6 +1374,7 @@ typedef enum zfs_ioc { ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */ ZFS_IOC_WAIT, /* 0x5a53 */ ZFS_IOC_WAIT_FS, /* 0x5a54 */ + ZFS_IOC_POOL_SCRUB, /* 0x5a55 */ /* * Per-platform (Optional) - 8/128 numbers reserved. diff --git a/include/sys/spa.h b/include/sys/spa.h index 08eba250d3a3..21ab59fc0c6f 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -818,8 +818,9 @@ extern void spa_l2cache_drop(spa_t *spa); /* scanning */ extern int spa_scan(spa_t *spa, pool_scan_func_t func); -extern int spa_scan_stop(spa_t *spa); -extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag); +extern int spa_scan_stop(spa_t *spa, pool_scan_func_t func); +extern int spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func, + pool_scrub_cmd_t flag); /* spa syncing */ extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ @@ -1140,6 +1141,7 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd); extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate); extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); extern uint64_t spa_get_errlog_size(spa_t *spa); +extern uint64_t spa_get_last_errlog_size(spa_t *spa); extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count); extern void spa_errlog_rotate(spa_t *spa); extern void spa_errlog_drain(spa_t *spa); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 280f8cf16958..8e1b93932cf8 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -291,6 +291,10 @@ struct spa { uint64_t spa_scan_pass_exam; /* examined bytes per pass */ uint64_t spa_scan_pass_issued; /* issued bytes per pass */ + /* error scrub pause time in milliseconds */ + uint64_t spa_scan_pass_errorscrub_pause; + /* total error scrub paused time in milliseconds */ + uint64_t spa_scan_pass_errorscrub_spent_paused; /* * We are in the middle of a resilver, and another resilver * is needed once this one completes. This is set iff any diff --git a/include/sys/sysevent/eventdefs.h b/include/sys/sysevent/eventdefs.h index 2067b355afb4..7bff66e1ce9a 100644 --- a/include/sys/sysevent/eventdefs.h +++ b/include/sys/sysevent/eventdefs.h @@ -123,6 +123,11 @@ extern "C" { #define ESC_ZFS_TRIM_CANCEL "trim_cancel" #define ESC_ZFS_TRIM_RESUME "trim_resume" #define ESC_ZFS_TRIM_SUSPEND "trim_suspend" +#define ESC_ZFS_ERRORSCRUB_START "error_scrub_start" +#define ESC_ZFS_ERRORSCRUB_FINISH "error_scrub_finish" +#define ESC_ZFS_ERRORSCRUB_ABORT "error_scrub_abort" +#define ESC_ZFS_ERRORSCRUB_RESUME "error_scrub_resume" +#define ESC_ZFS_ERRORSCRUB_PAUSED "error_scrub_paused" /* * datalink subclass definitions. diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 7338b9d72cad..ea716a65eadc 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -2480,49 +2480,85 @@ zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds, int zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) { - zfs_cmd_t zc = {"\0"}; char msg[1024]; int err; libzfs_handle_t *hdl = zhp->zpool_hdl; - (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - zc.zc_cookie = func; - zc.zc_flags = cmd; - - if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0) - return (0); + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_uint64(args, "scan_type", (uint64_t)func); + fnvlist_add_uint64(args, "scan_command", (uint64_t)cmd); - err = errno; + err = lzc_scrub(ZFS_IOC_POOL_SCRUB, zhp->zpool_name, args, NULL); + fnvlist_free(args); - /* ECANCELED on a scrub means we resumed a paused scrub */ - if (err == ECANCELED && func == POOL_SCAN_SCRUB && - cmd == POOL_SCRUB_NORMAL) + if (err == 0) return (0); - if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL) + /* + * An ECANCELED on a scrub means one of the following: + * 1. we resumed a paused scrub. + * 2. we resumed a paused error scrub. + * 3. Error scrub is not run because of no error log. + */ + if (err == ECANCELED && (func == POOL_SCAN_SCRUB || + func == POOL_ERRORSCRUB) && cmd == POOL_SCRUB_NORMAL) return (0); + /* + * The following cases have been handled here: + * 1. Paused a scrub/error scrub if there is no in progross. + */ + if (err == ENOENT && func != POOL_SCAN_NONE && (cmd == + POOL_SCRUB_NORMAL || cmd != POOL_ERRORSCRUB_STOP)) { + return (0); + } + + ASSERT3U(func, >=, POOL_SCAN_NONE); + ASSERT3U(func, <, POOL_SCAN_FUNCS); if (func == POOL_SCAN_SCRUB) { if (cmd == POOL_SCRUB_PAUSE) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot pause scrubbing %s"), zc.zc_name); + "cannot pause scrubbing %s"), zhp->zpool_name); } else { assert(cmd == POOL_SCRUB_NORMAL); (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot scrub %s"), zc.zc_name); + "cannot scrub %s"), zhp->zpool_name); + } + } else if (func == POOL_ERRORSCRUB) { + if (cmd == POOL_SCRUB_PAUSE) { + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot pause error scrubbing %s"), + zhp->zpool_name); + } else if (cmd == POOL_SCRUB_NORMAL) { + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot error scrub %s"), zhp->zpool_name); + } else { + assert(cmd == POOL_ERRORSCRUB_STOP); + (void) snprintf(msg, sizeof (msg), + dgettext(TEXT_DOMAIN, "cannot cancel error " + "scrubbing %s"), zhp->zpool_name); } } else if (func == POOL_SCAN_RESILVER) { - assert(cmd == POOL_SCRUB_NORMAL); + ASSERT3U(cmd, ==, POOL_SCRUB_NORMAL); (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, - "cannot restart resilver on %s"), zc.zc_name); + "cannot restart resilver on %s"), zhp->zpool_name); } else if (func == POOL_SCAN_NONE) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), - zc.zc_name); - } else { - assert(!"unexpected result"); + zhp->zpool_name); } + /* + * With EBUSY, five cases are possible: + * + * Currently State Requested + * 1. Normal Scrub Running Normal Scrub or Error Scrub + * 2. Normal Scrub Paused Error Scrub + * 3. Normal Scrub Paused Pause Normal Scrub + * 4. Error Scrub Running Normal Scrub or Error Scrub + * 5. Error Scrub Paused Pause Error Scrub + * 6. Resilvering Anything else + */ if (err == EBUSY) { nvlist_t *nvroot; pool_scan_stat_t *ps = NULL; @@ -2534,15 +2570,46 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); if (ps && ps->pss_func == POOL_SCAN_SCRUB && ps->pss_state == DSS_SCANNING) { - if (cmd == POOL_SCRUB_PAUSE) - return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg)); - else + if (ps->pss_pass_scrub_pause == 0) { + /* handles case 1 */ + assert(cmd == POOL_SCRUB_NORMAL); return (zfs_error(hdl, EZFS_SCRUBBING, msg)); + } else { + if (func == POOL_ERRORSCRUB) { + /* handles case 2 */ + ASSERT3U(cmd, ==, POOL_SCRUB_NORMAL); + return (zfs_error(hdl, + EZFS_SCRUB_PAUSED_TO_CANCEL, msg)); + } else { + /* handles case 3 */ + ASSERT3U(func, ==, POOL_SCAN_SCRUB); + ASSERT3U(cmd, ==, POOL_SCRUB_PAUSE); + return (zfs_error(hdl, + EZFS_SCRUB_PAUSED, msg)); + } + } + } else if (ps && ps->pss_error_scrub_func == POOL_ERRORSCRUB && + ps->pss_error_scrub_state == DSS_ERRORSCRUBBING) { + if (ps->pss_pass_error_scrub_pause == 0) { + /* handles case 4 */ + ASSERT3U(cmd, ==, POOL_SCRUB_NORMAL); + return (zfs_error(hdl, EZFS_ERRORSCRUBBING, + msg)); + } else { + /* handles case 5 */ + ASSERT3U(func, ==, POOL_ERRORSCRUB); + ASSERT3U(cmd, ==, POOL_SCRUB_PAUSE); + return (zfs_error(hdl, EZFS_ERRORSCRUB_PAUSED, + msg)); + } } else { + /* handles case 6 */ return (zfs_error(hdl, EZFS_RESILVERING, msg)); } - } else if (err == ENOENT) { + } else if (err == ENOENT && func == POOL_SCAN_NONE) { return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); + } else if (err == ENOENT && func == POOL_ERRORSCRUB) { + return (zfs_error(hdl, EZFS_NO_ERRORSCRUB, msg)); } else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) { return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg)); } else { diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 68e97e4830d8..d45d79d9eb0c 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -241,12 +241,25 @@ libzfs_error_description(libzfs_handle_t *hdl) "into a new one")); case EZFS_SCRUB_PAUSED: return (dgettext(TEXT_DOMAIN, "scrub is paused; " - "use 'zpool scrub' to resume")); + "use 'zpool scrub' to resume scrub")); + case EZFS_SCRUB_PAUSED_TO_CANCEL: + return (dgettext(TEXT_DOMAIN, "scrub is paused; " + "use 'zpool scrub' to resume or 'zpool scrub -s' to " + "cancel scrub")); case EZFS_SCRUBBING: return (dgettext(TEXT_DOMAIN, "currently scrubbing; " - "use 'zpool scrub -s' to cancel current scrub")); + "use 'zpool scrub -s' to cancel scrub")); + case EZFS_ERRORSCRUBBING: + return (dgettext(TEXT_DOMAIN, "currently error scrubbing; " + "use 'zpool scrub -e -s' to cancel error scrub")); + case EZFS_ERRORSCRUB_PAUSED: + return (dgettext(TEXT_DOMAIN, "error scrub is paused; " + "use 'zpool scrub -e' to resume error scrub")); case EZFS_NO_SCRUB: return (dgettext(TEXT_DOMAIN, "there is no active scrub")); + case EZFS_NO_ERRORSCRUB: + return (dgettext(TEXT_DOMAIN, "there is no active error " + "scrub")); case EZFS_DIFF: return (dgettext(TEXT_DOMAIN, "unable to generate diffs")); case EZFS_DIFFDATA: diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index c54a994f79bc..eea6bc4436ae 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -8,6 +8,7 @@ + @@ -218,6 +219,7 @@ + @@ -285,775 +287,236 @@ - + - - - - - - - - - - - - - - - + + + + - + - + - - - - + - + - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - + + + + + + + + + + - - - - - - - - + + + - - - - - - + + + + + - - - - - + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - + + + + - - - - - - - - - - - - + + + + - - - - + + + + + + + + + + + + + + + - - - + + + + + + + + + + + + + - - - + + + - - - - - - - - - - - - - + + + + + + + + + + + - - - - - - - - + + - - - - - + + + + + + - - - - - - + + + + + + + - - - - - - - + + - - - - - - - - - - + + - - - - - + + + + + + + - - - - - - - + + + - - - - - - - - - - - - - - - - - - + + + - - - - - + + + + - - - + + + + - - - - - + + + - - - - - - - + + + + + - + - + - + - + - - + + @@ -1067,115 +530,157 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - - + + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + + @@ -1252,1742 +757,1744 @@ - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + + - - + + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + - + - + - + - + - + - + - + - - + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + - + - - - - + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + - - - - + + + + + + + - - - - + + + + + + + + - - - + + + + + + + + + + + + + - - - + + + + + + + + + + - - - - + + + + + + + + - - - + + + + + + + - - - + + + + + + - - - - - - - + + + + + - - - - - + + + + - - + + + - - + + + + - - - - - - - + + + + - - - - - + + + + + + + + - - + + + + - - - - - - + + + + - - - + + - - - - - - - - - - - - - + + + - - - - - - - - - - - - - - - + + + + - - - - + + + + - - - - - - - - - - - + + + + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + - - - - - + - - - - - - - - - - - + + - - - - - - + + - - - - + + - - - - - + + - - - - - + + - - - - - - + + - - - + + - - - - - - + + - - - - + + - - - - - + + - - - - - + + - - - - - + + - - - - + + - - - - - - - - + + - - - - - + + - - + + - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + - + - + - - - + + + + - + - + - - - - - - - + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + - + + + + + + + + + + - + + + + - - - - + - - - + + - - - - - - - - - - + + + + + + + - - - - - - + + - - - + + - - - - + + - - - + + - - - - - + + - - - + + - - - - + + + + + - - - - - + + + - - - - - + + + - - - - + + + - - - + + + - - + + + - - - - - - - + + + - - - + + + - - - + + - - - - - - - - + + - - - + + - - - - - - + + - - - - - + + - - - + + - - - + + - - - + + - - - - - + + + + + - - - - + + + - - - - - - + + + + + + - - - - - + + + + - - - - - + + + + + + - - - - - + + - - - - - + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - + + + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - + - + - - - - + - - + + - - - - - - - - - - - - - - - - - - - - + + - - - - + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + + + + + + + - + - + - + - - + + - - + + - - - - - - - + + - - + + - - + + + + + + + + + + + - + - - + + - - + + - - - - + + - + + + + + + + + + + + + - + - + - - - - + - - + + - - + + - - + + + + - - + + - - - - - - - - - - - + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + - - - + + + - - - - + + + - - + + + - - - - - - - - - + + + + + + - - - - - - - - - + + + + - - - + + + + - - - + + + + - - - - - - + + + + - - - - - - + + + + - - - - - + + + + - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - + + + + + + - - - - - - + + + + + + - - - + + + + + - - - + + + - - - + + + - - - - + + + + + + + + + + - - + + - + - + - + - + - + - + - + - + - + - + - + @@ -3005,1006 +2512,811 @@ - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - - + + - + - + - - + + - - + + - - + + + - + - + - - - - + + + + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - - + + + + - - + + - - - - - + + + - - - - - - - + + + - - - + + - - - - + + + - - - - + + + - - - - + + + - - - - + + + - - - + + + + - - - - - - - - - - + + + + + + + - - - - - - + + - - - - + + - - - - + + - - - - + + - - - - + + - - - - - + + - - - + + - - - - - + + - - - + + - - - - - - - - - - - - - - - - - + + - - - - - - - + + - - - + + - - - + + - - - + + - - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - - + + - - + + - - - - - - - + + - - - - + + - - - - + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + - - - - - - - - + + - - + + - - + + - - - - - - - - - - - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - + + + - - - - - - - - + + + + + - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - - - + + + + - - - - - - - - - - + + + + - - - - - - - - - - + + + + - - - - - - - - - - - - - - - - + + + + - - - - - - - - + + + - - - - - - - - + + + - - - - - - - - + + + - - - + + + - - - - + + - - - - + + + + - - - - + + - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -4013,510 +3325,443 @@ - + - + - + - + + + + - + - + - + - + - + - + - + - - - - + + + - - + + - - - - + + + + + + + - - + + - - - - - - - - - + + + - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + - - + + - + - + - + - + - + - + - - - - - - - - - - - - + + + + + + - - - - - + + + + + - - - - + + + - - - - - + + + + - - - - + + + + - - - - + + + + - - - + + + + - - - + + + - - + + - - - + + + - - - - + + + - - - - + + + + - - - - + + + + - - - - - - + + + + - - - + + + + + - - - + + + + + - - - + + + + + + + + + - - + + - - + + - - - - - - - - - - - - - - - - - - + + + + + + + + - + - + - + - - - - + + + + - + - - - - + + + + - + - - + + - - - - + + - - - + + - + + + + + + + + + + + + + + + + - + - + - + - + - - - - + - - - - + - + - + - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + - - - - - + + + + + + + + - + - - + + + - + - + + - - - - - + + + + + - - - - - + + - - - - - - - - - - - - - - + - - - + + - - - + + diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index ce33b2153062..d9a73255f246 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -244,6 +244,20 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, return (error); } +/* + * This is interface for scan/scrub to use new ioctl signature. + */ +int +lzc_scrub(zfs_ioc_t ioc, const char *name, + nvlist_t *source, nvlist_t **resultp) +{ + int error; + + error = lzc_ioctl(ioc, name, source, resultp); + + return (error); +} + int lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, uint8_t *wkeydata, uint_t wkeylen) diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8 index 10375b6393ac..8e05c50ee88e 100644 --- a/man/man8/zpool-scrub.8 +++ b/man/man8/zpool-scrub.8 @@ -38,6 +38,7 @@ .Cm scrub .Op Fl s Ns | Ns Fl p .Op Fl w +.Op Fl e .Ar pool Ns … . .Sh DESCRIPTION @@ -78,8 +79,14 @@ During this period, no completion time estimate will be provided. .Bl -tag -width "-s" .It Fl s Stop scrubbing. +Along with +.Fl e +stops any error scrub. .It Fl p Pause scrubbing. +Along with +.Fl e +pauses any error scrub. Scrub pause state and progress are periodically synced to disk. If the system is restarted or pool is exported during a paused scrub, even after import, scrub will remain paused until it is resumed. @@ -87,9 +94,22 @@ Once resumed the scrub will pick up from the place where it was last checkpointed to disk. To resume a paused scrub issue .Nm zpool Cm scrub +or +.Nm zpool Cm scrub +.Fl e again. .It Fl w Wait until scrub has completed before returning. +.It Fl e +Only scrub blocks in the error log. +Error scrubs can also be paused and cancelled with +.Fl p +and +.Fl s +respectively. +Error scrubbing is I/O-intensive and cannot run simultaneously with regular +scrubbing or resilvering. +It also cannot run if a regular scrub is in the paused state. .El . .Sh SEE ALSO diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 62ee9bb9ab6c..3bfb8e81c9d5 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -52,6 +52,7 @@ #include #include #include +#include #ifdef _KERNEL #include #endif @@ -127,6 +128,7 @@ static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); static uint64_t dsl_scan_count_data_disks(vdev_t *vd); +static void read_by_block_level(dsl_scan_t *scn, zbookmark_phys_t zb); extern int zfs_vdev_async_write_active_min_dirty_percent; @@ -203,6 +205,10 @@ int zfs_resilver_disable_defer = 0; /* set to disable resilver deferring */ */ int zfs_free_bpobj_enabled = 1; +/* Error blocks to be scrubbed in one txg. */ +unsigned long zfs_scrub_error_blocks_in_one_txg = 1 << 12; +int zfs_error_scrub_min_time_ms = 1000; /* min millisecs to error scrub txg */ + /* the order has to match pool_scan_type */ static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = { NULL, @@ -479,9 +485,14 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) "scrub_queue", sizeof (uint64_t), 1, &scn->scn_phys.scn_queue_obj); } else { + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ERRORSCRUB, sizeof (uint64_t), + ERRORSCRUB_PHYS_NUMINTS, &scn->errorscrub_phys); + err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys); + /* * Detect if the pool contains the signature of #2094. If it * does properly update the scn->scn_phys structure and notify @@ -626,6 +637,22 @@ dsl_scan_scrubbing(const dsl_pool_t *dp) scn_phys->scn_func == POOL_SCAN_SCRUB); } +boolean_t +dsl_errorscrubbing(const dsl_pool_t *dp) +{ + dsl_errorscrub_phys_t *errorscrub_phys = &dp->dp_scan->errorscrub_phys; + + return (errorscrub_phys->dep_state == DSS_ERRORSCRUBBING && + errorscrub_phys->dep_func == POOL_ERRORSCRUB); +} + +boolean_t +dsl_errorscrub_is_paused(const dsl_scan_t *scn) +{ + return (dsl_errorscrubbing(scn->scn_dp) && + scn->errorscrub_phys.dep_paused_flags); +} + boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn) { @@ -633,6 +660,68 @@ dsl_scan_is_paused_scrub(const dsl_scan_t *scn) scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED); } +static void +dsl_errorscrub_sync_state(dsl_scan_t *scn, dmu_tx_t *tx) +{ + scn->errorscrub_phys.dep_cursor = + zap_cursor_serialize(&scn->errorscrub_cursor); + + VERIFY0(zap_update(scn->scn_dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_ERRORSCRUB, sizeof (uint64_t), ERRORSCRUB_PHYS_NUMINTS, + &scn->errorscrub_phys, tx)); +} + +static void +dsl_errorscrub_setup_sync(void *arg, dmu_tx_t *tx) +{ + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; + pool_scan_func_t *funcp = arg; + dsl_pool_t *dp = scn->scn_dp; + spa_t *spa = dp->dp_spa; + + ASSERT(!dsl_scan_is_running(scn)); + ASSERT(!dsl_errorscrubbing(scn->scn_dp)); + ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); + + bzero(&scn->errorscrub_phys, sizeof (scn->errorscrub_phys)); + scn->errorscrub_phys.dep_func = *funcp; + scn->errorscrub_phys.dep_state = DSS_ERRORSCRUBBING; + scn->errorscrub_phys.dep_start_time = gethrestime_sec(); + scn->errorscrub_phys.dep_to_examine = spa_get_last_errlog_size(spa); + scn->errorscrub_phys.dep_examined = 0; + scn->errorscrub_phys.dep_errors = 0; + scn->errorscrub_phys.dep_cursor = 0; + zap_cursor_init_serialized(&scn->errorscrub_cursor, + spa->spa_meta_objset, spa->spa_errlog_last, + scn->errorscrub_phys.dep_cursor); + + vdev_config_dirty(spa->spa_root_vdev); + spa_event_notify(spa, NULL, NULL, ESC_ZFS_ERRORSCRUB_START); + + dsl_errorscrub_sync_state(scn, tx); + + spa_history_log_internal(spa, "error scrub setup", tx, + "func=%u mintxg=%u maxtxg=%llu", + *funcp, 0, (u_longlong_t)tx->tx_txg); +} + +static int +dsl_errorscrub_setup_check(void *arg, dmu_tx_t *tx) +{ + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; + + if (dsl_scan_is_running(scn) || (dsl_errorscrubbing(scn->scn_dp))) { + return (SET_ERROR(EBUSY)); + } + + if (spa_get_last_errlog_size(scn->scn_dp->dp_spa) == 0) { + return (ECANCELED); + } + return (0); +} + + /* * Writes out a persistent dsl_scan_phys_t record to the pool directory. * Because we can be running in the block sorting algorithm, we do not always @@ -707,7 +796,8 @@ dsl_scan_setup_check(void *arg, dmu_tx_t *tx) dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev; - if (dsl_scan_is_running(scn) || vdev_rebuild_active(rvd)) + if (dsl_scan_is_running(scn) || vdev_rebuild_active(rvd) || + dsl_errorscrubbing(scn->scn_dp)) return (SET_ERROR(EBUSY)); return (0); @@ -725,6 +815,14 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) ASSERT(!dsl_scan_is_running(scn)); ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); bzero(&scn->scn_phys, sizeof (scn->scn_phys)); + + /* + * If we are starting a fresh scrub. We erase error scrub information + * from disk. + */ + bzero(&scn->errorscrub_phys, sizeof (scn->errorscrub_phys)); + dsl_errorscrub_sync_state(scn, tx); + scn->scn_phys.scn_func = *funcp; scn->scn_phys.scn_state = DSS_SCANNING; scn->scn_phys.scn_min_txg = 0; @@ -811,8 +909,9 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) } /* - * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver. - * Can also be called to resume a paused scrub. + * Called by ZFS_IOC_POOL_SCRUB and ZFS_IOC_POOL_SCAN ioctl to start a scrub, + * error scrub or resilver. Can also be called to resume a paused scrub or + * error scrub. */ int dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) @@ -838,15 +937,34 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) return (0); } + if (func == POOL_ERRORSCRUB) { + if (dsl_errorscrub_is_paused(dp->dp_scan)) { + /* + * got error scrub start cmd, resume paused error scrub. + */ + int err = dsl_scrub_set_pause_resume(scn->scn_dp, + POOL_SCRUB_NORMAL, func); + if (err == 0) { + spa_event_notify(spa, NULL, NULL, + ESC_ZFS_ERRORSCRUB_RESUME); + return (ECANCELED); + } + return (SET_ERROR(err)); + } + + return (dsl_sync_task(spa_name(dp->dp_spa), + dsl_errorscrub_setup_check, dsl_errorscrub_setup_sync, + &func, 0, ZFS_SPACE_CHECK_RESERVED)); + } + if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) { /* got scrub start cmd, resume paused scrub */ int err = dsl_scrub_set_pause_resume(scn->scn_dp, - POOL_SCRUB_NORMAL); + POOL_SCRUB_NORMAL, func); if (err == 0) { spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_RESUME); return (SET_ERROR(ECANCELED)); } - return (SET_ERROR(err)); } @@ -854,6 +972,33 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } +static void +dsl_errorscrub_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) +{ + dsl_pool_t *dp = scn->scn_dp; + spa_t *spa = dp->dp_spa; + + if (complete) { + spa_event_notify(spa, NULL, NULL, ESC_ZFS_ERRORSCRUB_FINISH); + spa_history_log_internal(spa, "error scrub done", tx, + "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa)); + } else { + spa_history_log_internal(spa, "error scrub cancelled", tx, + "errors=%llu", (u_longlong_t)spa_get_errlog_size(spa)); + } + + scn->errorscrub_phys.dep_state = complete ? DSS_FINISHED : DSS_CANCELED; + spa->spa_scrub_active = B_FALSE; + spa_errlog_rotate(spa); + scn->errorscrub_phys.dep_end_time = gethrestime_sec(); + zap_cursor_fini(&scn->errorscrub_cursor); + + if (spa->spa_errata == ZPOOL_ERRATA_ZOL_2094_SCRUB) + spa->spa_errata = 0; + + ASSERT(!dsl_errorscrubbing(scn->scn_dp)); +} + /* ARGSUSED */ static void dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) @@ -1001,6 +1146,92 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) ASSERT(!dsl_scan_is_running(scn)); } + + +static int +dsl_errorscrub_pause_resume_check(void *arg, dmu_tx_t *tx) +{ + pool_scrub_cmd_t *cmd = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_scan_t *scn = dp->dp_scan; + + if (*cmd == POOL_SCRUB_PAUSE) { + /* + * can't pause a error scrub when there is no in-progress + * error scrub. + */ + if (!dsl_errorscrubbing(dp)) + return (SET_ERROR(ENOENT)); + + /* can't pause a paused error scrub */ + if (dsl_errorscrub_is_paused(scn)) + return (SET_ERROR(EBUSY)); + } else if (*cmd != POOL_SCRUB_NORMAL) { + return (SET_ERROR(ENOTSUP)); + } + + return (0); +} + +static void +dsl_errorscrub_pause_resume_sync(void *arg, dmu_tx_t *tx) +{ + pool_scrub_cmd_t *cmd = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_t *spa = dp->dp_spa; + dsl_scan_t *scn = dp->dp_scan; + + if (*cmd == POOL_SCRUB_PAUSE) { + spa->spa_scan_pass_errorscrub_pause = gethrestime_sec(); + scn->errorscrub_phys.dep_paused_flags = B_TRUE; + dsl_errorscrub_sync_state(scn, tx); + spa_event_notify(spa, NULL, NULL, ESC_ZFS_ERRORSCRUB_PAUSED); + } else { + ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL); + if (dsl_errorscrub_is_paused(scn)) { + /* + * We need to keep track of how much time we spend + * paused per pass so that we can adjust the error scrub + * rate shown in the output of 'zpool status'. + */ + spa->spa_scan_pass_errorscrub_spent_paused += + gethrestime_sec() - + spa->spa_scan_pass_errorscrub_pause; + + spa->spa_scan_pass_errorscrub_pause = 0; + scn->errorscrub_phys.dep_paused_flags = B_FALSE; + + zap_cursor_init_serialized( + &scn->errorscrub_cursor, + spa->spa_meta_objset, spa->spa_errlog_last, + scn->errorscrub_phys.dep_cursor); + + dsl_errorscrub_sync_state(scn, tx); + } + } +} + +static int +dsl_errorscrub_cancel_check(void *arg, dmu_tx_t *tx) +{ + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; + /* can't cancel a error scrub when there is no one in-progress */ + if (!dsl_errorscrubbing(scn->scn_dp)) + return (SET_ERROR(ENOENT)); + return (0); +} + +static void +dsl_errorscrub_cancel_sync(void *arg, dmu_tx_t *tx) +{ + dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; + + dsl_errorscrub_done(scn, B_FALSE, tx); + dsl_errorscrub_sync_state(scn, tx); + spa_event_notify(scn->scn_dp->dp_spa, NULL, NULL, + ESC_ZFS_ERRORSCRUB_ABORT); +} + /* ARGSUSED */ static int dsl_scan_cancel_check(void *arg, dmu_tx_t *tx) @@ -1024,8 +1255,13 @@ dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx) } int -dsl_scan_cancel(dsl_pool_t *dp) +dsl_scan_cancel(dsl_pool_t *dp, pool_scan_func_t func) { + if (func == POOL_ERRORSCRUB) { + return (dsl_sync_task(spa_name(dp->dp_spa), + dsl_errorscrub_cancel_check, dsl_errorscrub_cancel_sync, + NULL, 3, ZFS_SPACE_CHECK_RESERVED)); + } return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check, dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED)); } @@ -1090,8 +1326,15 @@ dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx) * Set scrub pause/resume state if it makes sense to do so */ int -dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) +dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd, + pool_scan_func_t func) { + if (func == POOL_ERRORSCRUB) { + return (dsl_sync_task(spa_name(dp->dp_spa), + dsl_errorscrub_pause_resume_check, + dsl_errorscrub_pause_resume_sync, &cmd, 3, + ZFS_SPACE_CHECK_RESERVED)); + } return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3, ZFS_SPACE_CHECK_RESERVED)); @@ -1372,6 +1615,42 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) return (B_FALSE); } +static boolean_t +dsl_error_scrub_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) +{ + /* + * We suspend if: + * - we have scrubbed for at least the minimum time (default 1 sec + * for error scrub), someone is explicitly waiting for this txg + * to complete, or we have used up all of the time in the txg + * timeout (default 5 sec). + * or + * - the spa is shutting down because this pool is being exported + * or the machine is rebooting. + */ + uint64_t curr_time_ns = gethrtime(); + uint64_t error_scrub_time_ns = curr_time_ns - scn->scn_sync_start_time; + uint64_t sync_time_ns = curr_time_ns - + scn->scn_dp->dp_spa->spa_sync_starttime; + int mintime = zfs_error_scrub_min_time_ms; + + if ((NSEC2MSEC(error_scrub_time_ns) > mintime && + (txg_sync_waiting(scn->scn_dp) || + NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) || + spa_shutting_down(scn->scn_dp->dp_spa)) { + if (zb) { + dprintf("error scrub suspending at bookmark " + "%llx/%llx/%llx/%llx\n", + (longlong_t)zb->zb_objset, + (longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (longlong_t)zb->zb_blkid); + } + return (B_TRUE); + } + return (B_FALSE); +} + typedef struct zil_scan_arg { dsl_pool_t *zsa_dp; zil_header_t *zsa_zh; @@ -2802,6 +3081,7 @@ scan_io_queue_check_suspend(dsl_scan_t *scn) uint64_t scan_time_ns = curr_time_ns - scn->scn_sync_start_time; uint64_t sync_time_ns = curr_time_ns - scn->scn_dp->dp_spa->spa_sync_starttime; + int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max; int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ? zfs_resilver_min_time_ms : zfs_scrub_min_time_ms; @@ -3272,6 +3552,20 @@ dsl_scan_active(dsl_scan_t *scn) return ((used != 0) || (clones_left)); } + +boolean_t +dsl_errorscrub_active(dsl_scan_t *scn) +{ + spa_t *spa = scn->scn_dp->dp_spa; + if (spa->spa_load_state != SPA_LOAD_NONE) + return (B_FALSE); + if (spa_shutting_down(spa)) + return (B_FALSE); + if (dsl_errorscrubbing(scn->scn_dp)) + return (B_TRUE); + return (B_FALSE); +} + static boolean_t dsl_scan_check_deferred(vdev_t *vd) { @@ -3487,6 +3781,153 @@ dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx) return (0); } +static void +name_to_bookmark(char *buf, zbookmark_phys_t *zb) +{ + zb->zb_objset = zfs_strtonum(buf, &buf); + ASSERT(*buf == ':'); + zb->zb_object = zfs_strtonum(buf + 1, &buf); + ASSERT(*buf == ':'); + zb->zb_level = (int)zfs_strtonum(buf + 1, &buf); + ASSERT(*buf == ':'); + zb->zb_blkid = zfs_strtonum(buf + 1, &buf); + ASSERT(*buf == '\0'); +} + +static void +read_by_block_level(dsl_scan_t *scn, zbookmark_phys_t zb) +{ + int err; + dsl_pool_t *dp = scn->scn_dp; + dsl_dataset_t *ds; + objset_t *os; + err = dsl_dataset_hold_obj(dp, zb.zb_objset, FTAG, &ds); + if (err != 0) { + return; + } + + if (dmu_objset_from_ds(ds, &os) != 0) { + dsl_dataset_rele(ds, FTAG); + return; + } + + dnode_t *dn; + blkptr_t bp; + + if (dnode_hold(os, zb.zb_object, FTAG, &dn) != 0) { + dsl_dataset_rele(ds, FTAG); + return; + } + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + err = dbuf_dnode_findbp(dn, zb.zb_level, zb.zb_blkid, &bp, NULL, NULL); + + if (err != 0 || BP_IS_HOLE(&bp)) { + dnode_rele(dn, FTAG); + dsl_dataset_rele(ds, FTAG); + return; + } + + int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; + zio_flags |= ZIO_FLAG_SCRUB; + + /* If it's an intent log block, failure is expected. */ + if (zb.zb_level == ZB_ZIL_LEVEL) { + zio_flags |= ZIO_FLAG_SPECULATIVE; + } + ASSERT(!BP_IS_EMBEDDED(&bp)); + scan_exec_io(dp, &bp, zio_flags, &zb, NULL); + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + dsl_dataset_rele(ds, FTAG); +} + +void +dsl_errorscrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) +{ + + spa_t *spa = dp->dp_spa; + dsl_scan_t *scn = dp->dp_scan; + + /* + * Only process scans in sync pass 1. + */ + + if (spa_sync_pass(spa) > 1) + return; + + /* + * If the spa is shutting down, then stop scanning. This will + * ensure that the scan does not dirty any new data during the + * shutdown phase. + */ + if (spa_shutting_down(spa)) + return; + + if (!dsl_errorscrub_active(scn) || dsl_errorscrub_is_paused(scn)) { + return; + } + + if (dsl_scan_resilvering(scn->scn_dp)) { + /* cancel the error scrub if resilver started */ + dsl_scan_cancel(scn->scn_dp, POOL_ERRORSCRUB); + return; + } + + spa->spa_scrub_active = B_TRUE; + scn->scn_sync_start_time = gethrtime(); + + /* + * zfs_scan_suspend_progress can be set to disable scrub progress. + * See more detailed comment in dsl_scan_sync(). + */ + if (zfs_scan_suspend_progress) { + uint64_t scan_time_ns = gethrtime() - scn->scn_sync_start_time; + int mintime = zfs_error_scrub_min_time_ms; + + while (zfs_scan_suspend_progress && + !txg_sync_waiting(scn->scn_dp) && + !spa_shutting_down(scn->scn_dp->dp_spa) && + NSEC2MSEC(scan_time_ns) < mintime) { + delay(hz); + scan_time_ns = gethrtime() - scn->scn_sync_start_time; + } + return; + } + + int i = 0; + zap_attribute_t za; + zbookmark_phys_t zb; + boolean_t limit_exceeded = B_FALSE; + for (; zap_cursor_retrieve(&scn->errorscrub_cursor, &za) == 0; + zap_cursor_advance(&scn->errorscrub_cursor)) { + name_to_bookmark(za.za_name, &zb); + + scn->scn_zio_root = zio_root(dp->dp_spa, NULL, + NULL, ZIO_FLAG_CANFAIL); + dsl_pool_config_enter(dp, FTAG); + read_by_block_level(scn, zb); + dsl_pool_config_exit(dp, FTAG); + + (void) zio_wait(scn->scn_zio_root); + scn->scn_zio_root = NULL; + + scn->errorscrub_phys.dep_examined += 1; + scn->errorscrub_phys.dep_to_examine -= 1; + i++; + if (i == zfs_scrub_error_blocks_in_one_txg || + dsl_error_scrub_check_suspend(scn, &zb)) { + limit_exceeded = B_TRUE; + break; + } + } + + if (!limit_exceeded) { + dsl_errorscrub_done(scn, B_TRUE, tx); + } + dsl_errorscrub_sync_state(scn, tx); +} + /* * This is the primary entry point for scans that is called from syncing * context. Scans must happen entirely during syncing context so that we @@ -3495,6 +3936,7 @@ dsl_process_async_destroys(dsl_pool_t *dp, dmu_tx_t *tx) * transaction groups proceed, instead of the normal handling provided by * txg_sync_thread(). */ + void dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) { @@ -3545,6 +3987,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (!scn->scn_async_stalled && !dsl_scan_active(scn)) return; + /* reset scan statistics */ scn->scn_visited_this_txg = 0; scn->scn_dedup_frees_this_txg = 0; @@ -4020,7 +4463,14 @@ dsl_scan_scrub_done(zio_t *zio) if (zio->io_error && (zio->io_error != ECKSUM || !(zio->io_flags & ZIO_FLAG_SPECULATIVE))) { - atomic_inc_64(&spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors); + if (dsl_errorscrubbing(spa->spa_dsl_pool) && + !dsl_errorscrub_is_paused(spa->spa_dsl_pool->dp_scan)) { + atomic_inc_64(&spa->spa_dsl_pool->dp_scan + ->errorscrub_phys.dep_errors); + } else { + atomic_inc_64(&spa->spa_dsl_pool->dp_scan->scn_phys + .scn_errors); + } } } @@ -4413,4 +4863,10 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, "Process all resilvers immediately"); + +ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_in_one_txg, ULONG, ZMOD_RW, + "Error blocks to be scrubbed in one txg"); + +ZFS_MODULE_PARAM(zfs, zfs_, error_scrub_min_time_ms, INT, ZMOD_RW, + "Min millisecs to scrub error blocks per txg"); /* END CSTYLED */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index f6dce076d136..3acc173bb8c1 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -7988,23 +7988,24 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) * ========================================================================== */ int -spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd) +spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func, pool_scrub_cmd_t cmd) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); - return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd)); + return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd, func)); } int -spa_scan_stop(spa_t *spa) +spa_scan_stop(spa_t *spa, pool_scan_func_t func) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); - return (dsl_scan_cancel(spa->spa_dsl_pool)); + + return (dsl_scan_cancel(spa->spa_dsl_pool, func)); } int @@ -9066,6 +9067,7 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx) ddt_sync(spa, txg); dsl_scan_sync(dp, tx); + dsl_errorscrub_sync(dp, tx); svr_sync(spa, tx); spa_sync_upgrades(spa, tx); diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index fa5120eb61b3..66ea538309d3 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -128,6 +128,21 @@ spa_log_error(spa_t *spa, const zbookmark_phys_t *zb) mutex_exit(&spa->spa_errlist_lock); } +/* Return the number of errors in the error log */ +uint64_t +spa_get_last_errlog_size(spa_t *spa) +{ + uint64_t total = 0, count; + mutex_enter(&spa->spa_errlog_lock); + + if (spa->spa_errlog_last != 0 && + zap_count(spa->spa_meta_objset, spa->spa_errlog_last, + &count) == 0) + total += count; + mutex_exit(&spa->spa_errlog_lock); + return (total); +} + /* * Return the number of errors currently in the error log. This is actually the * sum of both the last log and the current log, since we don't know the union @@ -306,17 +321,17 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx) if (avl_numnodes(t) != 0) { /* create log if necessary */ - if (*obj == 0) + if (*obj == 0) { *obj = zap_create(spa->spa_meta_objset, DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); + } /* add errors to the current log */ for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) { char *name = se->se_name ? se->se_name : ""; bookmark_to_name(&se->se_bookmark, buf, sizeof (buf)); - (void) zap_update(spa->spa_meta_objset, *obj, buf, 1, strlen(name) + 1, name, tx); } @@ -407,6 +422,7 @@ spa_errlog_sync(spa_t *spa, uint64_t txg) #if defined(_KERNEL) /* error handling */ EXPORT_SYMBOL(spa_log_error); +EXPORT_SYMBOL(spa_get_last_errlog_size); EXPORT_SYMBOL(spa_get_errlog_size); EXPORT_SYMBOL(spa_get_errlog); EXPORT_SYMBOL(spa_errlog_rotate); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 29a5381e4b48..8dc4c8a06bdc 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2555,9 +2555,18 @@ spa_scan_stat_init(spa_t *spa) spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start; else spa->spa_scan_pass_scrub_pause = 0; + + if (dsl_errorscrub_is_paused(spa->spa_dsl_pool->dp_scan)) + spa->spa_scan_pass_errorscrub_pause = spa->spa_scan_pass_start; + else + spa->spa_scan_pass_errorscrub_pause = 0; + spa->spa_scan_pass_scrub_spent_paused = 0; spa->spa_scan_pass_exam = 0; spa->spa_scan_pass_issued = 0; + + // error scrub stats + spa->spa_scan_pass_errorscrub_spent_paused = 0; vdev_scan_stat_init(spa->spa_root_vdev); } @@ -2569,8 +2578,11 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) { dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL; - if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE) + if (scn == NULL || (scn->scn_phys.scn_func == POOL_SCAN_NONE && + scn->errorscrub_phys.dep_func == POOL_SCAN_NONE)) { return (SET_ERROR(ENOENT)); + } + bzero(ps, sizeof (pool_scan_stat_t)); /* data stored on disk */ @@ -2593,6 +2605,18 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) ps->pss_issued = scn->scn_issued_before_pass + spa->spa_scan_pass_issued; + /* error scrub data stored on disk */ + ps->pss_error_scrub_func = scn->errorscrub_phys.dep_func; + ps->pss_error_scrub_state = scn->errorscrub_phys.dep_state; + ps->pss_error_scrub_start = scn->errorscrub_phys.dep_start_time; + ps->pss_error_scrub_end = scn->errorscrub_phys.dep_end_time; + ps->pss_error_scrub_examined = scn->errorscrub_phys.dep_examined; + ps->pss_error_scrub_to_be_examined = + scn->errorscrub_phys.dep_to_examine; + + /* error scrub data not stored on disk */ + ps->pss_pass_error_scrub_pause = spa->spa_scan_pass_errorscrub_pause; + return (0); } diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c index 4d7de0c6c44c..fabf0fb7e9bd 100644 --- a/module/zfs/vdev_rebuild.c +++ b/module/zfs/vdev_rebuild.c @@ -750,7 +750,7 @@ vdev_rebuild_thread(void *arg) */ dsl_pool_t *dsl = spa_get_dsl(spa); if (dsl_scan_scrubbing(dsl)) - dsl_scan_cancel(dsl); + dsl_scan_cancel(dsl, POOL_SCAN_SCRUB); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); mutex_enter(&vd->vdev_rebuild_lock); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 96a021acbc95..29af9ffc6d59 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1684,9 +1684,10 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc) return (error); if (zc->zc_flags == POOL_SCRUB_PAUSE) - error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE); + error = spa_scrub_pause_resume(spa, zc->zc_cookie, + POOL_SCRUB_PAUSE); else if (zc->zc_cookie == POOL_SCAN_NONE) - error = spa_scan_stop(spa); + error = spa_scan_stop(spa, zc->zc_cookie); else error = spa_scan(spa, zc->zc_cookie); @@ -1695,6 +1696,47 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * poolname name of the pool + * scan_type scan func (pool_scan_func_t) + * scan_command scrub pause/resume flag (pool_scrub_cmd_t) + */ +static const zfs_ioc_key_t zfs_keys_pool_scrub[] = { + {"scan_type", DATA_TYPE_UINT64, 0}, + {"scan_command", DATA_TYPE_UINT64, 0}, +}; + +static int +zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + spa_t *spa; + int error; + pool_scan_func_t scan_type = + (pool_scan_func_t)fnvlist_lookup_uint64(innvl, "scan_type"); + pool_scrub_cmd_t scan_cmd = + (pool_scrub_cmd_t)fnvlist_lookup_uint64(innvl, "scan_command"); + + if (scan_cmd >= POOL_SCRUB_FLAGS_END) + return (SET_ERROR(EINVAL)); + + if ((error = spa_open(poolname, &spa, FTAG)) != 0) + return (error); + + if (scan_cmd == POOL_SCRUB_PAUSE) { + error = spa_scrub_pause_resume(spa, scan_type, + POOL_SCRUB_PAUSE); + } else if (scan_cmd == POOL_ERRORSCRUB_STOP) + error = spa_scan_stop(spa, scan_type); + else if (scan_type == POOL_SCAN_NONE) + error = spa_scan_stop(spa, scan_type); + else + error = spa_scan(spa, scan_type); + + spa_close(spa, FTAG); + return (error); +} + static int zfs_ioc_pool_freeze(zfs_cmd_t *zc) { @@ -7107,6 +7149,11 @@ zfs_ioctl_init(void) POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE, zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv)); + zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB, + zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_NONE, B_TRUE, B_TRUE, + zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 996e5f615cd4..2d4ce0581a25 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -462,7 +462,9 @@ tags = ['functional', 'cli_root', 'zpool_resilver'] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_encrypted_unloaded', 'zpool_scrub_print_repairing', - 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies'] + 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies', + 'zpool_error_scrub_001_pos', 'zpool_error_scrub_002_pos', + 'zpool_error_scrub_003_pos'] tags = ['functional', 'cli_root', 'zpool_scrub'] [tests/functional/cli_root/zpool_set] diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c index b671af7d8f42..5b1ab4438a51 100644 --- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c +++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c @@ -684,6 +684,16 @@ test_vdev_trim(const char *pool) nvlist_free(required); } +static void +test_scrub(const char *pool) +{ + nvlist_t *required = fnvlist_alloc(); + fnvlist_add_uint64(required, "scan_type", 1ULL << 30); + fnvlist_add_uint64(required, "scan_command", 1ULL << 31); + IOC_INPUT_TEST(ZFS_IOC_POOL_SCRUB, pool, required, NULL, EINVAL); + nvlist_free(required); +} + static int zfs_destroy(const char *dataset) { @@ -864,6 +874,8 @@ zfs_ioc_input_tests(const char *pool) test_set_bootenv(pool); test_get_bootenv(pool); + test_scrub(pool); + /* * cleanup */ @@ -1018,6 +1030,7 @@ validate_ioc_values(void) CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS); CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT); CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS); + CHECK(ZFS_IOC_BASE + 85 == ZFS_IOC_POOL_SCRUB); CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT); CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR); CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK); diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index 5a360bd5e705..f1094ae0dfd2 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -2350,6 +2350,12 @@ function is_pool_scrubbing #pool return $? } +function is_pool_error_scrubbing #pool +{ + check_pool_status "$1" "scrub" "error scrub in progress since " $2 + return $? +} + function is_pool_scrubbed #pool { check_pool_status "$1" "scan" "scrub repaired" $2 @@ -2362,12 +2368,24 @@ function is_pool_scrub_stopped #pool return $? } +function is_pool_error_scrub_stopped #pool +{ + check_pool_status "$1" "scrub" "error scrub canceled on " $2 + return $? +} + function is_pool_scrub_paused #pool { check_pool_status "$1" "scan" "scrub paused since " $2 return $? } +function is_pool_error_scrub_paused #pool +{ + check_pool_status "$1" "scrub" "error scrub paused since " $2 + return $? +} + function is_pool_removing #pool { check_pool_status "$1" "remove" "in progress since " diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am index e2dfd9d64c40..80b88bd939b0 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am @@ -10,7 +10,10 @@ dist_pkgdata_SCRIPTS = \ zpool_scrub_encrypted_unloaded.ksh \ zpool_scrub_offline_device.ksh \ zpool_scrub_print_repairing.ksh \ - zpool_scrub_multiple_copies.ksh + zpool_scrub_multiple_copies.ksh \ + zpool_error_scrub_001_pos.ksh \ + zpool_error_scrub_002_pos.ksh \ + zpool_error_scrub_003_pos.ksh dist_pkgdata_DATA = \ zpool_scrub.cfg diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh new file mode 100755 index 000000000000..6034cb693365 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2019 by Delphix. All rights reserved. +# Use is subject to license terms. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg + +# +# DESCRIPTION: +# Verify scrub -e, scrub -e -p, and scrub -e -s show the right status. +# +# STRATEGY: +# 1. Create a pool and create a 10MB file in it. +# 2. Start a error scrub (-e) and verify it's doing a scrub. +# 3. Pause error scrub (-e, -p) and verify it's paused. +# 4. Try to pause a paused error scrub (-e, -p) and make sure that fails. +# 5. Resume the paused error scrub and verify again it's doing a scrub. +# 6. Verify zpool scrub -s -e succeed when the system is error scrubbing. +# + +verify_runnable "global" + +function cleanup +{ + log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0 + log_must zinject -c all + rm -f /$TESTPOOL/10m_file +} + +log_onexit cleanup + +log_assert "Verify scrub -e, -e -p, and -e -s show the right status." + +log_must mkfile 10m /$TESTPOOL/10m_file + +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +log_must zinject -t data -e checksum -f 100 -am /$TESTPOOL/10m_file + +# create some error blocks +dd if=/$TESTPOOL/10m_file bs=1M count=1 || true + +# sync error blocks to disk +log_must sync_pool $TESTPOOL + +log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1 +log_must zpool scrub -e $TESTPOOL +log_must is_pool_error_scrubbing $TESTPOOL true +log_must zpool scrub -e -p $TESTPOOL +log_must is_pool_error_scrub_paused $TESTPOOL true +log_mustnot zpool scrub -e -p $TESTPOOL +log_must is_pool_error_scrub_paused $TESTPOOL true +log_must zpool scrub -e $TESTPOOL +log_must is_pool_error_scrubbing $TESTPOOL true +log_must zpool scrub -e -s $TESTPOOL +log_must is_pool_error_scrub_stopped $TESTPOOL true + +log_pass "Verified scrub -e, -s -e, and -p -e show expected status." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh new file mode 100755 index 000000000000..797500b64285 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2019 by Delphix. All rights reserved. +# Use is subject to license terms. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg + +# +# DESCRIPTION: +# Verify regular scrub and error scrub can't run at the same time. +# +# STRATEGY: +# 1. Create a pool and create a 10MB file in it. +# 2. Start a scrub and verify it's doing a scrub. +# 3. Start a error scrub (-e) and verify it fails. +# 4. Pause scrub (-p) and verify it's paused. +# 5. Start a error scrub (-e) verify it fails again. +# 5. Resume the paused scrub, verify it and cancel it. +# 6. Start a error scrub (-e) and verify it's doing error scrub. +# 7. Start a scrub and verify it fails. +# 8. Cancel error scrub (-e) and verify it is canceled. +# 9. Start scrub, verify it, cancel it and verify it. +# + +verify_runnable "global" + +function cleanup +{ + log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0 + log_must zinject -c all + rm -f $TESTDIR/10m_file +} + +log_onexit cleanup + +log_assert "Verify regular scrub and error scrub can't run at the same time." + +log_must mkfile 10m $TESTDIR/10m_file + +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +log_must zinject -t data -e checksum -f 100 -am $TESTDIR/10m_file + +# create some error blocks before error scrub is requested. +dd if=$TESTDIR/10m_file bs=1M count=1 || true +# sync error blocks to disk +log_must sync_pool $TESTPOOL + +log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1 + +log_must zpool scrub $TESTPOOL +log_must is_pool_scrubbing $TESTPOOL true +log_mustnot zpool scrub -e $TESTPOOL +log_must zpool scrub -p $TESTPOOL +log_must is_pool_scrub_paused $TESTPOOL true +log_mustnot zpool scrub -e $TESTPOOL +log_must zpool scrub $TESTPOOL +log_must is_pool_scrubbing $TESTPOOL true +log_must zpool scrub -s $TESTPOOL +log_must is_pool_scrub_stopped $TESTPOOL true + +# create some error blocks before error scrub is requested. +dd if=$TESTDIR/10m_file bs=1M count=1 || true +# sync error blocks to disk +log_must sync_pool $TESTPOOL + +log_must zpool scrub -e $TESTPOOL +log_must is_pool_error_scrubbing $TESTPOOL true +log_mustnot zpool scrub $TESTPOOL +log_must zpool scrub -e -s $TESTPOOL +log_must is_pool_error_scrub_stopped $TESTPOOL true + +log_must zpool scrub $TESTPOOL +log_must is_pool_scrubbing $TESTPOOL true +log_must zpool scrub -s $TESTPOOL +log_must is_pool_scrub_stopped $TESTPOOL true + +log_pass "Verified regular scrub and error scrub can't run at the same time." diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh new file mode 100755 index 000000000000..a5ecff18e9bb --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh @@ -0,0 +1,70 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2019 by Delphix. All rights reserved. +# Use is subject to license terms. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg + +# +# DESCRIPTION: +# Verify error scrub clears the errorlog, if errors no longer exists. +# +# STRATEGY: +# 1. Create a pool and create a 10MB file in it. +# 2. Zinject errors and read using dd to log errors to disk. +# 3. Make sure file name is mentioned in the list of error files. +# 4. Start error scrub and wait for it finish. +# 5. Make sure file name is not mentioned in the list of error files. +# + +verify_runnable "global" + +function cleanup +{ + log_must zinject -c all + rm -f $TESTDIR/10m_file +} + +log_onexit cleanup + +log_assert "Verify error scrub clears the errorlog, if errors no longer exists." + +log_must mkfile 10m $TESTDIR/10m_file + +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +log_must zinject -t data -e checksum -f 100 -am $TESTDIR/10m_file + +# create some error blocks +dd if=$TESTDIR/10m_file bs=1M count=1 || true + +# sync error blocks to disk +log_must sync_pool $TESTPOOL + +log_must eval "zpool status -v $TESTPOOL | grep '10m_file'" +log_must zinject -c all +log_must zpool scrub -e $TESTPOOL + +log_mustnot eval "zpool status -v $TESTPOOL | grep '10m_file'" + +log_pass "Verify error scrub clears the errorlog, if errors no longer exists."