Skip to content

Commit

Permalink
Teach zpool scrub to scrub only blocks in error log
Browse files Browse the repository at this point in the history
Added a flag -e in zpool scrub to scrub only blocks in error log. A
user can pause, resume and cancel the error scrub by passing additional
command line arguments -p -s just like a regular scrub. This involves
adding a new flag, creating new libzfs interfaces, a new ioctl, and the
actual iteration and read-issuing logic. Error scrubbing is executed in
multiple txg to make sure pool performance is not affected.

Original-patch-by: TulsiJain <tulsi.jain@delphix.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
  • Loading branch information
gamanakis committed Jul 15, 2021
1 parent d9f0f15 commit cadfaca
Show file tree
Hide file tree
Showing 27 changed files with 3,575 additions and 3,233 deletions.
111 changes: 102 additions & 9 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ get_usage(zpool_help_t idx)
return (gettext("\tinitialize [-c | -s] [-w] <pool> "
"[<device> ...]\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_TRIM:
Expand Down Expand Up @@ -7145,8 +7145,9 @@ wait_callback(zpool_handle_t *zhp, void *data)
}

/*
* zpool scrub [-s | -p] [-w] <pool> ...
* zpool scrub [-s | -p] [-w] [-e] <pool> ...
*
* -e Only scrub blocks in the error log.
* -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
* -w Wait. Blocks until scrub has completed.
Expand All @@ -7162,14 +7163,21 @@ zpool_do_scrub(int argc, char **argv)
cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;

boolean_t is_error_scrub = B_FALSE;
boolean_t is_pause = B_FALSE;
boolean_t is_stop = B_FALSE;

/* check options */
while ((c = getopt(argc, argv, "spw")) != -1) {
while ((c = getopt(argc, argv, "spwe")) != -1) {
switch (c) {
case 'e':
is_error_scrub = B_TRUE;
break;
case 's':
cb.cb_type = POOL_SCAN_NONE;
is_stop = B_TRUE;
break;
case 'p':
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
is_pause = B_TRUE;
break;
case 'w':
wait = B_TRUE;
Expand All @@ -7181,11 +7189,27 @@ zpool_do_scrub(int argc, char **argv)
}
}

if (cb.cb_type == POOL_SCAN_NONE &&
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
(void) fprintf(stderr, gettext("invalid option combination: "
"-s and -p are mutually exclusive\n"));
if (is_pause && is_stop) {
(void) fprintf(stderr, gettext("invalid option "
"combination :-s and -p are mutually exclusive\n"));
usage(B_FALSE);
} else {
if (is_error_scrub) {
cb.cb_type = POOL_ERRORSCRUB;
if (is_pause) {
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
} else if (is_stop) {
cb.cb_scrub_cmd = POOL_ERRORSCRUB_STOP;
} else {
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
}
} else {
if (is_pause) {
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
} else if (is_stop) {
cb.cb_type = POOL_SCAN_NONE;
}
}
}

if (wait && (cb.cb_type == POOL_SCAN_NONE ||
Expand Down Expand Up @@ -7408,6 +7432,70 @@ secs_to_dhms(uint64_t total, char *buf)
}
}

/*
* Print out detailed error scrub status.
*/
static void
print_err_scrub_status(pool_scan_stat_t *ps)
{
time_t start, end, pause;
uint64_t total_secs_left;
uint64_t secs_left, mins_left, hours_left, days_left;
uint64_t examined, to_be_examined;

if (ps == NULL || ps->pss_error_scrub_func != POOL_ERRORSCRUB) {
return;
}

(void) printf(gettext(" scrub: "));

start = ps->pss_error_scrub_start;
end = ps->pss_error_scrub_end;
pause = ps->pss_pass_error_scrub_pause;
examined = ps->pss_error_scrub_examined;
to_be_examined = ps->pss_error_scrub_to_be_examined;

assert(ps->pss_error_scrub_func == POOL_ERRORSCRUB);

if (ps->pss_error_scrub_state == DSS_FINISHED) {
total_secs_left = end - start;
days_left = total_secs_left / 60 / 60 / 24;
hours_left = (total_secs_left / 60 / 60) % 24;
mins_left = (total_secs_left / 60) % 60;
secs_left = (total_secs_left % 60);

(void) printf(gettext("scrubbed %llu error blocks in %llu days "
"%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
(u_longlong_t)days_left, (u_longlong_t)hours_left,
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
ctime(&end));

return;
} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
(void) printf(gettext("error scrub canceled on %s"),
ctime(&end));
return;
}
assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBBING);

/* Error scrub is in progress. */
if (pause == 0) {
(void) printf(gettext("error scrub in progress since %s"),
ctime(&start));
} else {
(void) printf(gettext("error scrub paused since %s"),
ctime(&pause));
(void) printf(gettext("\terror scrub started on %s"),
ctime(&start));
}

double fraction_done = (double)examined / (to_be_examined + examined);
(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
" blocks"), 100 * fraction_done, (u_longlong_t)examined);

(void) printf("\n");
}

/*
* Print out detailed scrub status.
*/
Expand Down Expand Up @@ -7733,6 +7821,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
{
uint64_t rebuild_end_time = 0, resilver_end_time = 0;
boolean_t have_resilver = B_FALSE, have_scrub = B_FALSE;
boolean_t have_errorscrub = B_FALSE;
boolean_t active_resilver = B_FALSE;
pool_checkpoint_stat_t *pcs = NULL;
pool_scan_stat_t *ps = NULL;
Expand All @@ -7747,6 +7836,7 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)

have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
have_errorscrub = (ps->pss_error_scrub_func == POOL_ERRORSCRUB);
}

boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
Expand All @@ -7755,6 +7845,8 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
/* Always print the scrub status when available. */
if (have_scrub)
print_scan_scrub_resilver_status(ps);
if (have_errorscrub)
print_err_scrub_status(ps);

/*
* When there is an active resilver or rebuild print its status.
Expand Down Expand Up @@ -8463,6 +8555,7 @@ status_callback(zpool_handle_t *zhp, void *data)

(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);

print_removal_status(zhp, prs);

(void) nvlist_lookup_uint64_array(nvroot,
Expand Down
4 changes: 4 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,15 @@ typedef enum zfs_error {
EZFS_THREADCREATEFAILED, /* thread create failed */
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
EZFS_SCRUBBING, /* currently scrubbing */
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
EZFS_NO_SCRUB, /* no active scrub */
EZFS_NO_ERRORSCRUB, /* no active error scrub */
EZFS_DIFF, /* general failure of zfs diff */
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ _LIBZFS_CORE_H int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);

_LIBZFS_CORE_H int lzc_set_bootenv(const char *, const nvlist_t *);
_LIBZFS_CORE_H int lzc_get_bootenv(const char *, nvlist_t **);

_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);

#ifdef __cplusplus
}
#endif
Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ typedef struct dmu_buf {
#define DMU_POOL_DDT_STATS "DDT-statistics"
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_ERRORSCRUB "error_scrub"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"
Expand Down
30 changes: 28 additions & 2 deletions include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/ddt.h>
#include <sys/bplist.h>

Expand Down Expand Up @@ -78,6 +79,21 @@ typedef enum dsl_scan_flags {

#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)

typedef struct dsl_errorscrub_phys {
uint64_t dep_func; /* pool_scan_func_t */
uint64_t dep_state; /* dsl_scan_state_t */
uint64_t dep_cursor; /* serialized zap cursor for tracing progress */
uint64_t dep_start_time; /* error scrub start time, unix timestamp */
uint64_t dep_end_time; /* error scrub end time, unix timestamp */
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
uint64_t dep_examined; /* blocks scrubbed so far */
uint64_t dep_errors; /* error scrub I/O error count */
uint64_t dep_paused_flags; /* flag for paused */
} dsl_errorscrub_phys_t;

#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
/ sizeof (uint64_t))

/*
* Every pool will have one dsl_scan_t and this structure will contain
* in-memory information about the scan and a pointer to the on-disk
Expand Down Expand Up @@ -151,11 +167,16 @@ typedef struct dsl_scan {
uint64_t scn_avg_zio_size_this_txg;
uint64_t scn_zios_this_txg;

/* zap cursor for tracing error scrub progress */
zap_cursor_t errorscrub_cursor;
/* members needed for syncing scan status to disk */
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
dsl_scan_phys_t scn_phys_cached;
avl_tree_t scn_queue; /* queue of datasets to scan */
uint64_t scn_bytes_pending; /* outstanding data to issue */

/* members needed for syncing error scrub status to disk */
dsl_errorscrub_phys_t errorscrub_phys;
} dsl_scan_t;

typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
Expand All @@ -167,12 +188,16 @@ int dsl_scan_setup_check(void *, dmu_tx_t *);
void dsl_scan_setup_sync(void *, dmu_tx_t *);
void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan_cancel(struct dsl_pool *, pool_scan_func_t func);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
boolean_t dsl_errorscrubbing(const struct dsl_pool *dp);
boolean_t dsl_errorscrub_active(dsl_scan_t *scn);
void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
pool_scrub_cmd_t cmd, pool_scan_func_t func);
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
Expand All @@ -184,6 +209,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);
Expand Down
18 changes: 18 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,7 @@ typedef enum pool_scan_func {
POOL_SCAN_NONE,
POOL_SCAN_SCRUB,
POOL_SCAN_RESILVER,
POOL_ERRORSCRUB,
POOL_SCAN_FUNCS
} pool_scan_func_t;

Expand All @@ -951,6 +952,7 @@ typedef enum pool_scan_func {
typedef enum pool_scrub_cmd {
POOL_SCRUB_NORMAL = 0,
POOL_SCRUB_PAUSE,
POOL_ERRORSCRUB_STOP,
POOL_SCRUB_FLAGS_END
} pool_scrub_cmd_t;

Expand Down Expand Up @@ -1005,6 +1007,20 @@ typedef struct pool_scan_stat {
uint64_t pss_pass_scrub_spent_paused;
uint64_t pss_pass_issued; /* issued bytes per scan pass */
uint64_t pss_issued; /* total bytes checked by scanner */

/* error scrub values stored on disk */
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
uint64_t pss_error_scrub_start; /* error scrub start time */
uint64_t pss_error_scrub_end; /* error scrub end time */
uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
/* error blocks to be issued I/O */
uint64_t pss_error_scrub_to_be_examined;

/* error scrub values not stored on disk */
/* error scrub pause time in milliseconds */
uint64_t pss_pass_error_scrub_pause;

} pool_scan_stat_t;

typedef struct pool_removal_stat {
Expand All @@ -1026,6 +1042,7 @@ typedef enum dsl_scan_state {
DSS_SCANNING,
DSS_FINISHED,
DSS_CANCELED,
DSS_ERRORSCRUBBING,
DSS_NUM_STATES
} dsl_scan_state_t;

Expand Down Expand Up @@ -1357,6 +1374,7 @@ typedef enum zfs_ioc {
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
ZFS_IOC_WAIT, /* 0x5a53 */
ZFS_IOC_WAIT_FS, /* 0x5a54 */
ZFS_IOC_POOL_SCRUB, /* 0x5a55 */

/*
* Per-platform (Optional) - 8/128 numbers reserved.
Expand Down
6 changes: 4 additions & 2 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -818,8 +818,9 @@ extern void spa_l2cache_drop(spa_t *spa);

/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
extern int spa_scan_stop(spa_t *spa, pool_scan_func_t func);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func,
pool_scrub_cmd_t flag);

/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
Expand Down Expand Up @@ -1140,6 +1141,7 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
Expand Down
4 changes: 4 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,10 @@ struct spa {
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
uint64_t spa_scan_pass_issued; /* issued bytes per pass */

/* error scrub pause time in milliseconds */
uint64_t spa_scan_pass_errorscrub_pause;
/* total error scrub paused time in milliseconds */
uint64_t spa_scan_pass_errorscrub_spent_paused;
/*
* We are in the middle of a resilver, and another resilver
* is needed once this one completes. This is set iff any
Expand Down
5 changes: 5 additions & 0 deletions include/sys/sysevent/eventdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ extern "C" {
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
#define ESC_ZFS_TRIM_RESUME "trim_resume"
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
#define ESC_ZFS_ERRORSCRUB_START "error_scrub_start"
#define ESC_ZFS_ERRORSCRUB_FINISH "error_scrub_finish"
#define ESC_ZFS_ERRORSCRUB_ABORT "error_scrub_abort"
#define ESC_ZFS_ERRORSCRUB_RESUME "error_scrub_resume"
#define ESC_ZFS_ERRORSCRUB_PAUSED "error_scrub_paused"

/*
* datalink subclass definitions.
Expand Down
Loading

0 comments on commit cadfaca

Please sign in to comment.