Skip to content

Commit

Permalink
Added a flag -e in zpool scrub to scrub only blocks in errorlog.
Browse files Browse the repository at this point in the history
Signed-off-by: TulsiJain <tulsi.jain@delphix.com>
  • Loading branch information
TulsiJain committed Aug 26, 2019
1 parent 95f0144 commit 66d6580
Show file tree
Hide file tree
Showing 26 changed files with 1,208 additions and 65 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ cscope.*
*.orig
*.log
venv
# Ignore Eclipse files
/.cproject
/.project

#
# Module leftovers
Expand Down
108 changes: 99 additions & 9 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,8 @@ get_usage(zpool_help_t idx)
return (gettext("\tinitialize [-c | -s] <pool> "
"[<device> ...]\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
return (gettext("\tscrub [-s | -p] [-e] <pool>"
" ...\n"));
case HELP_RESILVER:
return (gettext("\tresilver <pool> ...\n"));
case HELP_TRIM:
Expand Down Expand Up @@ -6704,8 +6705,9 @@ scrub_callback(zpool_handle_t *zhp, void *data)
}

/*
* zpool scrub [-s | -p] <pool> ...
* zpool scrub [-s | -p] [-e] <pool> ...
*
* -e Only scrub blocks in the error log.
* -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
*/
Expand All @@ -6718,14 +6720,21 @@ zpool_do_scrub(int argc, char **argv)
cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;

boolean_t is_error_scrub = B_FALSE;
boolean_t is_pause = B_FALSE;
boolean_t is_stop = B_FALSE;

/* check options */
while ((c = getopt(argc, argv, "sp")) != -1) {
while ((c = getopt(argc, argv, "eps")) != -1) {
switch (c) {
case 'e':
is_error_scrub = B_TRUE;
break;
case 's':
cb.cb_type = POOL_SCAN_NONE;
is_stop = B_TRUE;
break;
case 'p':
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
is_pause = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
Expand All @@ -6734,11 +6743,27 @@ zpool_do_scrub(int argc, char **argv)
}
}

if (cb.cb_type == POOL_SCAN_NONE &&
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
(void) fprintf(stderr, gettext("invalid option combination: "
"-s and -p are mutually exclusive\n"));
if (is_pause && is_stop) {
(void) fprintf(stderr, gettext("invalid option "
"combination :-s and -p are mutually exclusive\n"));
usage(B_FALSE);
} else {
if (is_error_scrub) {
cb.cb_type = POOL_ERRORSCRUB;
if (is_pause) {
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
} else if (is_stop) {
cb.cb_scrub_cmd = POOL_ERRORSCRUB_STOP;
} else {
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
}
} else {
if (is_pause) {
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
} else if (is_stop) {
cb.cb_type = POOL_SCAN_NONE;
}
}
}

cb.cb_argc = argc;
Expand Down Expand Up @@ -6912,6 +6937,70 @@ zpool_do_trim(int argc, char **argv)
return (error);
}

/*
 * Print out detailed error scrub status.
 *
 * ps	Pool scan statistics; may be NULL.  Nothing is printed unless
 *	ps->pss_error_scrub_func is POOL_ERRORSCRUB.
 *
 * Depending on ps->pss_error_scrub_state this prints either a summary of
 * a finished error scrub, a note that it was canceled, or the progress of
 * one that is still running (or paused).
 */
static void
print_err_scrub_status(pool_scan_stat_t *ps)
{
	/* "paused" rather than "pause": avoids shadowing pause(2). */
	time_t start, end, paused;
	uint64_t elapsed_secs;
	uint64_t secs, mins, hours, days;
	uint64_t examined, to_be_examined;
	double total, fraction_done;

	if (ps == NULL || ps->pss_error_scrub_func != POOL_ERRORSCRUB) {
		return;
	}

	(void) printf(gettext(" scrub: "));

	start = ps->pss_error_scrub_start;
	end = ps->pss_error_scrub_end;
	/*
	 * NOTE(review): the field comment on pss_pass_error_scrub_pause says
	 * "milliseconds", but the value is handed to ctime() below, which
	 * expects seconds since the epoch -- confirm the unit with the
	 * producer of this statistic.
	 */
	paused = ps->pss_pass_error_scrub_pause;
	examined = ps->pss_error_scrub_examined;
	to_be_examined = ps->pss_error_scrub_to_be_examined;

	if (ps->pss_error_scrub_state == DSS_FINISHED) {
		/*
		 * Clamp to zero so that inconsistent timestamps cannot
		 * turn into an enormous unsigned duration.
		 */
		elapsed_secs = (end > start) ? (uint64_t)(end - start) : 0;
		days = elapsed_secs / 60 / 60 / 24;
		hours = (elapsed_secs / 60 / 60) % 24;
		mins = (elapsed_secs / 60) % 60;
		secs = (elapsed_secs % 60);

		/* ctime() output already ends with a newline. */
		(void) printf(gettext("scrubbed %llu error blocks in %llu days "
		    "%02llu:%02llu:%02llu on %s"), (u_longlong_t)examined,
		    (u_longlong_t)days, (u_longlong_t)hours,
		    (u_longlong_t)mins, (u_longlong_t)secs,
		    ctime(&end));

		return;
	} else if (ps->pss_error_scrub_state == DSS_CANCELED) {
		(void) printf(gettext("error scrub canceled on %s"),
		    ctime(&end));
		return;
	}
	assert(ps->pss_error_scrub_state == DSS_ERRORSCRUBING);

	/* Error scrub is in progress. */
	if (paused == 0) {
		(void) printf(gettext("error scrub in progress since %s"),
		    ctime(&start));
	} else {
		(void) printf(gettext("error scrub paused since %s"),
		    ctime(&paused));
		(void) printf(gettext("\terror scrub started on %s"),
		    ctime(&start));
	}

	/*
	 * Sum in floating point so the total cannot wrap, and report 0%
	 * instead of dividing by zero (which would print "nan") when no
	 * blocks have been counted yet.
	 */
	total = (double)to_be_examined + (double)examined;
	fraction_done = (total > 0.0) ? (double)examined / total : 0.0;
	(void) printf(gettext("\t%.2f%% done, issued I/O for %llu error"
	    " blocks"), 100 * fraction_done, (u_longlong_t)examined);

	(void) printf("\n");
}

/*
* Print out detailed scrub status.
*/
Expand Down Expand Up @@ -7687,6 +7776,7 @@ status_callback(zpool_handle_t *zhp, void *data)
ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t **)&prs, &c);

print_scan_status(ps);
print_err_scrub_status(ps);
print_checkpoint_scan_warning(ps, pcs);
print_removal_status(zhp, prs);
print_checkpoint_status(pcs);
Expand Down
4 changes: 4 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,15 @@ typedef enum zfs_error {
EZFS_THREADCREATEFAILED, /* thread create failed */
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
EZFS_SCRUBBING, /* currently scrubbing */
EZFS_ERRORSCRUBBING, /* currently error scrubbing */
EZFS_ERRORSCRUB_PAUSED, /* error scrub currently paused */
EZFS_NO_SCRUB, /* no active scrub */
EZFS_NO_ERRORSCRUB, /* no active error scrub */
EZFS_DIFF, /* general failure of zfs diff */
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_SCRUB_PAUSED_TO_CANCEL, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
Expand Down
2 changes: 2 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ int lzc_reopen(const char *, boolean_t);
int lzc_pool_checkpoint(const char *);
int lzc_pool_checkpoint_discard(const char *);

int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);

#ifdef __cplusplus
}
#endif
Expand Down
1 change: 1 addition & 0 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ typedef struct dmu_buf {
#define DMU_POOL_DDT_STATS "DDT-statistics"
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_ERRORSCRUB "error_scrub"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"
Expand Down
28 changes: 26 additions & 2 deletions include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/ddt.h>
#include <sys/bplist.h>

Expand Down Expand Up @@ -76,6 +77,21 @@ typedef enum dsl_scan_flags {

#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)

/*
 * State of an error scrub.  dsl_scan_t embeds one of these as
 * errorscrub_phys, among the members it needs for syncing error scrub
 * status to disk.  Every member is a uint64_t, which lets
 * ERRORSCRUB_PHYS_NUMINTS express the structure's size as a count of
 * 64-bit integers.
 * NOTE(review): presumably stored under the DMU_POOL_ERRORSCRUB key --
 * confirm against the code that reads and writes it.
 */
typedef struct dsl_errorscrub_phys {
uint64_t dep_func; /* pool_scan_func_t */
uint64_t dep_state; /* dsl_scan_state_t */
uint64_t dep_cursor; /* serialized zap cursor for tracing progress */
uint64_t dep_start_time; /* error scrub start time, unix timestamp */
uint64_t dep_end_time; /* error scrub end time, unix timestamp */
uint64_t dep_to_examine; /* total error blocks to be scrubbed */
uint64_t dep_examined; /* blocks scrubbed so far */
uint64_t dep_errors; /* error scrub I/O error count */
uint64_t dep_paused_flags; /* flag for paused */
} dsl_errorscrub_phys_t;

/* Number of uint64_t words occupied by dsl_errorscrub_phys_t. */
#define ERRORSCRUB_PHYS_NUMINTS (sizeof (dsl_errorscrub_phys_t) \
/ sizeof (uint64_t))

/*
* Every pool will have one dsl_scan_t and this structure will contain
* in-memory information about the scan and a pointer to the on-disk
Expand Down Expand Up @@ -148,11 +164,16 @@ typedef struct dsl_scan {
uint64_t scn_avg_zio_size_this_txg;
uint64_t scn_zios_this_txg;

/* zap cursor for tracing error scrub progress */
zap_cursor_t errorscrub_cursor;
/* members needed for syncing scan status to disk */
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
dsl_scan_phys_t scn_phys_cached;
avl_tree_t scn_queue; /* queue of datasets to scan */
uint64_t scn_bytes_pending; /* outstanding data to issue */

/* members needed for syncing error scrub status to disk */
dsl_errorscrub_phys_t errorscrub_phys;
} dsl_scan_t;

typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;
Expand All @@ -162,10 +183,12 @@ void scan_fini(void);
int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan_cancel(struct dsl_pool *, pool_scan_func_t func);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp,
pool_scrub_cmd_t cmd, pool_scan_func_t func);
void dsl_errorscrub_sync(struct dsl_pool *, dmu_tx_t *);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
Expand All @@ -177,6 +200,7 @@ void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
boolean_t dsl_errorscrub_is_paused(const dsl_scan_t *scn);
void dsl_scan_freed(spa_t *spa, const blkptr_t *bp);
void dsl_scan_io_queue_destroy(dsl_scan_io_queue_t *queue);
void dsl_scan_io_queue_vdev_xfer(vdev_t *svd, vdev_t *tvd);
Expand Down
18 changes: 18 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ typedef enum pool_scan_func {
POOL_SCAN_NONE,
POOL_SCAN_SCRUB,
POOL_SCAN_RESILVER,
POOL_ERRORSCRUB,
POOL_SCAN_FUNCS
} pool_scan_func_t;

Expand All @@ -908,6 +909,7 @@ typedef enum pool_scan_func {
typedef enum pool_scrub_cmd {
POOL_SCRUB_NORMAL = 0,
POOL_SCRUB_PAUSE,
POOL_ERRORSCRUB_STOP,
POOL_SCRUB_FLAGS_END
} pool_scrub_cmd_t;

Expand Down Expand Up @@ -962,6 +964,20 @@ typedef struct pool_scan_stat {
uint64_t pss_pass_scrub_spent_paused;
uint64_t pss_pass_issued; /* issued bytes per scan pass */
uint64_t pss_issued; /* total bytes checked by scanner */

/* error scrub values stored on disk */
uint64_t pss_error_scrub_func; /* pool_scan_func_t */
uint64_t pss_error_scrub_state; /* dsl_scan_state_t */
uint64_t pss_error_scrub_start; /* error scrub start time */
uint64_t pss_error_scrub_end; /* error scrub end time */
uint64_t pss_error_scrub_examined; /* error blocks issued I/O */
/* error blocks to be issued I/O */
uint64_t pss_error_scrub_to_be_examined;

/* error scrub values not stored on disk */
/* error scrub pause time in milliseconds */
uint64_t pss_pass_error_scrub_pause;

} pool_scan_stat_t;

typedef struct pool_removal_stat {
Expand All @@ -983,6 +999,7 @@ typedef enum dsl_scan_state {
DSS_SCANNING,
DSS_FINISHED,
DSS_CANCELED,
DSS_ERRORSCRUBING,
DSS_NUM_STATES
} dsl_scan_state_t;

Expand Down Expand Up @@ -1277,6 +1294,7 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
ZFS_IOC_REDACT, /* 0x5a51 */
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
ZFS_IOC_POOL_SCRUB, /* 0x5a53 */

/*
* Linux - 3/64 numbers reserved.
Expand Down
6 changes: 4 additions & 2 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -863,8 +863,9 @@ extern void spa_l2cache_drop(spa_t *spa);

/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
extern int spa_scan_stop(spa_t *spa, pool_scan_func_t func);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scan_func_t func,
pool_scrub_cmd_t flag);

/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
Expand Down Expand Up @@ -1175,6 +1176,7 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern uint64_t spa_get_last_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
Expand Down
4 changes: 4 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ struct spa {
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
uint64_t spa_scan_pass_issued; /* issued bytes per pass */

/* error scrub pause time in milliseconds */
uint64_t spa_scan_pass_errorscrub_pause;
/* total error scrub paused time in milliseconds */
uint64_t spa_scan_pass_errorscrub_spent_paused;
/*
* We are in the middle of a resilver, and another resilver
* is needed once this one completes. This is set iff any
Expand Down
5 changes: 5 additions & 0 deletions include/sys/sysevent/eventdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ extern "C" {
#define ESC_ZFS_TRIM_CANCEL "trim_cancel"
#define ESC_ZFS_TRIM_RESUME "trim_resume"
#define ESC_ZFS_TRIM_SUSPEND "trim_suspend"
#define ESC_ZFS_ERRORSCRUB_START "error_scrub_start"
#define ESC_ZFS_ERRORSCRUB_FINISH "error_scrub_finish"
#define ESC_ZFS_ERRORSCRUB_ABORT "error_scrub_abort"
#define ESC_ZFS_ERRORSCRUB_RESUME "error_scrub_resume"
#define ESC_ZFS_ERRORSCRUB_PAUSED "error_scrub_paused"

/*
* datalink subclass definitions.
Expand Down
Loading

0 comments on commit 66d6580

Please sign in to comment.