Skip to content

Commit

Permalink
Add scrub after resilver zed script
Browse files Browse the repository at this point in the history
* Add a zed script to kick off a scrub after a resilver.  The script is
disabled by default.

* Tweak the resilver_finish event timing so that it happens after the
bad disk has been detached.  Previously you would see the
resilver_finish event and then the vdev_detach event.

* Add a test mode (-t) option to zed to allow it to use the native
paths to the ZFS utilities.  This is needed when you're running zed
under the ZTS in a local workspace.

Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes: #4662
  • Loading branch information
tonyhutter committed Jan 30, 2018
1 parent 522db29 commit a2c84e0
Show file tree
Hide file tree
Showing 16 changed files with 198 additions and 24 deletions.
6 changes: 4 additions & 2 deletions cmd/zed/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ dist_zedexec_SCRIPTS = \
zed.d/statechange-notify.sh \
zed.d/vdev_clear-led.sh \
zed.d/vdev_attach-led.sh \
zed.d/pool_import-led.sh
zed.d/pool_import-led.sh \
zed.d/resilver_finish-start-scrub.sh

zedconfdefaults = \
all-syslog.sh \
Expand All @@ -77,7 +78,8 @@ zedconfdefaults = \
statechange-notify.sh \
vdev_clear-led.sh \
vdev_attach-led.sh \
pool_import-led.sh
pool_import-led.sh \
resilver_finish-start-scrub.sh

install-data-hook:
$(MKDIR_P) "$(DESTDIR)$(zedconfdir)"
Expand Down
16 changes: 16 additions & 0 deletions cmd/zed/zed.d/resilver_finish-start-scrub.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/sh
# resilver_finish-start-scrub.sh
# Run a scrub after a resilver
#
# This zedlet is opt-in: it does nothing unless ZED_SCRUB_AFTER_RESILVER=1
# is set in zed.rc.
#
# Exit codes:
# 2: script wasn't enabled in zed.rc
# 9: internal error
#
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# Bail out early unless the administrator explicitly enabled the feature.
[ "${ZED_SCRUB_AFTER_RESILVER}" = "1" ] || exit 2
[ -n "${ZEVENT_POOL}" ] || exit 9
[ -n "${ZEVENT_SUBCLASS}" ] || exit 9
zed_check_cmd "${ZPOOL}" || exit 9

zed_log_msg "Starting scrub after resilver on ${ZEVENT_POOL}"
# Invoke zpool directly rather than through eval, so the pool name is passed
# as a single literal argument and is never re-parsed by the shell.
"${ZPOOL}" scrub "${ZEVENT_POOL}"
3 changes: 3 additions & 0 deletions cmd/zed/zed.d/zed.rc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@
#
ZED_USE_ENCLOSURE_LEDS=1

##
# Run a scrub after every resilver
#ZED_SCRUB_AFTER_RESILVER=1

##
# The syslog priority (e.g., specified as a "facility.level" pair).
Expand Down
7 changes: 6 additions & 1 deletion cmd/zed/zed_conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ _zed_conf_display_help(const char *prog, int got_err)
"Run daemon in the foreground.");
fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-M",
"Lock all pages in memory.");
fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-t",
"Testing mode (only used by ZTS).");
fprintf(fp, "%*c%*s %s\n", w1, 0x20, -w2, "-Z",
"Zero state file.");
fprintf(fp, "\n");
Expand Down Expand Up @@ -247,7 +249,7 @@ _zed_conf_parse_path(char **resultp, const char *path)
void
zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
{
const char * const opts = ":hLVc:d:p:s:vfFMZ";
const char * const opts = ":hLVc:d:p:s:vftFMZ";
int opt;

if (!zcp || !argv || !argv[0])
Expand Down Expand Up @@ -278,6 +280,9 @@ zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
case 's':
_zed_conf_parse_path(&zcp->state_file, optarg);
break;
case 't':
zcp->do_testmode = 1;
break;
case 'v':
zcp->do_verbose = 1;
break;
Expand Down
1 change: 1 addition & 0 deletions cmd/zed/zed_conf.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct zed_conf {
unsigned do_memlock:1; /* true if locking memory */
unsigned do_verbose:1; /* true if verbosity enabled */
unsigned do_zero:1; /* true if zeroing state */
unsigned do_testmode:1; /* true if testmode is set */
int syslog_facility; /* syslog facility value */
int min_events; /* RESERVED FOR FUTURE USE */
int max_events; /* RESERVED FOR FUTURE USE */
Expand Down
34 changes: 30 additions & 4 deletions cmd/zed/zed_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -733,12 +733,14 @@ _zed_event_add_nvpair(uint64_t eid, zed_strings_t *zsp, nvpair_t *nvp)

/*
* Restrict various environment variables to safe and sane values
* when constructing the environment for the child process.
* when constructing the environment for the child process, unless we're
* running in testmode (like under the ZFS test suite).
*
* Reference: Secure Programming Cookbook by Viega & Messier, Section 1.1.
*/
static void
_zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp)
_zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp,
boolean_t testmode)
{
const char *env_restrict[][2] = {
{ "IFS", " \t\n" },
Expand All @@ -753,11 +755,35 @@ _zed_event_add_env_restrict(uint64_t eid, zed_strings_t *zsp)
{ "ZFS_RELEASE", ZFS_META_RELEASE },
{ NULL, NULL }
};

/*
* In test mode, use the ZFS binaries from $PATH instead of the
* hard-coded ones.
*/
const char *env_testmode[][2] = {
{ "IFS", " \t\n" },
{ "PATH", NULL }, /* $PATH copied in later on */
{ "ZDB", "zdb" },
{ "ZED", "zed" },
{ "ZFS", "zfs" },
{ "ZINJECT", "zinject" },
{ "ZPOOL", "zpool" },
{ "ZFS_ALIAS", ZFS_META_ALIAS },
{ "ZFS_VERSION", ZFS_META_VERSION },
{ "ZFS_RELEASE", ZFS_META_RELEASE },
{ NULL, NULL }
};
const char *(*pa)[2];

assert(zsp != NULL);

for (pa = env_restrict; *(*pa); pa++) {
pa = testmode ? env_testmode : env_restrict;

for (; *(*pa); pa++) {
/* In testmode, use our native $PATH */
if (testmode && strcmp((*pa)[0], "PATH") == 0)
(*pa)[1] = getenv("PATH");

_zed_event_add_var(eid, zsp, NULL, (*pa)[0], "%s", (*pa)[1]);
}
}
Expand Down Expand Up @@ -902,7 +928,7 @@ zed_event_service(struct zed_conf *zcp)
while ((nvp = nvlist_next_nvpair(nvl, nvp)))
_zed_event_add_nvpair(eid, zsp, nvp);

_zed_event_add_env_restrict(eid, zsp);
_zed_event_add_env_restrict(eid, zsp, zcp->do_testmode);
_zed_event_add_env_preserve(eid, zsp);

_zed_event_add_var(eid, zsp, ZED_VAR_PREFIX, "PID",
Expand Down
9 changes: 8 additions & 1 deletion man/man8/zed.8.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ ZED \- ZFS Event Daemon
[\fB\-M\fR]
[\fB\-p\fR \fIpidfile\fR]
[\fB\-s\fR \fIstatefile\fR]
[\fB\-t\fR]
[\fB\-v\fR]
[\fB\-V\fR]
[\fB\-Z\fR]
Expand Down Expand Up @@ -80,7 +81,13 @@ Write the daemon's process ID to the specified file.
.TP
.BI \-s\ statefile
Write the daemon's state to the specified file.

.TP
.BI \-t
Test mode. Normally zedlets run in a locked-down environment, with hardcoded
paths to the ZFS commands ($ZFS, $ZPOOL, $ZED, etc.) and a hardcoded $PATH.
This is done for security reasons. Test mode gets around this by including the
native environment's $PATH, and does away with hardcoded paths to the ZFS
commands. This is only used by the ZFS test suite; do not use in production!
.SH ZEVENTS
.PP
A zevent is comprised of a list of nvpairs (name/value pairs). Each zevent
Expand Down
31 changes: 21 additions & 10 deletions module/zfs/dsl_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -804,17 +804,28 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE);
if (complete) {
spa_event_notify(spa, NULL, NULL,
scn->scn_phys.scn_min_txg ?
ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
/*
* scn->scn_phys.scn_min_txg == 0 means we finished
* a scrub. scn_phys.scn_min_txg != 0 means we
* finished a resilver of the new disk.
*
* The actual resilver_finish event happens later in
* spa_async_thread() after the old vdev is removed.
*/
if (scn->scn_phys.scn_min_txg == 0) {
spa_event_notify(spa, NULL, NULL,
ESC_ZFS_SCRUB_FINISH);

spa_errlog_rotate(spa);
} else {
/*
* We may have finished replacing a device. Let
* the async thread assess this and handle the
* detach.
*/
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
}
}
spa_errlog_rotate(spa);

/*
* We may have finished replacing a device.
* Let the async thread assess this and handle the detach.
*/
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
}

scn->scn_phys.scn_end_time = gethrestime_sec();
Expand Down
4 changes: 3 additions & 1 deletion module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -6160,8 +6160,10 @@ spa_async_thread(void *arg)
/*
* If any devices are done replacing, detach them.
*/
if (tasks & SPA_ASYNC_RESILVER_DONE)
if (tasks & SPA_ASYNC_RESILVER_DONE) {
spa_vdev_resilver_done(spa);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_RESILVER_FINISH);
}

/*
* Kick off a resilver.
Expand Down
3 changes: 2 additions & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@ tags = ['functional', 'exec']

[tests/functional/fault]
tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple']
'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple',
'scrub_after_resilver']
tags = ['functional', 'fault']

[tests/functional/features/async_destroy]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/include/commands.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ export SYSTEM_FILES='arp
pgrep
ping
pkill
printenv
printf
ps
pwd
Expand Down
34 changes: 33 additions & 1 deletion tests/zfs-tests/include/libtest.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -3053,9 +3053,32 @@ function wait_replacing #pool
done
}

#
# Wait for a pool to be scrubbed
#
# $1 pool name
# $2 number of seconds to wait (optional)
#
# Returns true when pool has been scrubbed, or false if there's a timeout or if
# no scrub was done.
#
function wait_scrubbed
{
	# $1: pool to watch (defaults to $TESTPOOL)
	# $2: maximum number of one-second polls (defaults to 10)
	typeset pool=${1:-$TESTPOOL}
	typeset iter=${2:-10}
	typeset -i i=0

	# Poll once per second. A local integer counter keeps the loop
	# variable out of the caller's scope and avoids relying on brace
	# expansion of "$iter".
	while (( i < iter )); do
		if is_pool_scrubbed $pool ; then
			return 0
		fi
		sleep 1
		(( i += 1 ))
	done

	# Timed out without ever seeing a completed scrub.
	return 1
}

#
# Setup custom environment for the ZED.
#
# $1 Optional zedlet script to copy into our zedlet test directory.
function zed_setup
{
if ! is_linux; then
Expand All @@ -3073,6 +3096,7 @@ function zed_setup
if [[ -e $VDEVID_CONF_ETC ]]; then
log_fail "Must not have $VDEVID_CONF_ETC file present on system"
fi
EXTRA_ZEDLET="$1"

# Create a symlink for /etc/zfs/vdev_id.conf file.
log_must ln -s $VDEVID_CONF $VDEVID_CONF_ETC
Expand All @@ -3081,6 +3105,9 @@ function zed_setup
# add additional ZEDLETs as needed for their specific test.
log_must cp ${ZEDLET_ETC_DIR}/zed.rc $ZEDLET_DIR
log_must cp ${ZEDLET_ETC_DIR}/zed-functions.sh $ZEDLET_DIR
if [[ -n "$EXTRA_ZEDLET" ]] ; then
log_must cp ${ZEDLET_ETC_DIR}/$EXTRA_ZEDLET $ZEDLET_DIR
fi

# Customize the zed.rc file to enable the full debug log.
log_must sed -i '/\#ZED_DEBUG_LOG=.*/d' $ZEDLET_DIR/zed.rc
Expand All @@ -3097,17 +3124,22 @@ function zed_setup
#
# Cleanup custom ZED environment.
#
# $1 Optional zedlet script(s) to remove from our zedlet test directory.
function zed_cleanup
{
if ! is_linux; then
return
fi
EXTRA_ZEDLET="$1"

log_must rm -f ${ZEDLET_DIR}/zed.rc
log_must rm -f ${ZEDLET_DIR}/zed-functions.sh
log_must rm -f ${ZEDLET_DIR}/all-syslog.sh
log_must rm -f ${ZEDLET_DIR}/all-debug.sh
log_must rm -f ${ZEDLET_DIR}/state
if [[ ! -z "$EXTRA_ZEDLET" ]] ; then
log_must rm -f ${ZEDLET_DIR}/$EXTRA_ZEDLET
fi
log_must rm -f $ZED_LOG
log_must rm -f $ZED_DEBUG_LOG
log_must rm -f $VDEVID_CONF_ETC
Expand Down Expand Up @@ -3139,7 +3171,7 @@ function zed_start
# run ZED in the background and redirect foreground logging
# output to $ZED_LOG.
log_must truncate -s 0 $ZED_DEBUG_LOG
log_must eval "zed -vF -d $ZEDLET_DIR -p $ZEDLET_DIR/zed.pid" \
log_must eval "zed -vF -t -d $ZEDLET_DIR -p $ZEDLET_DIR/zed.pid" \
"-s $ZEDLET_DIR/state 2>$ZED_LOG &"

return 0
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/fault/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ dist_pkgdata_SCRIPTS = \
auto_spare_001_pos.ksh \
auto_spare_002_pos.ksh \
auto_spare_ashift.ksh \
auto_spare_multiple.ksh
auto_spare_multiple.ksh \
scrub_after_resilver.ksh
2 changes: 1 addition & 1 deletion tests/zfs-tests/tests/functional/fault/cleanup.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ verify_runnable "global"
cleanup_devices $DISKS

zed_stop
zed_cleanup
zed_cleanup resilver_finish-start-scrub.sh

log_pass
Loading

0 comments on commit a2c84e0

Please sign in to comment.