Skip to content

Commit

Permalink
Fixes for spurious failures of resilver_restart_001 test
Browse files Browse the repository at this point in the history
The resilver restart test was reported as failing about 2% of the
time. Two issues were found:
- The event log wasn't large enough, so resilver events were missing
- One 'zpool sync' wasn't enough for resilver to start after zinject

Signed-off-by: John Poduska <jpoduska@datto.com>
Closes #9677
  • Loading branch information
jwpoduska committed Dec 6, 2019
1 parent f95704c commit 8fcd29b
Showing 1 changed file with 9 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,13 @@ function cleanup
{
echo $ORIG_RESILVER_MIN_TIME > $ZFS_PARAMS/zfs_resilver_min_time_ms
echo $ORIG_SCAN_SUSPEND_PROGRESS > $ZFS_PARAMS/zfs_scan_suspend_progress
echo $ORIG_ZFS_ZEVENT_LEN_MAX > $ZFS_PARAMS/zfs_zevent_len_max
log_must zinject -c all
destroy_pool $TESTPOOL
rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
}

# Count resilver events in zpool and number of deferred resilvers on vdevs
# count resilver events in zpool and number of deferred resilvers on vdevs
function verify_restarts # <msg> <cnt> <defer>
{
msg=$1
Expand Down Expand Up @@ -88,6 +89,7 @@ log_assert "Check for unnecessary resilver restarts"
ZFS_PARAMS=/sys/module/zfs/parameters
ORIG_RESILVER_MIN_TIME=$(cat $ZFS_PARAMS/zfs_resilver_min_time_ms)
ORIG_SCAN_SUSPEND_PROGRESS=$(cat $ZFS_PARAMS/zfs_scan_suspend_progress)
ORIG_ZFS_ZEVENT_LEN_MAX=$(cat $ZFS_PARAMS/zfs_zevent_len_max)

set -A RESTARTS -- '1' '2' '2' '2'
set -A VDEVS -- '' '' '' ''
Expand All @@ -98,12 +100,15 @@ VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"

log_onexit cleanup

# ensure that enough events will be saved
echo 512 > $ZFS_PARAMS/zfs_zevent_len_max

log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE

log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \
raidz ${VDEV_FILES[@]}

# Create 4 filesystems
# create 4 filesystems
for fs in fs{0..3}
do
log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs
Expand All @@ -118,7 +123,7 @@ do
done
wait

# Test without and with deferred resilver feature enabled
# test without and with deferred resilver feature enabled
for test in "without" "with"
do
log_note "Testing $test deferred resilvers"
Expand Down Expand Up @@ -177,6 +182,7 @@ do

# wait for a few txg's to see if a resilver happens
log_must zpool sync $TESTPOOL
log_must zpool sync $TESTPOOL

# there should now be 2 resilver starts
verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
Expand Down

0 comments on commit 8fcd29b

Please sign in to comment.