Skip to content

Commit

Permalink
Restrict raidz faulted vdev count
Browse files Browse the repository at this point in the history
Specifically, a child vdev inside a replacing vdev does not count as
available when assessing the DTL during a vdev_fault().

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Signed-off-by: Don Brady <don.brady@klarasystems.com>
  • Loading branch information
don-brady committed Sep 27, 2024
1 parent 6f50f8e commit 4c639ad
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 9 deletions.
44 changes: 35 additions & 9 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3149,9 +3149,9 @@ vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done)
* Reassess DTLs after a config change or scrub completion. If txg == 0 no
* write operations will be issued to the pool.
*/
void
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
boolean_t scrub_done, boolean_t rebuild_done)
static void
vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
boolean_t scrub_done, boolean_t rebuild_done, boolean_t dtl_outage)
{
spa_t *spa = vd->vdev_spa;
avl_tree_t reftree;
Expand All @@ -3160,8 +3160,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);

for (int c = 0; c < vd->vdev_children; c++)
vdev_dtl_reassess(vd->vdev_child[c], txg,
scrub_txg, scrub_done, rebuild_done);
vdev_dtl_reassess_impl(vd->vdev_child[c], txg,
scrub_txg, scrub_done, rebuild_done, dtl_outage);

if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux)
return;
Expand Down Expand Up @@ -3255,7 +3255,15 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
if (scrub_done)
range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
if (!vdev_readable(vd))

/*
* For the dtl_outage case, treat members of a replacing vdev
* as if they are not available. It's more likely than not that
* a vdev in a replacing vdev could encounter read errors so
* treat it as not being able to contribute.
*/
if (!vdev_readable(vd) || (dtl_outage &&
vd->vdev_parent->vdev_ops == &vdev_replacing_ops))
range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
else
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
Expand Down Expand Up @@ -3321,6 +3329,20 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
}
}

/*
 * Public entry point for DTL reassessment.  Delegates to
 * vdev_dtl_reassess_impl() with dtl_outage disabled, so children of a
 * replacing vdev are not given the pessimistic outage treatment.
 */
void
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
    boolean_t scrub_done, boolean_t rebuild_done)
{
	vdev_dtl_reassess_impl(vd, txg, scrub_txg, scrub_done,
	    rebuild_done, B_FALSE);
}

/*
 * DTL reassessment used when probing for a potential outage (e.g. from
 * vdev_dtl_required()).  Runs with txg == 0 so no writes are issued, and
 * with dtl_outage set so members of a replacing vdev are treated as
 * unavailable.
 */
static void
vdev_dtl_reassess_outage(vdev_t *vd)
{
	vdev_dtl_reassess_impl(vd, 0, 0, B_FALSE, B_FALSE, B_TRUE);
}

/*
* Iterate over all the vdevs except spare, and post kobj events
*/
Expand Down Expand Up @@ -3548,7 +3570,11 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
}

/*
* Determine whether the specified vdev can be offlined/detached/removed
* Determine whether the specified vdev can be
* - offlined
* - detached
* - removed
* - faulted
* without losing data.
*/
boolean_t
Expand All @@ -3570,10 +3596,10 @@ vdev_dtl_required(vdev_t *vd)
* If not, we can safely offline/detach/remove the device.
*/
vd->vdev_cant_read = B_TRUE;
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
vdev_dtl_reassess_outage(tvd);
required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
vd->vdev_cant_read = cant_read;
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
vdev_dtl_reassess_outage(tvd);

if (!required && zio_injection_enabled) {
required = !!zio_handle_device_injection(vd, NULL,
Expand Down
4 changes: 4 additions & 0 deletions tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,10 @@ tags = ['functional', 'exec']
tests = ['fallocate_punch-hole']
tags = ['functional', 'fallocate']

[tests/functional/fault]
tests = ['fault_limits']
tags = ['functional', 'fault']

[tests/functional/features/async_destroy]
tests = ['async_destroy_001_pos']
tags = ['functional', 'features', 'async_destroy']
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/fault/cleanup.ksh \
functional/fault/decompress_fault.ksh \
functional/fault/decrypt_fault.ksh \
functional/fault/fault_limits.ksh \
functional/fault/scrub_after_resilver.ksh \
functional/fault/suspend_resume_single.ksh \
functional/fault/setup.ksh \
Expand Down
96 changes: 96 additions & 0 deletions tests/zfs-tests/tests/functional/fault/fault_limits.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2024 by Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION: Verify that raidz children vdev fault count is restricted
#
# STRATEGY:
# 1. Create a raidz2 or raidz3 pool and add some data to it
# 2. Replace one of the child vdevs to create a replacing vdev
# 3. While it is resilvering, attempt to fault disks
# 4. Verify that less than parity count was faulted while replacing
#

TESTPOOL="fault-test-pool"
# Randomly pick parity 2 or 3 so both raidz2 and raidz3 get coverage
PARITY=$((RANDOM%(2) + 2))
# Enough vdevs for the raidz plus one spare device used as the replacement
VDEV_CNT=$((4 + (2 * PARITY)))
VDEV_SIZ=512M

function cleanup
{
	# Destroy the pool (if it survived) and remove the backing files
	poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL"

	for i in {0..$((VDEV_CNT - 1))}; do
		log_must rm -f "$TEST_BASE_DIR/dev-$i"
	done
}

log_onexit cleanup
log_assert "restricts raidz children vdev fault count"

log_note "creating $VDEV_CNT vdevs for parity $PARITY test"
typeset -a disks
# Create sparse file-backed vdevs; note the disks array is populated
# starting at index 1, not 0
for i in {0..$((VDEV_CNT - 1))}; do
	device=$TEST_BASE_DIR/dev-$i
	log_must truncate -s $VDEV_SIZ $device
	disks[${#disks[*]}+1]=$device
done

# Build the raidz from all but the last disk; the last is kept in reserve
# as the replacement target
log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))]}

# Add some data to the pool
log_must zfs create $TESTPOOL/fs
MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z
sync_pool $TESTPOOL

# Replace the last child vdev to form a replacing vdev
log_must zpool replace ${TESTPOOL} ${disks[$((VDEV_CNT - 1))]} ${disks[$VDEV_CNT]}
# immediately offline replacement disk to keep replacing vdev around
log_must zpool offline ${TESTPOOL} ${disks[$VDEV_CNT]}

# Fault disks while a replacing vdev is still active; with the faulted
# vdev count restricted, some of these offline requests should be refused
for disk in ${disks[0..$PARITY]}; do
	log_must zpool offline -tf ${TESTPOOL} $disk
done

zpool status $TESTPOOL

# Count the faults that succeeded
faults=0
for disk in ${disks[0..$PARITY]}; do
	state=$(zpool get -H -o value state ${TESTPOOL} ${disk})
	if [ "$state" = "FAULTED" ] ; then
		((faults=faults+1))
	fi
done

# Fewer than parity disks may be faulted (data must stay recoverable),
# but at least one fault must have succeeded
log_must test "$faults" -lt "$PARITY"
log_must test "$faults" -gt 0

log_pass "restricts raidz children vdev fault count"

0 comments on commit 4c639ad

Please sign in to comment.