Skip to content

Commit

Permalink
Address review feedback
Browse files Browse the repository at this point in the history
also allow clear of suspended mmp pool in debug builds if
it passes the spa_activity_check()

Signed-off-by: Don Brady <don.brady@klarasystems.com>
  • Loading branch information
don-brady committed Feb 8, 2024
1 parent 0757ab9 commit 80a9c3b
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 40 deletions.
2 changes: 2 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,8 @@ extern uint32_t spa_get_hostid(spa_t *spa);
extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
extern boolean_t spa_livelist_delete_check(spa_t *spa);

extern boolean_t spa_mmp_remote_host_active(spa_t *spa);

extern spa_mode_t spa_mode(spa_t *spa);
extern uint64_t zfs_strtonum(const char *str, char **nptr);

Expand Down
39 changes: 32 additions & 7 deletions module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -3600,7 +3600,8 @@ spa_activity_check_duration(spa_t *spa, uberblock_t *ub)
* we detected activity then fail.
*/
static int
spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config,
boolean_t importing)
{
uint64_t txg = ub->ub_txg;
uint64_t timestamp = ub->ub_timestamp;
Expand Down Expand Up @@ -3645,19 +3646,23 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)

import_expire = gethrtime() + import_delay;

spa_import_progress_set_notes(spa, "Checking MMP activity, waiting "
"%llu ms", (u_longlong_t)NSEC2MSEC(import_delay));
if (importing) {
spa_import_progress_set_notes(spa, "Checking MMP activity, "
"waiting %llu ms", (u_longlong_t)NSEC2MSEC(import_delay));
}

int interations = 0;
while ((now = gethrtime()) < import_expire) {
if (interations++ % 30 == 0) {
if (importing && interations++ % 30 == 0) {
spa_import_progress_set_notes(spa, "Checking MMP "
"activity, %llu ms remaining",
(u_longlong_t)NSEC2MSEC(import_expire - now));
}

(void) spa_import_progress_set_mmp_check(spa_guid(spa),
NSEC2SEC(import_expire - gethrtime()));
if (importing) {
(void) spa_import_progress_set_mmp_check(spa_guid(spa),
NSEC2SEC(import_expire - gethrtime()));
}

vdev_uberblock_load(rvd, ub, &mmp_label);

Expand Down Expand Up @@ -3739,6 +3744,25 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
return (error);
}

/*
* Called from zfs_ioc_clear for a pool that was suspended after failing
* mmp write checks.
*/
boolean_t
spa_mmp_remote_host_active(spa_t *spa)
{
#ifdef ZFS_DEBUG
ASSERT(spa_multihost(spa) && spa_suspended(spa));
cmn_err(CE_WARN, "spa_mmp_remote_host_active: checking '%s'",
spa_name(spa));

return (spa_activity_check(spa, &spa->spa_uberblock, spa->spa_config,
B_FALSE) != 0);
#else
return (B_TRUE);
#endif
}

static int
spa_verify_host(spa_t *spa, nvlist_t *mos_config)
{
Expand Down Expand Up @@ -4065,7 +4089,8 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
}

int error = spa_activity_check(spa, ub, spa->spa_config);
int error =
spa_activity_check(spa, ub, spa->spa_config, B_TRUE);
if (error) {
nvlist_free(label);
return (error);
Expand Down
24 changes: 4 additions & 20 deletions module/zfs/zfs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -5763,24 +5763,6 @@ zfs_ioc_error_log(zfs_cmd_t *zc)
return (error);
}

/*
* Is this a test pool?
*
* Used by ZTS functional/mmp/mmp_write_slow_disk to tear down a
* suspended mmp pool.
*/
static boolean_t
spa_is_test_pool(spa_t *spa)
{
if (strstr(spa_name(spa), "test") == NULL)
return (B_FALSE);

uint64_t size = metaslab_class_get_space(spa_normal_class(spa));
size += metaslab_class_get_space(spa_embedded_log_class(spa));

return (size <= 1024 * 1024 * 1024);
}

static int
zfs_ioc_clear(zfs_cmd_t *zc)
{
Expand Down Expand Up @@ -5833,10 +5815,12 @@ zfs_ioc_clear(zfs_cmd_t *zc)

/*
* If multihost is enabled, resuming I/O is unsafe as another
* host may have imported the pool. (allowed for mini test pool)
* host may have imported the pool.
*/
if (spa_multihost(spa) && spa_suspended(spa) && !spa_is_test_pool(spa))
if (spa_multihost(spa) && spa_suspended(spa) &&
spa_mmp_remote_host_active(spa)) {
return (SET_ERROR(EINVAL));
}

spa_vdev_state_enter(spa, SCL_NONE);

Expand Down
21 changes: 8 additions & 13 deletions tests/zfs-tests/tests/functional/mmp/mmp_write_slow_disk.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
# Verify that long VDEV probes do not cause MMP checks to suspend pool
# Note: without PR-15839 fix, this test will suspend the pool.
#
# A device that is returning unexpeced errors will trigger a vdev_probe.
# A device that is returning unexpected errors will trigger a vdev_probe.
# When the device additionally has slow response times, the probe can hold
# the spa config loack as a writer for a long period of time such that the
# mmp uberblock updates stall when trying to aquire the spa config lock.
# the spa config lock as a writer for a long period of time such that the
# mmp uberblock updates stall when trying to acquire the spa config lock.
#
# STRATEGY:
# 1. Create a pool with multiple leaf vdevs
Expand All @@ -40,29 +40,25 @@

verify_runnable "both"

# Note pool name must contain 'test' to allow pool clear to resume pool io
MMP_POOL="test-mmp-pool"

function cleanup
{
log_must zinject -c all

if [[ $(zpool list -H -o health $MMP_POOL) == "SUSPENDED" ]]; then
# Note this clear works for suspended small test pools
log_must zpool clear $MMP_POOL
zpool get state $MMP_POOL $MMP_DIR/file.3
zpool events | grep ".fs.zfs." | grep -v "history_event"
fi

log_must zpool destroy $MMP_POOL
poolexists $MMP_POOL && destroy_pool $MMP_POOL
log_must rm -r $MMP_DIR
log_must mmp_clear_hostid
}

log_assert "A long VDEV probe doesn't cause a MMP check suspend"
log_onexit cleanup

MMP_HISTORY=/proc/spl/kstat/zfs/$MMP_POOL/multihost
MMP_HISTORY_URL=/proc/spl/kstat/zfs/$MMP_POOL/multihost

# Create a multiple drive pool
log_must zpool events -c
Expand All @@ -75,8 +71,7 @@ log_must zpool create -f $MMP_POOL \
# Enable MMP
log_must mmp_set_hostid $HOSTID1
log_must zpool set multihost=on $MMP_POOL
set_tunable64 MULTIHOST_HISTORY 0
set_tunable64 MULTIHOST_HISTORY 40
clear_mmp_history

# Inject vdev write error along with a delay
log_must zinject -f 33 -e io -L pad2 -T write -d $MMP_DIR/file.3 $MMP_POOL
Expand All @@ -85,11 +80,11 @@ log_must zinject -D 2000:4 -T write -d $MMP_DIR/file.3 $MMP_POOL

log_must dd if=/dev/urandom of=/$MMP_POOL/data bs=1M count=5
sleep 10
log_must zpool sync
sync_pool $MMP_POOL

# Confirm mmp writes to the non-slow disks have taken place
for x in {0,1,2,4}; do
write_count=$(grep -c file.${x} $MMP_HISTORY)
write_count=$(grep -c file.${x} $MMP_HISTORY_URL)
[[ $write_count -gt 0 ]] || log_fail "expecting mmp writes"
done

Expand Down

0 comments on commit 80a9c3b

Please sign in to comment.