Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZED: Match added disk by pool/vdev GUID if found #12217

Merged
merged 1 commit into from
Jun 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions cmd/zed/agents/zfs_mod.c
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,27 @@ devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice)
return (data.dd_found);
}

/*
 * Look for a vdev by pool guid and vdev guid.
 *
 * The search state is seeded with the two guids, the new devid and the
 * is_slice flag, and func is carried along as dd_func.  zfs_iter_pool is
 * then run over each pool visited by zpool_iter().
 *
 * Returns the dd_found flag left in the search state after the iteration.
 */
static boolean_t
guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
    zfs_process_func_t func, boolean_t is_slice)
{
	/* Members not named here are zero-initialized. */
	dev_data_t search = {
		.dd_func = func,
		.dd_found = B_FALSE,
		.dd_pool_guid = pool_guid,
		.dd_vdev_guid = vdev_guid,
		.dd_islabeled = is_slice,
		.dd_new_devid = devid,
	};

	/* The iterator's own return value is deliberately ignored. */
	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &search);

	return (search.dd_found);
}

/*
* Handle a EC_DEV_ADD.ESC_DISK event.
*
Expand All @@ -663,15 +684,18 @@ static int
zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
{
char *devpath = NULL, *devid;
uint64_t pool_guid = 0, vdev_guid = 0;
boolean_t is_slice;

/*
* Expecting a devid string and an optional physical location
* Expecting a devid string and an optional physical location and guid
*/
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
return (-1);

(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);

Expand All @@ -682,12 +706,16 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
* Iterate over all vdevs looking for a match in the following order:
* 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
* 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
*
* For disks, we only want to pay attention to vdevs marked as whole
* disks or are a multipath device.
* 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
*/
if (!devid_iter(devid, zfs_process_add, is_slice) && devpath != NULL)
(void) devphys_iter(devpath, devid, zfs_process_add, is_slice);
if (devid_iter(devid, zfs_process_add, is_slice))
return (0);
if (devpath != NULL && devphys_iter(devpath, devid, zfs_process_add,
is_slice))
return (0);
if (vdev_guid != 0)
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
is_slice);

return (0);
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/zed/zed_disk_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
/*
 * nvlist_lookup_boolean() returns 0 when the name is present and an
 * errno value otherwise; it does not return a boolean_t.  Test against
 * 0, like the other lookups here, so the flag is actually logged.
 */
if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0)
	zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
Expand Down
9 changes: 5 additions & 4 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,11 @@ tests = ['fallocate_prealloc', 'fallocate_punch-hole']
tags = ['functional', 'fallocate']

[tests/functional/fault:Linux]
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_multiple',
'auto_spare_ashift', 'auto_spare_shared', 'decrypt_fault',
'decompress_fault', 'scrub_after_resilver', 'zpool_status_-s']
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
'zpool_status_-s']
tags = ['functional', 'fault']

[tests/functional/features/large_dnode:Linux]
Expand Down
1 change: 1 addition & 0 deletions tests/test-runner/bin/zts-report.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ if os.environ.get('CI') == 'true':
'cli_root/zpool_split/zpool_split_wholedisk': ['SKIP', ci_reason],
'fault/auto_offline_001_pos': ['SKIP', ci_reason],
'fault/auto_online_001_pos': ['SKIP', ci_reason],
'fault/auto_online_002_pos': ['SKIP', ci_reason],
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
'fault/auto_spare_ashift': ['SKIP', ci_reason],
'fault/auto_spare_shared': ['SKIP', ci_reason],
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/fault/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
auto_offline_001_pos.ksh \
auto_online_001_pos.ksh \
auto_online_002_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
auto_spare_002_pos.ksh \
Expand Down
94 changes: 94 additions & 0 deletions tests/zfs-tests/tests/functional/fault/auto_online_002_pos.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
# Copyright (c) 2019 by Delphix. All rights reserved.
# Portions Copyright 2021 iXsystems, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
# Now with partitioned vdevs.
#
# STRATEGY:
# 1. Partition a scsi_debug device for simulating removal
# 2. Create a pool
# 3. Remove the disk so that the pool becomes degraded
# 4. Re-insert the disk; ZED reacts to the resulting event and
# automatically onlines the disk, which is resilvered back into the pool.
#
verify_runnable "both"

function cleanup
{
	# Destroy the test pool only if it still exists.
	if poolexists ${TESTPOOL}; then
		destroy_pool ${TESTPOOL}
	fi

	# Always unload scsi_debug, whether or not the pool was present.
	unload_scsi_debug
}

log_assert "Testing automated auto-online FMA test with partitioned vdev"

# Register cleanup so the pool and scsi_debug are torn down on any exit path.
log_onexit cleanup

# Load scsi_debug, clear any label on the resulting device and partition it;
# the first partition is the vdev used for the removal simulation.
load_scsi_debug ${SDSIZE} ${SDHOSTS} ${SDTGTS} ${SDLUNS} '512b'
SDDEVICE=$(get_debug_device)
zpool labelclear -f ${SDDEVICE}
partition_disk ${SDSIZE} ${SDDEVICE} 1
part=${SDDEVICE}1
# Record the SCSI host now; it is passed to insert_disk further down.
host=$(get_scsi_host ${SDDEVICE})

# Wait for the partition's device node before building the raidz1 pool from
# the partition plus ${DISKS}.
block_device_wait /dev/${part}
log_must zpool create -f ${TESTPOOL} raidz1 ${part} ${DISKS}

# Add some data to the pool
log_must mkfile ${FSIZE} /${TESTPOOL}/data

# Remove the scsi_debug disk and confirm the pool reports degraded.
remove_disk ${SDDEVICE}
check_state ${TESTPOOL} "" "degraded" || \
log_fail "${TESTPOOL} is not degraded"

# Clear zpool events
log_must zpool events -c

# Online disk
insert_disk ${SDDEVICE} ${host}

# Poll once per second until the pool reports resilvered; fail once the
# counter reaches MAXTIMEOUT.
log_note "Delay for ZED auto-online"
typeset -i timeout=0
until is_pool_resilvered ${TESTPOOL}; do
if ((timeout++ == MAXTIMEOUT)); then
log_fail "Timeout occurred"
fi
sleep 1
done
log_note "Auto-online of ${SDDEVICE} is complete"

# Validate auto-online was successful
sleep 1
check_state ${TESTPOOL} "" "online" || \
log_fail "${TESTPOOL} is not back online"

log_must zpool destroy ${TESTPOOL}

log_pass "Auto-online with partitioned vdev test successful"