Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deadlock between spa_errlog_lock and dp_config_rwlock #14289

Merged
merged 1 commit into from
Dec 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 6 additions & 26 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -8599,37 +8599,17 @@ status_callback(zpool_handle_t *zhp, void *data)

if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
&nerr) == 0) {
nvlist_t *nverrlist = NULL;

/*
* If the approximate error count is small, get a
* precise count by fetching the entire log and
* uniquifying the results.
*/
if (nerr > 0 && nerr < 100 && !cbp->cb_verbose &&
zpool_get_errlog(zhp, &nverrlist) == 0) {
nvpair_t *elem;

elem = NULL;
nerr = 0;
while ((elem = nvlist_next_nvpair(nverrlist,
elem)) != NULL) {
nerr++;
}
}
nvlist_free(nverrlist);

(void) printf("\n");

if (nerr == 0)
(void) printf(gettext("errors: No known data "
"errors\n"));
else if (!cbp->cb_verbose)
if (nerr == 0) {
(void) printf(gettext(
"errors: No known data errors\n"));
} else if (!cbp->cb_verbose) {
(void) printf(gettext("errors: %llu data "
"errors, use '-v' for a list\n"),
(u_longlong_t)nerr);
else
} else {
print_error_log(zhp);
}
}

if (cbp->cb_dedup_stats)
Expand Down
2 changes: 1 addition & 1 deletion cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -6313,7 +6313,7 @@ ztest_scrub_impl(spa_t *spa)
while (dsl_scan_scrubbing(spa_get_dsl(spa)))
txg_wait_synced(spa_get_dsl(spa), 0);

if (spa_get_errlog_size(spa) > 0)
if (spa_approx_errlog_size(spa) > 0)
return (ECKSUM);

ztest_pool_scrubbed = B_TRUE;
Expand Down
2 changes: 1 addition & 1 deletion include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,7 @@ extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern uint64_t spa_approx_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
Expand Down
46 changes: 20 additions & 26 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -4133,33 +4133,28 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
{
zfs_cmd_t zc = {"\0"};
libzfs_handle_t *hdl = zhp->zpool_hdl;
uint64_t count;
zbookmark_phys_t *zb = NULL;
int i;
zbookmark_phys_t *buf;
uint64_t buflen = 10000; /* approx. 1MB of RAM */

if (fnvlist_lookup_uint64(zhp->zpool_config,
ZPOOL_CONFIG_ERRCOUNT) == 0)
return (0);

/*
* Retrieve the raw error list from the kernel. If the number of errors
* has increased, allocate more space and continue until we get the
* entire list.
* Retrieve the raw error list from the kernel. If it doesn't fit,
* allocate a larger buffer and retry.
*/
count = fnvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT);
if (count == 0)
return (0);
zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
count * sizeof (zbookmark_phys_t));
zc.zc_nvlist_dst_size = count;
(void) strcpy(zc.zc_name, zhp->zpool_name);
for (;;) {
buf = zfs_alloc(zhp->zpool_hdl,
buflen * sizeof (zbookmark_phys_t));
zc.zc_nvlist_dst = (uintptr_t)buf;
zc.zc_nvlist_dst_size = buflen;
if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_ERROR_LOG,
&zc) != 0) {
free((void *)(uintptr_t)zc.zc_nvlist_dst);
free(buf);
if (errno == ENOMEM) {
void *dst;

count = zc.zc_nvlist_dst_size;
dst = zfs_alloc(zhp->zpool_hdl, count *
sizeof (zbookmark_phys_t));
zc.zc_nvlist_dst = (uintptr_t)dst;
buflen *= 2;
} else {
return (zpool_standard_error_fmt(hdl, errno,
dgettext(TEXT_DOMAIN, "errors: List of "
Expand All @@ -4177,18 +4172,17 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
* _not_ copied as part of the process. So we point the start of our
* array appropriate and decrement the total number of elements.
*/
zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
zc.zc_nvlist_dst_size;
count -= zc.zc_nvlist_dst_size;
zbookmark_phys_t *zb = buf + zc.zc_nvlist_dst_size;
uint64_t zblen = buflen - zc.zc_nvlist_dst_size;

qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
qsort(zb, zblen, sizeof (zbookmark_phys_t), zbookmark_mem_compare);

verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);

/*
* Fill in the nverrlistp with nvlist's of dataset and object numbers.
*/
for (i = 0; i < count; i++) {
for (uint64_t i = 0; i < zblen; i++) {
nvlist_t *nv;

/* ignoring zb_blkid and zb_level for now */
Expand All @@ -4215,11 +4209,11 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
nvlist_free(nv);
}

free((void *)(uintptr_t)zc.zc_nvlist_dst);
free(buf);
return (0);

nomem:
free((void *)(uintptr_t)zc.zc_nvlist_dst);
free(buf);
ahrens marked this conversation as resolved.
Show resolved Hide resolved
return (no_memory(zhp->zpool_hdl));
}

Expand Down
2 changes: 1 addition & 1 deletion lib/libzfs/libzfs_status.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ check_status(nvlist_t *config, boolean_t isimport,
{
pool_scan_stat_t *ps = NULL;
uint_t vsc, psc;
uint64_t nerr;
uint64_t suspended;
uint64_t hostid = 0;
uint64_t errata = 0;
Expand Down Expand Up @@ -392,6 +391,7 @@ check_status(nvlist_t *config, boolean_t isimport,
* Persistent data errors.
*/
if (!isimport) {
uint64_t nerr;
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
&nerr) == 0 && nerr != 0)
return (ZPOOL_STATUS_CORRUPT_DATA);
Expand Down
8 changes: 4 additions & 4 deletions module/zfs/dsl_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -944,13 +944,13 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)

if (dsl_scan_restarting(scn, tx))
spa_history_log_internal(spa, "scan aborted, restarting", tx,
"errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
else if (!complete)
spa_history_log_internal(spa, "scan cancelled", tx,
"errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));
else
spa_history_log_internal(spa, "scan done", tx,
"errors=%llu", (u_longlong_t)spa_get_errlog_size(spa));
"errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa));

if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
spa->spa_scrub_active = B_FALSE;
Expand Down Expand Up @@ -1013,7 +1013,7 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) {
spa_history_log_internal(spa,
"starting deferred resilver", tx, "errors=%llu",
(u_longlong_t)spa_get_errlog_size(spa));
(u_longlong_t)spa_approx_errlog_size(spa));
spa_async_request(spa, SPA_ASYNC_RESILVER);
}

Expand Down
2 changes: 1 addition & 1 deletion module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -5543,7 +5543,7 @@ spa_get_stats(const char *name, nvlist_t **config,

fnvlist_add_uint64(*config,
ZPOOL_CONFIG_ERRCOUNT,
spa_get_errlog_size(spa));
spa_approx_errlog_size(spa));

if (spa_suspended(spa)) {
fnvlist_add_uint64(*config,
Expand Down
Loading