From f660625e7e9fec36a6bf790c6776df9db63fa9c9 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Wed, 28 Aug 2019 16:39:59 -0700 Subject: [PATCH 1/2] Prevent metaslab_sync panic due to spa_final_dirty_txg If a pool enables the SPACEMAP_HISTOGRAM feature shortly before being exported, we can enter a situation that causes a kernel panic. Any metaslabs that are loaded during the final dirty txg and haven't already been condensed will cause metaslab_sync to proceed after the final dirty txg so that the condense can be performed, which there are assertions to prevent. Because of the nature of this issue, there are a number of ways we can enter this state. Rather than try to prevent each of them one by one, potentially missing some edge cases, we instead cut it off at the point of intersection; by preventing metaslab_sync from proceeding if it would only do so to perform a condense and we're past the final dirty txg, we preserve the utility of the existing asserts while preventing this particular issue. Signed-off-by: Paul Dagnelie --- module/zfs/metaslab.c | 10 ++++++++-- tests/runfiles/linux.run | 8 +++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 2f92fffa4ec0..12d63431825f 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -3552,12 +3552,18 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) /* * Normally, we don't want to process a metaslab if there are no * allocations or frees to perform. However, if the metaslab is being - * forced to condense and it's loaded, we need to let it through. + * forced to condense, it's loaded and we're not beyond the final + * dirty txg, we need to let it through. This prevents an issue where + * metaslabs that need to be condensed but were loaded for other + * reasons could cause a panic here. By only checking the txg in that + * branch of the conditional, we preserve the utility of the VERIFY + * statements in all other cases. */ if (range_tree_is_empty(alloctree) && range_tree_is_empty(msp->ms_freeing) && range_tree_is_empty(msp->ms_checkpointing) && - !(msp->ms_loaded && msp->ms_condense_wanted)) + !(msp->ms_loaded && msp->ms_condense_wanted && + txg <= spa_final_dirty_txg(spa))) return; diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index a9ef628bc863..d05ff61202ab 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -481,15 +481,13 @@ tests = ['zpool_trim_attach_detach_add_remove', tags = ['functional', 'zpool_trim'] [tests/functional/cli_root/zpool_upgrade] -tests = ['zpool_upgrade_001_pos', +tests = ['zpool_upgrade_001_pos', 'zpool_upgrade_002_pos', + 'zpool_upgrade_003_pos', 'zpool_upgrade_004_pos', 'zpool_upgrade_005_neg', 'zpool_upgrade_006_neg', + 'zpool_upgrade_007_pos', 'zpool_upgrade_008_pos', 'zpool_upgrade_009_neg'] tags = ['functional', 'cli_root', 'zpool_upgrade'] -# Disabled pending resolution of #9185 and #9186. -# 'zpool_upgrade_002_pos', 'zpool_upgrade_003_pos', 'zpool_upgrade_004_pos', -# 'zpool_upgrade_007_pos', 'zpool_upgrade_008_pos', - [tests/functional/cli_user/misc] tests = ['zdb_001_neg', 'zfs_001_neg', 'zfs_allow_001_neg', 'zfs_clone_001_neg', 'zfs_create_001_neg', 'zfs_destroy_001_neg', From 2d5d5dfa8435ff598206693201cd60a09fbb9a8d Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Thu, 29 Aug 2019 11:23:39 -0700 Subject: [PATCH 2/2] Matt comment feedback Signed-off-by: Paul Dagnelie --- module/zfs/metaslab.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 12d63431825f..d84541650078 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -3553,11 +3553,12 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) * Normally, we don't want to process a metaslab if there are no * allocations or frees to perform. However, if the metaslab is being * forced to condense, it's loaded and we're not beyond the final - * dirty txg, we need to let it through. This prevents an issue where - * metaslabs that need to be condensed but were loaded for other - * reasons could cause a panic here. By only checking the txg in that - * branch of the conditional, we preserve the utility of the VERIFY - * statements in all other cases. + * dirty txg, we need to let it through. Not condensing beyond the + * final dirty txg prevents an issue where metaslabs that need to be + * condensed but were loaded for other reasons could cause a panic + * here. By only checking the txg in that branch of the conditional, + * we preserve the utility of the VERIFY statements in all other + * cases. */ if (range_tree_is_empty(alloctree) && range_tree_is_empty(msp->ms_freeing) &&