From dc47d0c1d29187ca0aac9573da335afd86e073b4 Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Tue, 4 Feb 2014 17:39:47 -0500 Subject: [PATCH] Workaround for pools affected by #2094 erratum ZoL commit 1421c89 unintentionally changed the disk format in a forward- compatible, but not backward compatible way. This was accomplished by adding an entry to zbookmark_t, which is included in a couple of on-disk structures. That lead to the creation of pools with incorrect dsl_scan_phys_t objects that could only be imported by versions of ZoL containing that commit. Such pools cannot be imported by other versions of ZFS or past versions of ZoL. The additional field has been removed by the previous commit. However, affected pools must be imported and scrubbed using a version of ZoL with this commit applied. This will return the pools to a state in which they may be imported by other implementations. The 'zpool import' or 'zpool status' command can be used to determine if a pool is impacted. A message similar to one of the following means your pool must be scrubbed to restore compatibility. $ zpool import pool: zol-0.6.2-173 id: 1165955789558693437 state: ONLINE status: Errata #1 detected. action: The pool can be imported using its name or numeric identifier, however there is a compatibility issue which should be corrected by running 'zpool scrub' see: http://zfsonlinux.org/msg/ZFS-8000-ER config: ... $ zpool status pool: zol-0.6.2-173 state: ONLINE scan: pool compatibility issue detected. see: https://github.com/zfsonlinux/zfs/issues/2094 action: To correct the issue run 'zpool scrub'. config: ... If there was an async destroy in progress 'zpool import' will prevent the pool from being imported. Further advice on how to proceed will be provided by the error message as follows. $ zpool import pool: zol-0.6.2-173 id: 1165955789558693437 state: ONLINE status: Errata #1 detected. action: The pool can not be imported with this version of ZFS due to an active asynchronous destroy. Revert to an earlier version and allow the destroy to complete before updating. see: http://zfsonlinux.org/msg/ZFS-8000-ER config: ... Pools affected by the damaged dsl_scan_phys_t can be detected prior to an upgrade by running the following command as root: zdb -dddd poolname 1 | grep -P '^\t\tscan = ' | sed -e 's;scan = ;;' | wc -w Note that `poolname` must be replaced with the name of the pool you wish to check. A value of 25 indicates the dsl_scan_phys_t has been damaged. A value of 24 indicates that the dsl_scan_phys_t is normal. A value of 0 indicates that there has never been a scrub run on the pool. The regression caused by the change to zbookmark_t never made it into a tagged release or any Gentoo backports. Only those using HEAD were affected. However, this patch has a few limitations. There is no way to detect a damaged dsl_scan_phys_t object when it has occurred on 32-bit systems due to integer rounding that wrote incorrect information, but avoided the overflow on them. Correcting such issues requires triggering a scrub. In addition, bptree_entry_phys_t objects are also affected. These objects only exist during an asynchronous destroy and automating repair of damaged bptree_entry_phys_t objects is non-trivial. Any pools that have been imported by an affected version of ZoL must have all asynchronous destroy operations finish before export and subsequent import by a version containing this commit. Failure to do that will prevent pool import. The presence of any background destroys on any imported pools can be checked by running `zpool get freeing` as root. This will display a non-zero value for any pool with an active asynchronous destroy. Lastly, it is expected that no user data has been lost as a result of this erratum. Original-patch-by: Tim Chase Signed-off-by: Richard Yao Signed-off-by: Brian Behlendorf Issue #2094 --- cmd/zpool/zpool_main.c | 55 +++++++++++++++++++++++++++++++++++--- include/libzfs.h | 7 +++-- include/sys/dsl_scan.h | 2 ++ include/sys/fs/zfs.h | 12 +++++++++ include/sys/spa_impl.h | 1 + lib/libzfs/libzfs_status.c | 21 +++++++++++---- module/zfs/dsl_scan.c | 39 +++++++++++++++++++++++++++ module/zfs/spa.c | 2 ++ module/zfs/spa_config.c | 2 ++ 9 files changed, 130 insertions(+), 11 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index a856fd49e85c..0c0a0e3a6834 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -1608,7 +1608,8 @@ show_import(nvlist_t *config) uint64_t guid; char *msgid; nvlist_t *nvroot; - int reason; + zpool_status_t reason; + zpool_errata_t errata; const char *health; uint_t vsc; int namewidth; @@ -1627,7 +1628,7 @@ show_import(nvlist_t *config) (uint64_t **)&vs, &vsc) == 0); health = zpool_state_to_name(vs->vs_state, vs->vs_aux); - reason = zpool_import_status(config, &msgid); + reason = zpool_import_status(config, &msgid, &errata); (void) printf(gettext(" pool: %s\n"), name); (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); @@ -1715,6 +1716,11 @@ show_import(nvlist_t *config) "resilvered.\n")); break; + case ZPOOL_STATUS_ERRATA: + (void) printf(gettext(" status: Errata #%d detected.\n"), + errata); + break; + default: /* * No other status can be seen when importing pools. @@ -1736,6 +1742,29 @@ show_import(nvlist_t *config) (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier and\n\tthe '-f' flag.\n")); + } else if (reason == ZPOOL_STATUS_ERRATA) { + switch (errata) { + case ZPOOL_ERRATA_ZOL_2094_SCRUB: + (void) printf(gettext(" action: The pool can " + "be imported using its name or numeric " + "identifier,\n\thowever there is a compat" + "ibility issue which should be corrected" + "\n\tby running 'zpool scrub'\n")); + break; + case ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY: + (void) printf(gettext(" action: The pool can" + "not be imported with this version of ZFS " + "due to\n\tan active asynchronous destroy. " + "Revert to an earlier version\n\tand " + "allow the destroy to complete before " + "updating.\n")); + break; + default: + /* + * All errata must contain an action message. + */ + assert(0); + } } else { (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " @@ -4125,13 +4154,14 @@ status_callback(zpool_handle_t *zhp, void *data) status_cbdata_t *cbp = data; nvlist_t *config, *nvroot; char *msgid; - int reason; + zpool_status_t reason; + zpool_errata_t errata; const char *health; uint_t c; vdev_stat_t *vs; config = zpool_get_config(zhp, NULL); - reason = zpool_get_status(zhp, &msgid); + reason = zpool_get_status(zhp, &msgid, &errata); cbp->cb_count++; @@ -4349,6 +4379,23 @@ status_callback(zpool_handle_t *zhp, void *data) "'zpool clear'.\n")); break; + case ZPOOL_STATUS_ERRATA: + switch (errata) { + case ZPOOL_ERRATA_ZOL_2094_SCRUB: + (void) printf(gettext("status: Errata #%d detected.\n"), + errata); + (void) printf(gettext("action: To correct the issue " + "run 'zpool scrub'.\n")); + break; + default: + /* + * All errata which allow the pool to be imported + * must contain an action message. + */ + assert(0); + } + break; + default: /* * The remaining errors can't actually be generated, yet. diff --git a/include/libzfs.h b/include/libzfs.h index 742f39f944e9..55dd34c99de1 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -312,6 +312,7 @@ typedef enum { ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ + ZPOOL_STATUS_ERRATA, /* informational errata available */ /* * If the pool has unsupported features but can still be opened in @@ -347,8 +348,10 @@ typedef enum { ZPOOL_STATUS_OK } zpool_status_t; -extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); -extern zpool_status_t zpool_import_status(nvlist_t *, char **); +extern zpool_status_t zpool_get_status(zpool_handle_t *, char **, + zpool_errata_t *); +extern zpool_status_t zpool_import_status(nvlist_t *, char **, + zpool_errata_t *); extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h index bf8c5ac824a1..bcb85d67d38e 100644 --- a/include/sys/dsl_scan.h +++ b/include/sys/dsl_scan.h @@ -72,6 +72,8 @@ typedef enum dsl_scan_flags { DSF_VISIT_DS_AGAIN = 1<<0, } dsl_scan_flags_t; +#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN) + /* * Every pool will have one dsl_scan_t and this structure will contain * in-memory information about the scan and a pointer to the on-disk diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index c54721155a85..df5692d14b1c 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -548,6 +548,7 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */ #define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" #define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */ +#define ZPOOL_CONFIG_ERRATA "errata" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such @@ -704,6 +705,17 @@ typedef enum dsl_scan_state { DSS_NUM_STATES } dsl_scan_state_t; +/* + * Errata described by http://zfsonlinux.org/msg/ZFS-8000-ER. The ordering + * of this enum must be maintained to ensure the errata identifiers map to + * the correct documentation. New errata may only be appended to the list + * and must contain corresponding documentation at the above link. + */ +typedef enum zpool_errata { + ZPOOL_ERRATA_NONE, + ZPOOL_ERRATA_ZOL_2094_SCRUB, + ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY, +} zpool_errata_t; /* * Vdev statistics. Note: all fields should be 64-bit because this diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 55515c1fc369..90a32d3f0694 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -236,6 +236,7 @@ struct spa { uint64_t spa_deadman_calls; /* number of deadman calls */ hrtime_t spa_sync_starttime; /* starting time of spa_sync */ uint64_t spa_deadman_synctime; /* deadman expiration timer */ + uint64_t spa_errata; /* errata issues detected */ spa_stats_t spa_stats; /* assorted spa statistics */ /* diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c index 0ef5f36d69f3..534ff853a5bc 100644 --- a/lib/libzfs/libzfs_status.c +++ b/lib/libzfs/libzfs_status.c @@ -67,6 +67,7 @@ static char *zfs_msgid_table[] = { "ZFS-8000-HC", "ZFS-8000-JQ", "ZFS-8000-K4", + "ZFS-8000-ER", }; #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) @@ -182,7 +183,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) * only picks the most damaging of all the current errors to report. */ static zpool_status_t -check_status(nvlist_t *config, boolean_t isimport) +check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap) { nvlist_t *nvroot; vdev_stat_t *vs; @@ -193,6 +194,7 @@ check_status(nvlist_t *config, boolean_t isimport) uint64_t stateval; uint64_t suspended; uint64_t hostid = 0; + uint64_t errata = 0; unsigned long system_hostid = gethostid() & 0xffffffff; verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, @@ -356,13 +358,22 @@ check_status(nvlist_t *config, boolean_t isimport) } } + /* + * Informational errata available. + */ + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata); + if (errata) { + *erratap = errata; + return (ZPOOL_STATUS_ERRATA); + } + return (ZPOOL_STATUS_OK); } zpool_status_t -zpool_get_status(zpool_handle_t *zhp, char **msgid) +zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata) { - zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata); if (ret >= NMSGID) *msgid = NULL; @@ -373,9 +384,9 @@ zpool_get_status(zpool_handle_t *zhp, char **msgid) } zpool_status_t -zpool_import_status(nvlist_t *config, char **msgid) +zpool_import_status(nvlist_t *config, char **msgid, zpool_errata_t *errata) { - zpool_status_t ret = check_status(config, B_TRUE); + zpool_status_t ret = check_status(config, B_TRUE, errata); if (ret >= NMSGID) *msgid = NULL; diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index ea04507813f7..7807f8485a86 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -123,6 +123,42 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys); + /* + * Detect if the pool contains the signature of #2094. If it + * does properly update the scn->scn_phys structure and notify + * the administrator by setting an errata for the pool. + */ + if (err == EOVERFLOW) { + uint64_t zaptmp[SCAN_PHYS_NUMINTS + 1]; + VERIFY3S(SCAN_PHYS_NUMINTS, ==, 24); + VERIFY3S(offsetof(dsl_scan_phys_t, scn_flags), ==, + (23 * sizeof (uint64_t))); + + err = zap_lookup(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, + sizeof (uint64_t), SCAN_PHYS_NUMINTS + 1, &zaptmp); + if (err == 0) { + uint64_t overflow = zaptmp[SCAN_PHYS_NUMINTS]; + + if (overflow & ~DSL_SCAN_FLAGS_MASK || + scn->scn_async_destroying) { + spa->spa_errata = + ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY; + return (EOVERFLOW); + } + + bcopy(zaptmp, &scn->scn_phys, + SCAN_PHYS_NUMINTS * sizeof (uint64_t)); + scn->scn_phys.scn_flags = overflow; + + /* Required scrub already in progress. */ + if (scn->scn_phys.scn_state == DSS_FINISHED || + scn->scn_phys.scn_state == DSS_CANCELED) + spa->spa_errata = + ZPOOL_ERRATA_ZOL_2094_SCRUB; + } + } + if (err == ENOENT) return (0); else if (err) @@ -319,6 +355,9 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) } scn->scn_phys.scn_end_time = gethrestime_sec(); + + if (spa->spa_errata == ZPOOL_ERRATA_ZOL_2094_SCRUB) + spa->spa_errata = 0; } /* ARGSUSED */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 7052eec4abab..9e7a7b785a32 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -4083,6 +4083,8 @@ spa_tryimport(nvlist_t *tryconfig) spa->spa_uberblock.ub_timestamp) == 0); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, + spa->spa_errata) == 0); /* * If the bootfs property exists on this pool then we diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 9efa053616bc..5b95a8e811e5 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -365,6 +365,8 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, + spa->spa_errata) == 0); VERIFY(spa->spa_comment == NULL || nvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);