From 086204d4f498ef864e47526b5cf1f21d01cb6163 Mon Sep 17 00:00:00 2001
From: Serapheim Dimitropoulos
Date: Thu, 15 Aug 2019 07:44:57 -0700
Subject: [PATCH] Assert that a dnode's bonuslen never exceeds its recorded size

This patch introduces an assertion that can catch pitfalls in development
where there is a mismatch between the size of reads and writes between a
*_phys structure and its respective in-core structure when bonus buffers
are used. This debugging aid should be complementary to the verification
done by ztest in ztest_verify_dnode_bt().

A side effect of this patch is that we now clear out any extra bytes past
a bonus buffer's new size when the buffer is shrinking.

Reviewed-by: Matt Ahrens
Reviewed-by: Brian Behlendorf
Reviewed-by: Tom Caputi
Signed-off-by: Serapheim Dimitropoulos
Closes #8348
---
 module/zfs/dbuf.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 module/zfs/dnode.c |  8 ++++++++
 2 files changed, 52 insertions(+)

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 94c49b3ef0a9..6cd7ae9567f5 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -3785,6 +3785,46 @@ dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 	zio_nowait(zio);
 }
 
+#ifdef ZFS_DEBUG
+/*
+ * Verify that the size of the data in our bonus buffer does not exceed
+ * its recorded size.
+ *
+ * The purpose of this verification is to catch any cases in development
+ * where the size of a phys structure (e.g. space_map_phys_t) grows and,
+ * due to incorrect feature management, older pools expect to read more
+ * data even though they didn't actually write it to begin with.
+ *
+ * For example, this would catch an error in the feature logic where we
+ * open an older pool and we expect to write the space map histogram of
+ * a space map with size SPACE_MAP_SIZE_V0.
+ */
+static void
+dbuf_sync_leaf_verify_bonus_dnode(dbuf_dirty_record_t *dr)
+{
+	dnode_t *dn = DB_DNODE(dr->dr_dbuf);
+
+	/*
+	 * Encrypted bonus buffers can have data past their bonuslen.
+	 * Skip the verification of these blocks.
+	 */
+	if (DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))
+		return;
+
+	uint16_t bonuslen = dn->dn_phys->dn_bonuslen;
+	uint16_t maxbonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
+	ASSERT3U(bonuslen, <=, maxbonuslen);
+
+	arc_buf_t *datap = dr->dt.dl.dr_data;
+	char *datap_end = ((char *)datap) + bonuslen;
+	char *datap_max = ((char *)datap) + maxbonuslen;
+
+	/* ensure that everything is zero after our data */
+	for (; datap_end < datap_max; datap_end++)
+		ASSERT(*datap_end == 0);
+}
+#endif
+
 /*
  * dbuf_sync_leaf() is called recursively from dbuf_sync_list() so it is
  * critical the we not allow the compiler to inline this function in to
@@ -3861,6 +3901,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 		    DN_MAX_BONUS_LEN(dn->dn_phys));
 		DB_DNODE_EXIT(db);
 
+#ifdef ZFS_DEBUG
+		dbuf_sync_leaf_verify_bonus_dnode(dr);
+#endif
+
 		if (*datap != db->db.db_data) {
 			int slots = DB_DNODE(db)->dn_num_slots;
 			int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index cc7bc5ec82c8..097eaf3ee6f6 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -389,6 +389,14 @@ dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 	ASSERT3U(newsize, <=, DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
 	    (dn->dn_nblkptr-1) * sizeof (blkptr_t));
+
+	if (newsize < dn->dn_bonuslen) {
+		/* clear any data after the end of the new size */
+		size_t diff = dn->dn_bonuslen - newsize;
+		char *data_end = ((char *)dn->dn_bonus->db.db_data) + newsize;
+		bzero(data_end, diff);
+	}
+
 	dn->dn_bonuslen = newsize;
 	if (newsize == 0)
 		dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN;