Skip to content

Commit

Permalink
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
Browse files Browse the repository at this point in the history
6396359 infinite loop due to dangling dbufs (hang on unmount)
  • Loading branch information
ahrens committed Mar 11, 2006
1 parent b9ae6f8 commit c543ec0
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 256 deletions.
5 changes: 5 additions & 0 deletions usr/src/lib/libzpool/common/sys/zfs_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ _NOTE(CONSTCOND) } while (0)
#define DTRACE_PROBE2(a, b, c, d, e) ((void)0)
#endif /* DTRACE_PROBE2 */

#ifdef DTRACE_PROBE3
#undef DTRACE_PROBE3
#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0)
#endif /* DTRACE_PROBE3 */

/*
* Threads
*/
Expand Down
3 changes: 2 additions & 1 deletion usr/src/uts/common/fs/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1859,7 +1859,8 @@ arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_byteswap_func_t *swap,
mutex_exit(hash_lock);

ASSERT3U(hdr->b_size, ==, size);
DTRACE_PROBE2(arc__miss, blkptr_t *, bp, uint64_t, size);
DTRACE_PROBE3(arc__miss, blkptr_t *, bp, uint64_t, size,
zbookmark_t *, zb);
atomic_add_64(&arc.misses, 1);

rzio = zio_read(pio, spa, bp, buf->b_data, size,
Expand Down
59 changes: 25 additions & 34 deletions usr/src/uts/common/fs/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -761,14 +761,8 @@ dbuf_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
mutex_exit(&db->db_mtx);
continue;
}
if (db->db_state == DB_READ) {
/* this will be handled in dbuf_read_done() */
db->db_d.db_freed_in_flight = TRUE;
mutex_exit(&db->db_mtx);
continue;
}
if (db->db_state == DB_FILL) {
/* this will be handled in dbuf_rele() */
if (db->db_state == DB_READ || db->db_state == DB_FILL) {
/* will be handled in dbuf_read_done or dbuf_rele */
db->db_d.db_freed_in_flight = TRUE;
mutex_exit(&db->db_mtx);
continue;
Expand Down Expand Up @@ -844,16 +838,12 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
/* XXX does *this* func really need the lock? */
ASSERT(RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock));

if (osize == size)
return;

/*
* This call to dbuf_will_dirty() with the dn_struct_rwlock held
* is OK, because there can be no other references to the db
* when we are changing its size, so no concurrent DB_FILL can
* be happening.
*/
/* Make a copy of the data if necessary */
/*
* XXX we should be doing a dbuf_read, checking the return
* value and returning that up to our callers
Expand All @@ -871,12 +861,10 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
bzero((uint8_t *)buf->b_data + osize, size - osize);

mutex_enter(&db->db_mtx);
/* ASSERT3U(refcount_count(&db->db_holds), ==, 1); */
dbuf_set_data(db, buf);
VERIFY(arc_buf_remove_ref(obuf, db) == 1);
db->db.db_size = size;

/* fix up the dirty info */
if (db->db_level == 0)
db->db_d.db_data_old[tx->tx_txg&TXG_MASK] = buf;
mutex_exit(&db->db_mtx);
Expand Down Expand Up @@ -1234,6 +1222,7 @@ dbuf_clear(dmu_buf_impl_t *db)
{
dnode_t *dn = db->db_dnode;
dmu_buf_impl_t *parent = db->db_parent;
dmu_buf_impl_t *dndb = dn->dn_dbuf;
int dbuf_gone = FALSE;

ASSERT(MUTEX_HELD(&db->db_mtx));
Expand Down Expand Up @@ -1270,7 +1259,7 @@ dbuf_clear(dmu_buf_impl_t *db)
* If this dbuf is referenced from an indirect dbuf,
* decrement the ref count on the indirect dbuf.
*/
if (parent && parent != dn->dn_dbuf)
if (parent && parent != dndb)
dbuf_rele(parent, db);
}

Expand Down Expand Up @@ -1305,17 +1294,23 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
return (err);
err = dbuf_read(*parentp, NULL,
(DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL));
if (err == 0) {
*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
(blkid & ((1ULL << epbs) - 1));
if (err) {
dbuf_rele(*parentp, NULL);
*parentp = NULL;
return (err);
}
return (err);
*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
(blkid & ((1ULL << epbs) - 1));
return (0);
} else {
/* the block is referenced from the dnode */
ASSERT3U(level, ==, nlevels-1);
ASSERT(dn->dn_phys->dn_nblkptr == 0 ||
blkid < dn->dn_phys->dn_nblkptr);
*parentp = dn->dn_dbuf;
if (dn->dn_dbuf) {
dbuf_add_ref(dn->dn_dbuf, NULL);
*parentp = dn->dn_dbuf;
}
*bpp = &dn->dn_phys->dn_blkptr[blkid];
return (0);
}
Expand Down Expand Up @@ -1427,18 +1422,9 @@ dbuf_destroy(dmu_buf_impl_t *db)
* remove it from that list.
*/
if (list_link_active(&db->db_link)) {
int need_mutex;

ASSERT(!MUTEX_HELD(&dn->dn_dbufs_mtx));
need_mutex = !MUTEX_HELD(&dn->dn_dbufs_mtx);
if (need_mutex)
mutex_enter(&dn->dn_dbufs_mtx);

/* remove from dn_dbufs */
mutex_enter(&dn->dn_dbufs_mtx);
list_remove(&dn->dn_dbufs, db);

if (need_mutex)
mutex_exit(&dn->dn_dbufs_mtx);
mutex_exit(&dn->dn_dbufs_mtx);

dnode_rele(dn, db);
}
Expand Down Expand Up @@ -1494,7 +1480,7 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid)
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
(ARC_NOWAIT | ARC_PREFETCH), &zb);
}
if (parent && parent != dn->dn_dbuf)
if (parent)
dbuf_rele(parent, NULL);
}
}
Expand Down Expand Up @@ -1522,12 +1508,13 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
blkptr_t *bp = NULL;
int err;

ASSERT3P(parent, ==, NULL);
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
if (fail_sparse) {
if (err == 0 && bp && BP_IS_HOLE(bp))
err = ENOENT;
if (err) {
if (parent && parent != dn->dn_dbuf)
if (parent)
dbuf_rele(parent, NULL);
return (err);
}
Expand All @@ -1541,6 +1528,10 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
arc_buf_add_ref(db->db_buf, db);
if (db->db_buf->b_data == NULL) {
dbuf_clear(db);
if (parent) {
dbuf_rele(parent, NULL);
parent = NULL;
}
goto top;
}
ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
Expand Down Expand Up @@ -1572,7 +1563,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
mutex_exit(&db->db_mtx);

/* NOTE: we can't rele the parent until after we drop the db_mtx */
if (parent && parent != dn->dn_dbuf)
if (parent)
dbuf_rele(parent, NULL);

ASSERT3P(db->db_dnode, ==, dn);
Expand Down
2 changes: 1 addition & 1 deletion usr/src/uts/common/fs/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1638,7 +1638,7 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
* we go trundling through the block pointers.
*/
for (i = 0; i < TXG_SIZE; i++) {
if (dn->dn_dirtyblksz[i])
if (list_link_active(&dn->dn_dirty_link[i]))
break;
}
if (i != TXG_SIZE) {
Expand Down
60 changes: 31 additions & 29 deletions usr/src/uts/common/fs/zfs/dmu_objset.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,41 +279,43 @@ void
dmu_objset_evict_dbufs(objset_t *os)
{
objset_impl_t *osi = os->os;
dnode_t *mdn = osi->os_meta_dnode;
dnode_t *dn;
int allzero = B_TRUE;

mutex_enter(&osi->os_lock);

/* process the mdn last, since the other dnodes have holds on it */
list_remove(&osi->os_dnodes, osi->os_meta_dnode);
list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode);

/*
* Each time we process an entry on the list, we first move it
* to the tail so that we don't process it over and over again.
* We use the meta-dnode as a marker: if we make a complete pass
* over the list without finding any work to do, we're done.
* This ensures that we complete in linear time rather than
* quadratic time, as described in detail in bug 1182169.
* Find the first dnode with holds. We have to do this dance
* because dnode_add_ref() only works if you already have a
* hold. If there are no holds then it has no dbufs so OK to
* skip.
*/
mutex_enter(&osi->os_lock);
list_remove(&osi->os_dnodes, mdn);
list_insert_tail(&osi->os_dnodes, mdn);
while ((dn = list_head(&osi->os_dnodes)) != NULL) {
list_remove(&osi->os_dnodes, dn);
list_insert_tail(&osi->os_dnodes, dn);
if (dn == mdn) {
if (allzero)
break;
allzero = B_TRUE;
continue;
}
if (!refcount_is_zero(&dn->dn_holds)) {
allzero = B_FALSE;
dnode_add_ref(dn, FTAG);
mutex_exit(&osi->os_lock);
dnode_evict_dbufs(dn);
dnode_rele(dn, FTAG);
mutex_enter(&osi->os_lock);
}
for (dn = list_head(&osi->os_dnodes);
dn && refcount_is_zero(&dn->dn_holds);
dn = list_next(&osi->os_dnodes, dn))
continue;
if (dn)
dnode_add_ref(dn, FTAG);

while (dn) {
dnode_t *next_dn = dn;

do {
next_dn = list_next(&osi->os_dnodes, next_dn);
} while (next_dn && refcount_is_zero(&next_dn->dn_holds));
if (next_dn)
dnode_add_ref(next_dn, FTAG);

mutex_exit(&osi->os_lock);
dnode_evict_dbufs(dn);
dnode_rele(dn, FTAG);
mutex_enter(&osi->os_lock);
dn = next_dn;
}
mutex_exit(&osi->os_lock);
dnode_evict_dbufs(mdn);
}

void
Expand Down
71 changes: 47 additions & 24 deletions usr/src/uts/common/fs/zfs/dmu_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,26 +331,53 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
uint64_t space = 0;
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;

if (dn->dn_datablkshift == 0)
/*
* We don't use any locking to check for dirtiness because it's
* OK if we get stale data -- the dnode may become dirty
* immediately after our check anyway. This is just a means to
* avoid the expensive count when we aren't sure we need it. We
* need to be able to deal with a dirty dnode.
*/
if ((uintptr_t)dn->dn_assigned_tx |
list_link_active(&dn->dn_dirty_link[0]) |
list_link_active(&dn->dn_dirty_link[1]) |
list_link_active(&dn->dn_dirty_link[2]) |
list_link_active(&dn->dn_dirty_link[3]))
return;

/*
* not that the dnode can change, since it isn't dirty, but
* dbuf_hold_impl() wants us to have the struct_rwlock.
* also need it to protect dn_maxblkid.
* the struct_rwlock protects us against dn_phys->dn_nlevels
* changing, in case (against all odds) we manage to dirty &
* sync out the changes after we check for being dirty.
* also, dbuf_hold_impl() wants us to have the struct_rwlock.
*
* It's fine to use dn_datablkshift rather than the dn_phys
* equivalent because if it is changing, maxblkid==0 and we will
* bail.
*/
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = off >> dn->dn_datablkshift;
nblks = (off + len) >> dn->dn_datablkshift;
if (dn->dn_phys->dn_maxblkid == 0) {
if (off == 0 && len >= dn->dn_datablksz) {
blkid = 0;
nblks = 1;
} else {
rw_exit(&dn->dn_struct_rwlock);
return;
}
} else {
blkid = off >> dn->dn_datablkshift;
nblks = (off + len) >> dn->dn_datablkshift;

if (blkid >= dn->dn_maxblkid) {
rw_exit(&dn->dn_struct_rwlock);
return;
}
if (blkid + nblks > dn->dn_maxblkid)
nblks = dn->dn_maxblkid - blkid;
if (blkid >= dn->dn_phys->dn_maxblkid) {
rw_exit(&dn->dn_struct_rwlock);
return;
}
if (blkid + nblks > dn->dn_phys->dn_maxblkid)
nblks = dn->dn_phys->dn_maxblkid - blkid;

/* don't bother after the 100,000 blocks */
nblks = MIN(nblks, 128*1024);
/* don't bother after 128,000 blocks */
nblks = MIN(nblks, 128*1024);
}

if (dn->dn_phys->dn_nlevels == 1) {
int i;
Expand Down Expand Up @@ -399,9 +426,10 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
}
}
dbuf_rele(dbuf, FTAG);
} else {
/* the indirect block is sparse */
ASSERT(err == ENOENT);
}
if (err != 0 && err != ENOENT) {
tx->tx_err = err;
break;
}

blkid += tochk;
Expand All @@ -416,7 +444,7 @@ static void
dmu_tx_hold_free_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
{
uint64_t start, end, i;
int dirty, err, shift;
int err, shift;
zio_t *zio;

/* first block */
Expand Down Expand Up @@ -465,12 +493,7 @@ dmu_tx_hold_free_impl(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
}

dmu_tx_count_dnode(tx, dn);

/* XXX locking */
dirty = dn->dn_dirtyblksz[0] | dn->dn_dirtyblksz[1] |
dn->dn_dirtyblksz[2] | dn->dn_dirtyblksz[3];
if (dn->dn_assigned_tx != NULL && !dirty)
dmu_tx_count_free(tx, dn, off, len);
dmu_tx_count_free(tx, dn, off, len);
}

void
Expand Down
Loading

0 comments on commit c543ec0

Please sign in to comment.