Skip to content

Commit

Permalink
Revert "Reduce dbuf_find() lock contention"
Browse files Browse the repository at this point in the history
This reverts commit 34dbc61.  While this
change resolved the lock contention observed for certain workloads, it
inadventantly reduced the maximum hash inserts/removes per second.  This
appears to be due to the slightly higher acquisition cost of a rwlock vs
a mutex.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
  • Loading branch information
behlendorf authored and tonyhutter committed Sep 21, 2022
1 parent b66f8d3 commit 91e0215
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 18 deletions.
7 changes: 4 additions & 3 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,13 @@ typedef struct dmu_buf_impl {
uint8_t db_dirtycnt;
} dmu_buf_impl_t;

#define DBUF_RWLOCKS 8192
#define DBUF_HASH_RWLOCK(h, idx) (&(h)->hash_rwlocks[(idx) & (DBUF_RWLOCKS-1)])
/* Note: the dbuf hash table is exposed only for the mdb module */
#define DBUF_MUTEXES 2048
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
typedef struct dbuf_hash_table {
uint64_t hash_table_mask;
dmu_buf_impl_t **hash_table;
krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned;
kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
} dbuf_hash_table_t;

typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t);
Expand Down
26 changes: 13 additions & 13 deletions module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -339,18 +339,18 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;

rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_READER);
mutex_enter(DBUF_HASH_MUTEX(h, idx));
for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) {
if (DBUF_EQUAL(db, os, obj, level, blkid)) {
mutex_enter(&db->db_mtx);
if (db->db_state != DB_EVICTING) {
rw_exit(DBUF_HASH_RWLOCK(h, idx));
mutex_exit(DBUF_HASH_MUTEX(h, idx));
return (db);
}
mutex_exit(&db->db_mtx);
}
}
rw_exit(DBUF_HASH_RWLOCK(h, idx));
mutex_exit(DBUF_HASH_MUTEX(h, idx));
return (NULL);
}

Expand Down Expand Up @@ -393,13 +393,13 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;

rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
mutex_enter(DBUF_HASH_MUTEX(h, idx));
for (dbf = h->hash_table[idx], i = 0; dbf != NULL;
dbf = dbf->db_hash_next, i++) {
if (DBUF_EQUAL(dbf, os, obj, level, blkid)) {
mutex_enter(&dbf->db_mtx);
if (dbf->db_state != DB_EVICTING) {
rw_exit(DBUF_HASH_RWLOCK(h, idx));
mutex_exit(DBUF_HASH_MUTEX(h, idx));
return (dbf);
}
mutex_exit(&dbf->db_mtx);
Expand All @@ -417,7 +417,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
mutex_enter(&db->db_mtx);
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
rw_exit(DBUF_HASH_RWLOCK(h, idx));
mutex_exit(DBUF_HASH_MUTEX(h, idx));
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
DBUF_STAT_MAX(hash_elements_max, he);

Expand Down Expand Up @@ -474,13 +474,13 @@ dbuf_hash_remove(dmu_buf_impl_t *db)

/*
* We mustn't hold db_mtx to maintain lock ordering:
* DBUF_HASH_RWLOCK > db_mtx.
* DBUF_HASH_MUTEX > db_mtx.
*/
ASSERT(zfs_refcount_is_zero(&db->db_holds));
ASSERT(db->db_state == DB_EVICTING);
ASSERT(!MUTEX_HELD(&db->db_mtx));

rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER);
mutex_enter(DBUF_HASH_MUTEX(h, idx));
dbp = &h->hash_table[idx];
while ((dbf = *dbp) != db) {
dbp = &dbf->db_hash_next;
Expand All @@ -491,7 +491,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
if (h->hash_table[idx] &&
h->hash_table[idx]->db_hash_next == NULL)
DBUF_STAT_BUMPDOWN(hash_chains);
rw_exit(DBUF_HASH_RWLOCK(h, idx));
mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
}

Expand Down Expand Up @@ -914,8 +914,8 @@ dbuf_init(void)
sizeof (dmu_buf_impl_t),
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);

for (i = 0; i < DBUF_RWLOCKS; i++)
rw_init(&h->hash_rwlocks[i], NULL, RW_DEFAULT, NULL);
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);

dbuf_stats_init(h);

Expand Down Expand Up @@ -981,8 +981,8 @@ dbuf_fini(void)

dbuf_stats_destroy();

for (i = 0; i < DBUF_RWLOCKS; i++)
rw_destroy(&h->hash_rwlocks[i]);
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
#if defined(_KERNEL)
/*
* Large allocations which do not require contiguous pages
Expand Down
4 changes: 2 additions & 2 deletions module/zfs/dbuf_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
if (size)
buf[0] = 0;

rw_enter(DBUF_HASH_RWLOCK(h, dsh->idx), RW_READER);
mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
/*
* Returning ENOMEM will cause the data and header functions
Expand All @@ -158,7 +158,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data)

mutex_exit(&db->db_mtx);
}
rw_exit(DBUF_HASH_RWLOCK(h, dsh->idx));
mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));

return (error);
}
Expand Down

0 comments on commit 91e0215

Please sign in to comment.