Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

btree: Reduce opportunities for branch mispredictions in binary search #14866

Merged
merged 1 commit into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
int err;
struct sublivelist_verify *sv = args;

zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
sizeof (sublivelist_verify_block_refcnt_t));

err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
Expand Down Expand Up @@ -390,7 +390,7 @@ sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
{
(void) args;
sublivelist_verify_t sv;
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
sizeof (sublivelist_verify_block_t));
int err = sublivelist_verify_func(&sv, dle);
zfs_btree_clear(&sv.sv_leftover);
Expand Down Expand Up @@ -682,7 +682,7 @@ livelist_metaslab_validate(spa_t *spa)
(void) printf("Verifying deleted livelist entries\n");

sublivelist_verify_t sv;
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
sizeof (sublivelist_verify_block_t));
iterate_deleted_livelists(spa, livelist_verify, &sv);

Expand Down Expand Up @@ -716,7 +716,7 @@ livelist_metaslab_validate(spa_t *spa)
mv.mv_start = m->ms_start;
mv.mv_end = m->ms_start + m->ms_size;
zfs_btree_create(&mv.mv_livelist_allocs,
livelist_block_compare,
livelist_block_compare, NULL,
sizeof (sublivelist_verify_block_t));

mv_populate_livelist_allocs(&mv, &sv);
Expand Down
66 changes: 62 additions & 4 deletions include/sys/btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,13 @@ typedef struct zfs_btree_index {
boolean_t bti_before;
} zfs_btree_index_t;

typedef struct btree {
typedef struct btree zfs_btree_t;
typedef void * (*bt_find_in_buf_f) (zfs_btree_t *, uint8_t *, uint32_t,
const void *, zfs_btree_index_t *);

struct btree {
int (*bt_compar) (const void *, const void *);
bt_find_in_buf_f bt_find_in_buf;
size_t bt_elem_size;
size_t bt_leaf_size;
uint32_t bt_leaf_cap;
Expand All @@ -115,7 +120,54 @@ typedef struct btree {
uint64_t bt_num_nodes;
zfs_btree_hdr_t *bt_root;
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
} zfs_btree_t;
};

/*
* Implementation of Shar's algorithm designed to accelerate binary search by
* eliminating impossible to predict branches.
*
* For optimality, this should be used to generate the search function in the
* same file as the comparator and the comparator should be marked
* `__attribute__((always_inline)) inline` so that the compiler will inline it.
*
* Arguments are:
*
* NAME - The function name for this instance of the search function. Use it
* in a subsequent call to zfs_btree_create().
* T - The element type stored inside the B-Tree.
* COMP - A comparator to compare two nodes; it must return exactly -1, 0,
* or +1: -1 for <, 0 for ==, and +1 for >. For trivial comparisons,
* TREE_CMP() from avl.h can be used in a boilerplate function.
*/
ryao marked this conversation as resolved.
Show resolved Hide resolved
/* BEGIN CSTYLED */
#define ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP) \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") \
static void * \
NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems, \
const void *value, zfs_btree_index_t *where) \
{ \
T *i = (T *)buf; \
(void) tree; \
_Pragma("GCC unroll 9") \
while (nelems > 1) { \
uint32_t half = nelems / 2; \
nelems -= half; \
i += (COMP(&i[half - 1], value) < 0) * half; \
} \
\
int comp = COMP(i, value); \
where->bti_offset = (i - (T *)buf) + (comp < 0); \
where->bti_before = (comp != 0); \
\
if (comp == 0) { \
return (i); \
} \
\
return (NULL); \
} \
_Pragma("GCC diagnostic pop")
/* END CSTYLED */

/*
* Allocate and deallocate caches for btree nodes.
Expand All @@ -129,13 +181,19 @@ void zfs_btree_fini(void);
* tree - the tree to be initialized
* compar - function to compare two nodes, it must return exactly: -1, 0, or +1
* -1 for <, 0 for ==, and +1 for >
* find - optional function to accelerate searches inside B-Tree nodes
* through Shar's algorithm and comparator inlining. Setting this to
* NULL will use a generic function. The function should be created
* using ZFS_BTREE_FIND_IN_BUF_FUNC() in the same file as compar.
* compar should be marked `__attribute__((always_inline)) inline` or
* performance is unlikely to improve very much.
* size - the value of sizeof(struct my_type)
* lsize - custom leaf size
*/
void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
size_t);
bt_find_in_buf_f, size_t);
void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
size_t, size_t);
bt_find_in_buf_f, size_t, size_t);

/*
* Find a node with a matching value in the tree. Returns the matching node
Expand Down
14 changes: 14 additions & 0 deletions module/Kbuild.in
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ ifeq ($(CONFIG_KASAN),y)
ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
endif

# Generated binary search code is particularly bad with this optimization.
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
# is not affected when unrolling is done.
# Disable it until the following upstream issue is resolved:
# https://github.com/llvm/llvm-project/issues/62790
ifeq ($(CONFIG_X86),y)
ifeq ($(CONFIG_CC_IS_CLANG),y)
CFLAGS_zfs/dsl_scan.o += -mllvm -x86-cmov-converter=false
CFLAGS_zfs/metaslab.o += -mllvm -x86-cmov-converter=false
CFLAGS_zfs/range_tree.o += -mllvm -x86-cmov-converter=false
CFLAGS_zfs/zap_micro.o += -mllvm -x86-cmov-converter=false
endif
endif

ifneq ($(KBUILD_EXTMOD),)
@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
Expand Down
14 changes: 14 additions & 0 deletions module/Makefile.bsd
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,20 @@ beforeinstall:

.include <bsd.kmod.mk>

# Generated binary search code is particularly bad with this optimization.
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
# is not affected when unrolling is done.
# Disable it until the following upstream issue is resolved:
# https://github.com/llvm/llvm-project/issues/62790
.if ${CC} == "clang"
ryao marked this conversation as resolved.
Show resolved Hide resolved
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64"
CFLAGS.dsl_scan.c= -mllvm -x86-cmov-converter=false
CFLAGS.metaslab.c= -mllvm -x86-cmov-converter=false
CFLAGS.range_tree.c= -mllvm -x86-cmov-converter=false
CFLAGS.zap_micro.c= -mllvm -x86-cmov-converter=false
.endif
.endif

CFLAGS.sysctl_os.c= -include ../zfs_config.h
CFLAGS.xxhash.c+= -include ${SYSDIR}/sys/_null.h

Expand Down
22 changes: 15 additions & 7 deletions module/zfs/btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,21 +193,29 @@ zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr)

void
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
size_t size)
bt_find_in_buf_f bt_find_in_buf, size_t size)
{
zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
zfs_btree_create_custom(tree, compar, bt_find_in_buf, size,
BTREE_LEAF_SIZE);
}

static void *
zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
const void *value, zfs_btree_index_t *where);

void
zfs_btree_create_custom(zfs_btree_t *tree,
int (*compar) (const void *, const void *),
bt_find_in_buf_f bt_find_in_buf,
size_t size, size_t lsize)
{
size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);

ASSERT3U(size, <=, esize / 2);
memset(tree, 0, sizeof (*tree));
tree->bt_compar = compar;
tree->bt_find_in_buf = (bt_find_in_buf == NULL) ?
zfs_btree_find_in_buf : bt_find_in_buf;
tree->bt_elem_size = size;
tree->bt_leaf_size = lsize;
tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
Expand Down Expand Up @@ -303,7 +311,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
* element in the last leaf, it's in the last leaf or
* it's not in the tree.
*/
void *d = zfs_btree_find_in_buf(tree,
void *d = tree->bt_find_in_buf(tree,
last_leaf->btl_elems +
last_leaf->btl_hdr.bth_first * size,
last_leaf->btl_hdr.bth_count, value, &idx);
Expand All @@ -327,7 +335,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
ASSERT3P(node, !=, NULL);
void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
void *d = tree->bt_find_in_buf(tree, node->btc_elems,
node->btc_hdr.bth_count, value, &idx);
EQUIV(d != NULL, !idx.bti_before);
if (d != NULL) {
Expand All @@ -347,7 +355,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
*/
zfs_btree_leaf_t *leaf = (depth == 0 ?
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
void *d = tree->bt_find_in_buf(tree, leaf->btl_elems +
leaf->btl_hdr.bth_first * size,
leaf->btl_hdr.bth_count, value, &idx);

Expand Down Expand Up @@ -671,7 +679,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
zfs_btree_index_t idx;
ASSERT(zfs_btree_is_core(par_hdr));
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
par_hdr->bth_count, buf, &idx), ==, NULL);
ASSERT(idx.bti_before);
uint32_t offset = idx.bti_offset;
Expand Down Expand Up @@ -897,7 +905,7 @@ zfs_btree_find_parent_idx(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
}
zfs_btree_index_t idx;
zfs_btree_core_t *parent = hdr->bth_parent;
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
ASSERT(idx.bti_before);
ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);
Expand Down
7 changes: 6 additions & 1 deletion module/zfs/dsl_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -4877,6 +4877,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
* with single operation. Plus it makes scrubs more sequential and reduces
* chances that minor extent change move it within the B-tree.
*/
__attribute__((always_inline)) inline
static int
ext_size_compare(const void *x, const void *y)
{
Expand All @@ -4885,13 +4886,17 @@ ext_size_compare(const void *x, const void *y)
return (TREE_CMP(*a, *b));
}

ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t,
ext_size_compare)

static void
ext_size_create(range_tree_t *rt, void *arg)
{
(void) rt;
zfs_btree_t *size_tree = arg;

zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
zfs_btree_create(size_tree, ext_size_compare, ext_size_find_in_buf,
sizeof (uint64_t));
}

static void
Expand Down
23 changes: 16 additions & 7 deletions module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,7 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
* Comparison function for the private size-ordered tree using 32-bit
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
*/
__attribute__((always_inline)) inline
static int
metaslab_rangesize32_compare(const void *x1, const void *x2)
{
Expand All @@ -1352,16 +1353,15 @@ metaslab_rangesize32_compare(const void *x1, const void *x2)
uint64_t rs_size2 = r2->rs_end - r2->rs_start;

int cmp = TREE_CMP(rs_size1, rs_size2);
if (likely(cmp))
return (cmp);

return (TREE_CMP(r1->rs_start, r2->rs_start));
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
}

/*
* Comparison function for the private size-ordered tree using 64-bit
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
*/
__attribute__((always_inline)) inline
static int
metaslab_rangesize64_compare(const void *x1, const void *x2)
{
Expand All @@ -1372,11 +1372,10 @@ metaslab_rangesize64_compare(const void *x1, const void *x2)
uint64_t rs_size2 = r2->rs_end - r2->rs_start;

int cmp = TREE_CMP(rs_size1, rs_size2);
if (likely(cmp))
return (cmp);

return (TREE_CMP(r1->rs_start, r2->rs_start));
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
}

typedef struct metaslab_rt_arg {
zfs_btree_t *mra_bt;
uint32_t mra_floor_shift;
Expand Down Expand Up @@ -1412,6 +1411,13 @@ metaslab_size_tree_full_load(range_tree_t *rt)
range_tree_walk(rt, metaslab_size_sorted_add, &arg);
}


ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf,
range_seg32_t, metaslab_rangesize32_compare)

ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf,
range_seg64_t, metaslab_rangesize64_compare)

/*
* Create any block allocator specific components. The current allocators
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
Expand All @@ -1424,19 +1430,22 @@ metaslab_rt_create(range_tree_t *rt, void *arg)

size_t size;
int (*compare) (const void *, const void *);
bt_find_in_buf_f bt_find;
switch (rt->rt_type) {
case RANGE_SEG32:
size = sizeof (range_seg32_t);
compare = metaslab_rangesize32_compare;
bt_find = metaslab_rt_find_rangesize32_in_buf;
break;
case RANGE_SEG64:
size = sizeof (range_seg64_t);
compare = metaslab_rangesize64_compare;
bt_find = metaslab_rt_find_rangesize64_in_buf;
break;
default:
panic("Invalid range seg type %d", rt->rt_type);
}
zfs_btree_create(size_tree, compare, size);
zfs_btree_create(size_tree, compare, bt_find, size);
mrap->mra_floor_shift = metaslab_by_size_min_shift;
}

Expand Down
Loading