Make rehashing and resizing less generic #282

Merged: 3 commits, Jul 21, 2021

333 changes: 213 additions & 120 deletions src/raw/mod.rs
@@ -3,14 +3,14 @@ use crate::scopeguard::guard;
use crate::TryReserveError;
#[cfg(feature = "nightly")]
use crate::UnavailableMutError;
use core::hint;
use core::iter::FusedIterator;
use core::marker::PhantomData;
use core::mem;
use core::mem::ManuallyDrop;
#[cfg(feature = "nightly")]
use core::mem::MaybeUninit;
use core::ptr::NonNull;
use core::{hint, ptr};

cfg_if! {
// Use the SSE2 implementation if possible: it allows us to scan 16 buckets
@@ -359,6 +359,7 @@ impl<T> Bucket<T> {
pub unsafe fn as_mut<'a>(&self) -> &'a mut T {
&mut *self.as_ptr()
}
#[cfg(feature = "raw")]
#[cfg_attr(feature = "inline-more", inline)]
pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) {
self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1);
@@ -682,102 +683,18 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
hasher: impl Fn(&T) -> u64,
fallibility: Fallibility,
) -> Result<(), TryReserveError> {
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
let new_items = match self.table.items.checked_add(additional) {
Some(new_items) => new_items,
None => return Err(fallibility.capacity_overflow()),
};
let full_capacity = bucket_mask_to_capacity(self.table.bucket_mask);
if new_items <= full_capacity / 2 {
// Rehash in-place without re-allocating if we have plenty of spare
// capacity that is locked up due to DELETED entries.
self.rehash_in_place(hasher);
Ok(())
} else {
// Otherwise, conservatively resize to at least the next size up
// to avoid churning deletes into frequent rehashes.
self.resize(
usize::max(new_items, full_capacity + 1),
hasher,
fallibility,
)
}
}

/// Rehashes the contents of the table in place (i.e. without changing the
/// allocation).
///
/// If `hasher` panics then some of the table's contents may be lost.
fn rehash_in_place(&mut self, hasher: impl Fn(&T) -> u64) {
unsafe {
// If the hash function panics then properly clean up any elements
// that we haven't rehashed yet. We unfortunately can't preserve the
// elements since we have lost their hashes and have no way of recovering them
// without risking another panic.
self.table.prepare_rehash_in_place();

let mut guard = guard(&mut self.table, move |self_| {
self.table.reserve_rehash_inner(
additional,
&|table, index| hasher(table.bucket::<T>(index).as_ref()),
fallibility,
TableLayout::new::<T>(),
if mem::needs_drop::<T>() {
for i in 0..self_.buckets() {
if *self_.ctrl(i) == DELETED {
self_.set_ctrl(i, EMPTY);
self_.bucket::<T>(i).drop();
self_.items -= 1;
}
}
}
self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items;
});

// At this point, DELETED elements are elements that we haven't
// rehashed yet. Find them and re-insert them at their ideal
// position.
'outer: for i in 0..guard.buckets() {
if *guard.ctrl(i) != DELETED {
continue;
}

'inner: loop {
// Hash the current item
let item = guard.bucket(i);
let hash = hasher(item.as_ref());

// Search for a suitable place to put it
let new_i = guard.find_insert_slot(hash);

// Probing works by scanning through all of the control
// bytes in groups, which may not be aligned to the group
// size. If both the new and old position fall within the
// same unaligned group, then there is no benefit in moving
// it and we can just continue to the next item.
if likely(guard.is_in_same_group(i, new_i, hash)) {
guard.set_ctrl_h2(i, hash);
continue 'outer;
}

// We are moving the current item to a new position. Write
// our H2 to the control byte of the new position.
let prev_ctrl = guard.replace_ctrl_h2(new_i, hash);
if prev_ctrl == EMPTY {
guard.set_ctrl(i, EMPTY);
// If the target slot is empty, simply move the current
// element into the new slot and clear the old control
// byte.
guard.bucket(new_i).copy_from_nonoverlapping(&item);
continue 'outer;
} else {
// If the target slot is occupied, swap the two elements
// and then continue processing the element that we just
// swapped into the old slot.
debug_assert_eq!(prev_ctrl, DELETED);
mem::swap(guard.bucket(new_i).as_mut(), item.as_mut());
continue 'inner;
}
}
}

guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items;
mem::forget(guard);
Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
} else {
None
},
)
}
}
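
With this change, the generic `reserve_rehash` above shrinks to a shim that erases `T` into a `&dyn Fn` hasher, a `TableLayout`, and an optional drop function before calling into `RawTableInner::reserve_rehash_inner`. A rough, self-contained sketch of that wrapper-plus-erased-inner pattern, using hypothetical `Table`/`Inner` types rather than hashbrown's real ones:

```rust
// Sketch only: `Inner` and `Table` are stand-ins, not hashbrown's types.
struct Inner {
    len: usize,
}

impl Inner {
    // The large, non-generic body: compiled once and shared by every `T`.
    // `#[inline(always)]` lets LLVM devirtualize the `dyn Fn` once the
    // wrapper below is monomorphized and inlined.
    #[inline(always)]
    fn grow_inner(&mut self, additional: usize, hash_at: &dyn Fn(&mut Self, usize) -> u64) {
        for i in 0..self.len {
            // Re-hash slot `i` through the type-erased hasher.
            let _hash = hash_at(self, i);
        }
        self.len += additional;
    }
}

struct Table<T> {
    inner: Inner,
    items: Vec<T>,
}

impl<T> Table<T> {
    // Thin generic wrapper: only the closure that knows about `T` is
    // instantiated per element type.
    fn grow(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) {
        let items = &self.items;
        self.inner
            .grow_inner(additional, &|_inner, index| hasher(&items[index]));
    }
}

fn main() {
    // Toy usage: `items` must cover indices 0..len for the sketch to hold.
    let mut table = Table { inner: Inner { len: 2 }, items: vec![1u32, 2] };
    table.grow(3, |x| u64::from(*x));
    assert_eq!(table.inner.len, 5);
}
```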

@@ -790,30 +707,12 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
fallibility: Fallibility,
) -> Result<(), TryReserveError> {
unsafe {
let mut new_table =
self.table
.prepare_resize(TableLayout::new::<T>(), capacity, fallibility)?;

// Copy all elements to the new table.
for item in self.iter() {
// This may panic.
let hash = hasher(item.as_ref());

// We can use a simpler version of insert() here since:
// - there are no DELETED entries.
// - we know there is enough space in the table.
// - all elements are unique.
let (index, _) = new_table.prepare_insert_slot(hash);
new_table.bucket(index).copy_from_nonoverlapping(&item);
}

// We successfully copied all elements without panicking. Now replace
// self with the new table. The old table will have its memory freed but
// the items will not be dropped (since they have been moved into the
// new table).
mem::swap(&mut self.table, &mut new_table);

Ok(())
self.table.resize_inner(
capacity,
&|table, index| hasher(table.bucket::<T>(index).as_ref()),
fallibility,
TableLayout::new::<T>(),
)
}
}

@@ -1312,6 +1211,14 @@ impl<A: Allocator + Clone> RawTableInner<A> {
Bucket::from_base_index(self.data_end(), index)
}

#[cfg_attr(feature = "inline-more", inline)]
unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
Member:

This function absolutely needs to be #[inline] at minimum; it is not generic.

Contributor Author:

It is generic over A, so I figured this should match the inlining of fn bucket. Though I would put #[inline(always)] on both of them (in release mode).

Contributor Author:

There are a bunch of small functions that are beneficial to inline. I'll open a PR for those. It seems the compile-time benefit of #119 mostly comes from removing inline on get, insert, etc.

debug_assert_ne!(self.bucket_mask, 0);
debug_assert!(index < self.buckets());
let base: *mut u8 = self.data_end().as_ptr();
base.sub((index + 1) * size_of)
}
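
For context on the arithmetic in `bucket_ptr`: buckets are stored in reverse order directly below the control bytes, so bucket `i` starts `(index + 1) * size_of` bytes before `data_end`. A tiny standalone check of that layout rule, with a plain array standing in for the real allocation (illustrative only, not the crate's layout code):

```rust
fn main() {
    let size_of = core::mem::size_of::<u64>();
    let mut storage: [u64; 4] = [10, 20, 30, 40];

    // Pretend the control bytes start right after the array, i.e. `data_end`
    // is one past the last element, mirroring `self.data_end()` above.
    let data_end: *mut u8 =
        unsafe { storage.as_mut_ptr().cast::<u8>().add(storage.len() * size_of) };

    // Same formula as `bucket_ptr`: bucket `i` sits (i + 1) * size below `data_end`.
    let bucket_ptr = |index: usize| -> *mut u8 { unsafe { data_end.sub((index + 1) * size_of) } };

    unsafe {
        // Bucket 0 is the element closest to the control bytes (storage[3]).
        assert_eq!(*(bucket_ptr(0) as *const u64), 40);
        assert_eq!(*(bucket_ptr(3) as *const u64), 10);
    }
}
```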

#[cfg_attr(feature = "inline-more", inline)]
unsafe fn data_end<T>(&self) -> NonNull<T> {
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
@@ -1457,6 +1364,178 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}))
}

/// Reserves or rehashes to make room for `additional` more elements.
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
#[allow(clippy::inline_always)]
#[inline(always)]
unsafe fn reserve_rehash_inner(
&mut self,
additional: usize,
hasher: &dyn Fn(&mut Self, usize) -> u64,
fallibility: Fallibility,
layout: TableLayout,
drop: Option<fn(*mut u8)>,
) -> Result<(), TryReserveError> {
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
let new_items = match self.items.checked_add(additional) {
Some(new_items) => new_items,
None => return Err(fallibility.capacity_overflow()),
};
let full_capacity = bucket_mask_to_capacity(self.bucket_mask);
if new_items <= full_capacity / 2 {
// Rehash in-place without re-allocating if we have plenty of spare
// capacity that is locked up due to DELETED entries.
self.rehash_in_place(hasher, layout.size, drop);
Ok(())
} else {
// Otherwise, conservatively resize to at least the next size up
// to avoid churning deletes into frequent rehashes.
self.resize_inner(
usize::max(new_items, full_capacity + 1),
hasher,
fallibility,
layout,
)
}
}
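
The branch above encodes the growth policy: if the requested number of items fits in at most half of the full capacity, the only problem is tombstones and an in-place rehash suffices; otherwise the table resizes to at least `full_capacity + 1` so delete-heavy workloads do not churn into constant rehashes. A small model of just that decision (the `plan` helper is hypothetical, not part of the crate):

```rust
// Hypothetical stand-in for the decision made by `reserve_rehash_inner`.
#[derive(Debug, PartialEq)]
enum Plan {
    RehashInPlace,
    Resize { target: usize },
    CapacityOverflow,
}

fn plan(items: usize, additional: usize, full_capacity: usize) -> Plan {
    let new_items = match items.checked_add(additional) {
        Some(n) => n,
        None => return Plan::CapacityOverflow,
    };
    if new_items <= full_capacity / 2 {
        // Plenty of capacity is locked up in DELETED entries: reclaim it in place.
        Plan::RehashInPlace
    } else {
        // Grow to at least the next size up.
        Plan::Resize {
            target: usize::max(new_items, full_capacity + 1),
        }
    }
}

fn main() {
    assert_eq!(plan(3, 1, 14), Plan::RehashInPlace);          // 4 <= 14 / 2
    assert_eq!(plan(10, 5, 14), Plan::Resize { target: 15 }); // 15 > 7, max(15, 15)
    assert_eq!(plan(usize::MAX, 1, 14), Plan::CapacityOverflow);
}
```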

/// Allocates a new table of a different size and moves the contents of the
/// current table into it.
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
#[allow(clippy::inline_always)]
#[inline(always)]
unsafe fn resize_inner(
&mut self,
capacity: usize,
hasher: &dyn Fn(&mut Self, usize) -> u64,
fallibility: Fallibility,
layout: TableLayout,
) -> Result<(), TryReserveError> {
let mut new_table = self.prepare_resize(layout, capacity, fallibility)?;

// Copy all elements to the new table.
for i in 0..self.buckets() {
if !is_full(*self.ctrl(i)) {
continue;
}

// This may panic.
let hash = hasher(self, i);

// We can use a simpler version of insert() here since:
// - there are no DELETED entries.
// - we know there is enough space in the table.
// - all elements are unique.
let (index, _) = new_table.prepare_insert_slot(hash);

ptr::copy_nonoverlapping(
self.bucket_ptr(i, layout.size),
new_table.bucket_ptr(index, layout.size),
layout.size,
);
}

// We successfully copied all elements without panicking. Now replace
// self with the new table. The old table will have its memory freed but
// the items will not be dropped (since they have been moved into the
// new table).
mem::swap(self, &mut new_table);

Ok(())
}
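
Since `resize_inner` only knows the element size from `layout`, each occupied bucket is moved into the new table as `layout.size` raw bytes via `ptr::copy_nonoverlapping` rather than as a typed value. A minimal sketch of why that byte copy is a move in the ownership sense (the destination takes over; the source must never be read or dropped again):

```rust
use std::mem::{self, MaybeUninit};
use std::ptr;

fn main() {
    let size = mem::size_of::<String>();

    // Source slot owns a String; destination slot is uninitialized.
    let src: MaybeUninit<String> = MaybeUninit::new(String::from("hello"));
    let mut dst: MaybeUninit<String> = MaybeUninit::uninit();

    unsafe {
        // Byte-wise move of exactly `size` bytes, as in `resize_inner` above.
        ptr::copy_nonoverlapping(src.as_ptr().cast::<u8>(), dst.as_mut_ptr().cast::<u8>(), size);

        // `dst` now owns the String; `src` must be treated as moved-from.
        // MaybeUninit never runs destructors, so there is no double free.
        assert_eq!(dst.assume_init(), "hello");
    }
}
```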

/// Rehashes the contents of the table in place (i.e. without changing the
/// allocation).
///
/// If `hasher` panics then some of the table's contents may be lost.
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
#[allow(clippy::inline_always)]
#[inline(always)]
unsafe fn rehash_in_place(
&mut self,
hasher: &dyn Fn(&mut Self, usize) -> u64,
size_of: usize,
drop: Option<fn(*mut u8)>,
) {
// If the hash function panics then properly clean up any elements
// that we haven't rehashed yet. We unfortunately can't preserve the
// elements since we have lost their hashes and have no way of recovering them
// without risking another panic.
self.prepare_rehash_in_place();
Member:

There's no reason for prepare_rehash_in_place to be a separate function any more now that rehash_in_place has been moved to RawTableInner.

Contributor Author:

I find rehash_in_place to be sufficiently large and complex already, so I don't mind this being in a separate function for code clarity. I'm less sure about prepare_resize staying a separate function though.


let mut guard = guard(self, move |self_| {
if let Some(drop) = drop {
for i in 0..self_.buckets() {
if *self_.ctrl(i) == DELETED {
self_.set_ctrl(i, EMPTY);
drop(self_.bucket_ptr(i, size_of));
self_.items -= 1;
}
}
}
self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items;
});

// At this point, DELETED elements are elements that we haven't
// rehashed yet. Find them and re-insert them at their ideal
// position.
'outer: for i in 0..guard.buckets() {
if *guard.ctrl(i) != DELETED {
continue;
}

let i_p = guard.bucket_ptr(i, size_of);

'inner: loop {
// Hash the current item
let hash = hasher(*guard, i);

// Search for a suitable place to put it
let new_i = guard.find_insert_slot(hash);
let new_i_p = guard.bucket_ptr(new_i, size_of);

// Probing works by scanning through all of the control
// bytes in groups, which may not be aligned to the group
// size. If both the new and old position fall within the
// same unaligned group, then there is no benefit in moving
// it and we can just continue to the next item.
if likely(guard.is_in_same_group(i, new_i, hash)) {
guard.set_ctrl_h2(i, hash);
continue 'outer;
}

// We are moving the current item to a new position. Write
// our H2 to the control byte of the new position.
let prev_ctrl = guard.replace_ctrl_h2(new_i, hash);
if prev_ctrl == EMPTY {
guard.set_ctrl(i, EMPTY);
// If the target slot is empty, simply move the current
// element into the new slot and clear the old control
// byte.
ptr::copy_nonoverlapping(i_p, new_i_p, size_of);
continue 'outer;
} else {
// If the target slot is occupied, swap the two elements
// and then continue processing the element that we just
// swapped into the old slot.
debug_assert_eq!(prev_ctrl, DELETED);
ptr::swap_nonoverlapping(i_p, new_i_p, size_of);
continue 'inner;
}
}
}

guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items;

mem::forget(guard);
}
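
The `guard(self, ...)` / `mem::forget(guard)` pair above is the panic-safety idiom: the cleanup closure only runs if the hasher unwinds before the happy path forgets the guard. A stripped-down sketch of the idiom with a hand-rolled guard type (not hashbrown's `scopeguard`):

```rust
use std::mem;

struct Cleanup<F: FnMut()>(F);

impl<F: FnMut()> Drop for Cleanup<F> {
    fn drop(&mut self) {
        // Runs on unwind (or normal scope exit) unless the guard is forgotten.
        (self.0)();
    }
}

fn rehash_step(hasher_panics: bool) {
    let guard = Cleanup(|| println!("panic path: clear half-rehashed DELETED slots"));

    if hasher_panics {
        panic!("hasher panicked");
    }

    // Happy path: defuse the guard so the cleanup never runs.
    mem::forget(guard);
}

fn main() {
    rehash_step(false); // prints nothing
    // The panic is caught here; the guard's cleanup runs during unwinding.
    let _ = std::panic::catch_unwind(|| rehash_step(true));
}
```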

#[inline]
unsafe fn free_buckets(&mut self, table_layout: TableLayout) {
// Avoid `Option::unwrap_or_else` because it bloats LLVM IR.
@@ -2281,6 +2360,20 @@ impl<'a, A: Allocator + Clone> Iterator for RawIterHashInner<'a, A> {
mod test_map {
use super::*;

fn rehash_in_place<T>(table: &mut RawTable<T>, hasher: impl Fn(&T) -> u64) {
unsafe {
table.table.rehash_in_place(
&|table, index| hasher(table.bucket::<T>(index).as_ref()),
mem::size_of::<T>(),
if mem::needs_drop::<T>() {
Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
} else {
None
},
);
}
}
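
The helper above erases `ptr::drop_in_place::<T>` into the untyped drop hook that `rehash_in_place` takes. A hedged sketch of that erasure in isolation; the `erased_drop` helper is illustrative only (and uses an `unsafe fn` pointer where the crate uses `fn(*mut u8)`):

```rust
use std::{mem, ptr};

// Instantiate `drop_in_place` for a concrete `T`, then erase its function
// pointer so callers can drop elements knowing only a `*mut u8`.
fn erased_drop<T>() -> Option<unsafe fn(*mut u8)> {
    if mem::needs_drop::<T>() {
        Some(unsafe { mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)) })
    } else {
        None
    }
}

fn main() {
    // String needs drop; u64 does not.
    assert!(erased_drop::<String>().is_some());
    assert!(erased_drop::<u64>().is_none());

    // Drop a String through the erased pointer.
    let mut s = mem::ManuallyDrop::new(String::from("erased"));
    if let Some(drop_fn) = erased_drop::<String>() {
        unsafe { drop_fn(&mut *s as *mut String as *mut u8) };
    }
    // `s` is ManuallyDrop, so the String is not dropped a second time here.
}
```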

#[test]
fn rehash() {
let mut table = RawTable::new();
@@ -2296,7 +2389,7 @@ mod test_map {
assert!(table.find(i + 100, |x| *x == i + 100).is_none());
}

table.rehash_in_place(hasher);
rehash_in_place(&mut table, hasher);

for i in 0..100 {
unsafe {