Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement efficient clearing of the Hashtable #106

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions include/fixed_containers/fixed_doubly_linked_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct LinkedListIndices
template <typename T, std::size_t MAXIMUM_SIZE, typename IndexType = std::size_t>
class FixedDoublyLinkedListBase
{
protected:
static_assert(MAXIMUM_SIZE + 1 <= std::numeric_limits<IndexType>::max(),
"must be able to index MAXIMUM_SIZE+1 elements with IndexType");
using StorageType = FixedIndexBasedPoolStorage<T, MAXIMUM_SIZE>;
Expand Down Expand Up @@ -52,11 +53,6 @@ class FixedDoublyLinkedListBase
}
// Reports whether the backing storage is full; simply forwards to storage().full().
[[nodiscard]] constexpr bool full() const noexcept { return storage().full(); }

// Empties the list by deleting the range that starts at front_index() and ends at index
// MAXIMUM_SIZE. The index returned by the helper is intentionally discarded.
// NOTE(review): MAXIMUM_SIZE looks like the start/end sentinel index of the chain - confirm
// against delete_range_and_return_next_index().
constexpr void clear() noexcept
{
delete_range_and_return_next_index(front_index(), MAXIMUM_SIZE);
}

// Read-only access to the element stored at index `i`; forwards to storage().at(i).
constexpr const T& at(const IndexType i) const { return storage().at(i); }
// Mutable access to the element stored at index `i`; forwards to storage().at(i).
constexpr T& at(const IndexType i) { return storage().at(i); }

Expand Down Expand Up @@ -227,6 +223,11 @@ class FixedDoublyLinkedList : public FixedDoublyLinkedListBase<T, MAXIMUM_SIZE,
return *this;
}

// Empties the list by deleting the range that starts at front_index() and ends at index
// MAXIMUM_SIZE. The index returned by the helper is intentionally discarded.
// The helpers are reached through `this->` because the base class is a dependent template.
constexpr void clear() noexcept
{
this->delete_range_and_return_next_index(this->front_index(), MAXIMUM_SIZE);
}

// Destructor: calls clear() so the list is emptied before the object goes away.
constexpr ~FixedDoublyLinkedList() noexcept { this->clear(); }
};

Expand All @@ -240,6 +241,22 @@ class FixedDoublyLinkedList<T, MAXIMUM_SIZE, IndexType>
// clang-format off
// Default constructor: does nothing beyond default-constructing Base.
constexpr FixedDoublyLinkedList() noexcept : Base() { }
// clang-format on

// Fast clear for this specialization: resets the bookkeeping directly instead of unlinking the
// elements one at a time.
// NOTE(review): presumably this specialization is only selected for element types that need no
// per-element destruction - confirm against the specialization's constraints.
constexpr void clear() noexcept
{
// Instead of iterating over the elements of the linked list (slow), re-construct the backing
// storage in place, which puts it back into its default-constructed state.
std::construct_at(&(this->IMPLEMENTATION_DETAIL_DO_NOT_USE_storage_));

// Reset the start/end sentinel (index MAXIMUM_SIZE) so that it points at itself.
// The remaining links of the linked list will be overwritten as elements are allocated, so
// we don't have to reset the entire chain_ array.
this->next_of(MAXIMUM_SIZE) = MAXIMUM_SIZE;
this->prev_of(MAXIMUM_SIZE) = MAXIMUM_SIZE;

// Finally, set the size back to 0.
this->IMPLEMENTATION_DETAIL_DO_NOT_USE_size_ = 0;
}
};

} // namespace fixed_containers::fixed_doubly_linked_list_detail::specializations
Expand Down
50 changes: 48 additions & 2 deletions include/fixed_containers/fixed_robinhood_hashtable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,17 @@ class FixedRobinhoodHashtable

// Value-initialized instances of the Hash and KeyEqual functor types.
Hash IMPLEMENTATION_DETAIL_DO_NOT_USE_hash_{};
KeyEqual IMPLEMENTATION_DETAIL_DO_NOT_USE_key_equal_{};

// Stores the (Key, Value) pairs that the table actually holds. Keeping them in a linked list is
// what provides a stable iteration order.
fixed_doubly_linked_list_detail::FixedDoublyLinkedList<PairType, CAPACITY, SizeType>
IMPLEMENTATION_DETAIL_DO_NOT_USE_value_storage_{};
// Stores a "backlink" from each value-storage index to the index of the bucket that points at
// it, so the owning bucket can be found without re-hashing the key.
std::array<SizeType, CAPACITY> IMPLEMENTATION_DETAIL_DO_NOT_USE_value_index_to_bucket_index_{};

// The array that is indexed by the hash of the key. Each "bucket" contains an index to a slot in
// `value_storage_`, as well as a `dist_and_fingerprint` that is needed to implement robinhood
// hashing.
std::array<Bucket, INTERNAL_TABLE_SIZE> IMPLEMENTATION_DETAIL_DO_NOT_USE_bucket_array_{};

struct OpaqueIndexType
Expand All @@ -149,6 +158,16 @@ class FixedRobinhoodHashtable
return IMPLEMENTATION_DETAIL_DO_NOT_USE_bucket_array_[idx];
}

// Mutable backlink lookup: yields a reference to the bucket index recorded for the value stored
// at `value_idx`, so callers can both read and update the backlink.
[[nodiscard]] constexpr SizeType& bucket_for_value_index(SizeType value_idx)
{
    auto& backlinks = IMPLEMENTATION_DETAIL_DO_NOT_USE_value_index_to_bucket_index_;
    return backlinks[value_idx];
}

// Read-only backlink lookup: yields a const reference to the bucket index recorded for the value
// stored at `value_idx`.
[[nodiscard]] constexpr const SizeType& bucket_for_value_index(SizeType value_idx) const
{
    const auto& backlinks = IMPLEMENTATION_DETAIL_DO_NOT_USE_value_index_to_bucket_index_;
    return backlinks[value_idx];
}

template <typename Key>
[[nodiscard]] constexpr std::uint64_t hash(const Key& k) const
{
Expand Down Expand Up @@ -190,10 +209,20 @@ class FixedRobinhoodHashtable
// until we hit an empty one
while (0 != bucket_at(table_loc).dist_and_fingerprint_)
{
// update the backlink of the value pointed to by the bucket we're about to put in
// table_loc
bucket_for_value_index(bucket.value_index_) = table_loc;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually quite a lot of extra work that is required for every single insertion and deletion. We expect a lot of buckets to need moving, and now each of those moves also requires touching this separate backlink array.

// put `bucket` in `table_loc` and then assign `bucket` to whatever used to be in
// `table_loc`
bucket = std::exchange(bucket_at(table_loc), bucket);
// increment the distance of the thing we just evicted, it will be placed one slot over
// at the top of this loop
bucket = bucket.plus_dist();
// go to the next table slot
table_loc = next_bucket_index(table_loc);
}
// update the backlink of the value pointed to by the bucket we're about to put in table_loc
bucket_for_value_index(bucket.value_index_) = table_loc;
bucket_at(table_loc) = bucket;
}

Expand All @@ -205,9 +234,18 @@ class FixedRobinhoodHashtable
SizeType next_loc = next_bucket_index(table_loc);
while (bucket_at(next_loc).dist_and_fingerprint_ >= Bucket::DIST_INC * 2)
{
// overwrite the bucket at table_loc with the bucket at next_loc (accounting for the
// change in distance)
bucket_at(table_loc) = bucket_at(next_loc).minus_dist();

// update the backlink for the shifted element
bucket_for_value_index(bucket_at(table_loc).value_index_) = table_loc;

// shift both table_loc and next_loc forward one index
table_loc = std::exchange(next_loc, next_bucket_index(next_loc));
}
// zero out the thing we're pointed at now, it was copied back one (or it is the thing we
// wanted to delete)
bucket_at(table_loc) = {};
}

Expand Down Expand Up @@ -349,13 +387,21 @@ class FixedRobinhoodHashtable
SizeType cur_index = start_value_index;
while (cur_index != end_value_index)
{
cur_index = erase(opaque_index_of(key_at(cur_index)));
OpaqueIndexType i = {bucket_for_value_index(cur_index), 0};
cur_index = erase(i);
}

return end_value_index;
}

// Removes every entry by erasing the whole index range [begin_index(), end_index()) through
// erase_range(); the value returned by erase_range() is discarded.
constexpr void clear() { erase_range(begin_index(), end_index()); }
constexpr void clear()
{
// reset the backing linked list
IMPLEMENTATION_DETAIL_DO_NOT_USE_value_storage_.clear();

// reset the bucket array
IMPLEMENTATION_DETAIL_DO_NOT_USE_bucket_array_.fill({});
}

public:
// Defaulted default constructor; the data members rely on their own in-class `{}` initializers.
constexpr FixedRobinhoodHashtable() = default;
Expand Down
Loading
Loading