Skip to content

Commit

Permalink
Introduce til::linear_flat_set
Browse files Browse the repository at this point in the history
  • Loading branch information
lhecker committed Apr 3, 2023
1 parent 0105807 commit 567550a
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .github/actions/spelling/expect/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,7 @@ Mip
MMBB
mmcc
MMCPL
MMIX
mmsystem
MNC
MNOPQ
Expand Down Expand Up @@ -2287,6 +2288,7 @@ YCENTER
YCount
YDPI
YOffset
ype
YSubstantial
YVIRTUALSCREEN
YWalk
Expand Down
196 changes: 196 additions & 0 deletions src/inc/til/flat_set.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#pragma once

#pragma warning(push)
#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4).
#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique<T> instead (r.11).

namespace til
{
// A simple hash function for simple hash maps.
// As demonstrated in https://doi.org/10.14778/2850583.2850585, a simple "multiply and shift" hash performs
// very well with linear probing hash maps and I found this to be true as well in my own testing. This hash
// function doesn't do the "shift" part, because linear_flat_set already does it by an appropriate amount.
constexpr size_t flat_set_hash_integer(size_t v) noexcept
{
// These two multipliers are the same as used by the PCG family of random number generators.
// The 32-Bit version is described in https://doi.org/10.1090/S0025-5718-99-00996-5, Table 5.
// The 64-Bit version is the multiplier as used by Donald Knuth for MMIX and found by C. E. Haynes.
#ifdef _WIN64
return v * UINT64_C(6364136223846793005);
#else
return v * UINT32_C(747796405);
#endif
}

template<typename T>
struct flat_set_trait;

// This is an example implementation for a linear_flat_set that can store any size_t != -1.
// Apart from this trait, the only other thing the type T has to implement is a copy or move assignment operator.
template<>
struct flat_set_trait<size_t>
{
using T = size_t;

static constexpr size_t hash(T v) noexcept
{
return flat_set_hash_integer(v);
}

// Return true if the key and existing slot in the hashmap match.
static constexpr bool equals(T slot, T key)
{
return slot == key;
}

// Return true if this slot can be filled with a new item.
static constexpr bool empty(T slot)
{
return slot == -1;
}

// Called when a new item is inserted into the hashmap.
// T::operator=(T&&) is called when the map is resized and existing items must be moved over.
static constexpr void fill(T& slot, T key)
{
slot = key;
}

// Called when a new backing buffer is allocated. You need to then initialize the raw memory.
static std::unique_ptr<T[]> allocate(size_t capacity)
{
return std::unique_ptr<T[]>{ new T[capacity]{ static_cast<size_t>(-1) } };
}

static void clear(T* data, size_t capacity) noexcept
{
for (auto& slot : std::span{ data, capacity })
{
slot = static_cast<size_t>(-1);
}
}
};

// A basic, hashmap with linear probing. A `LoadFactor` of 2 equals
// a max. load of roughly 50% and a `LoadFactor` of 4 roughly 25%.
//
// It performs best with:
// * small and cheap T
// * >= 50% successful lookups
// * <= 50% load factor (LoadFactor >= 2, which is the minimum anyways)
template<typename T, size_t LoadFactor = 2>
struct linear_flat_set
{
using Trait = typename flat_set_trait<T>;

static_assert(LoadFactor >= 2);

bool empty() const noexcept
{
return _load == 0;
}

size_t load() const noexcept
{
return _load;
}

size_t size() const noexcept
{
return _load / LoadFactor;
}

template<typename U>
std::pair<T&, bool> insert(U&& key)
{
// Putting this into the lookup path is a little pessimistic, but it
// allows us to default-construct this hashmap with a size of 0.
if (_load >= _capacity) [[unlikely]]
{
_bumpSize();
}

// The most common, basic and performant hash function is to multiply the value
// by some prime number and divide by the number of slots. It's been shown
// many times in literature that such a scheme performs the best on average.
// As such, we perform the divide her to get the topmost bits down.
// See flat_set_hash_integer.
const auto hash = Trait::hash(key) >> _shift;

for (auto i = hash;; ++i)
{
auto& slot = _map[i & _mask];
if (Trait::empty(slot))
{
Trait::fill(slot, std::forward<U>(key));
_load += LoadFactor;
return { slot, true };
}
if (Trait::equals(slot, key)) [[likely]]
{
return { slot, false };
}
}
}

void clear() noexcept
{
Trait::clear(_map.get(), _capacity);
_load = 0;
}

private:
__declspec(noinline) void _bumpSize()
{
if (!_shift)
{
throw std::bad_array_new_length{};
}

const auto newShift = _shift - 1;
const auto newCapacity = size_t{ 1 } << (digits - newShift);
const auto newMask = newCapacity - 1;
auto newMap = Trait::allocate(newCapacity);

// This mirrors the insert() function, but without the lookup part.
for (auto& oldSlot : std::span{ _map.get(), _capacity })
{
if (Trait::empty(oldSlot))
{
continue;
}

const auto hash = Trait::hash(oldSlot) >> newShift;

for (auto i = hash;; ++i)
{
auto& slot = newMap[i & newMask];
if (Trait::empty(slot))
{
slot = std::move_if_noexcept(oldSlot);
break;
}
}
}

_map = std::move(newMap);
_capacity = newCapacity;
_shift = newShift;
_mask = newMask;
}

static constexpr auto digits = std::numeric_limits<size_t>::digits;

std::unique_ptr<T[]> _map;
size_t _capacity = 0;
size_t _load = 0;
// This results in an initial capacity of 8 items, independent of the LoadFactor.
size_t _shift = digits - LoadFactor - 1;
size_t _mask = 0;
};
}

#pragma warning(pop)
5 changes: 4 additions & 1 deletion src/til/ut_til/til.unit.tests.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@
<ClInclude Include="..\..\inc\til\atomic.h" />
<ClInclude Include="..\..\inc\til\bit.h" />
<ClInclude Include="..\..\inc\til\bitmap.h" />
<ClInclude Include="..\..\inc\til\bytes.h" />
<ClInclude Include="..\..\inc\til\coalesce.h" />
<ClInclude Include="..\..\inc\til\color.h" />
<ClInclude Include="..\..\inc\til\enumset.h" />
<ClInclude Include="..\..\inc\til\env.h" />
<ClInclude Include="..\..\inc\til\flat_set.h" />
<ClInclude Include="..\..\inc\til\hash.h" />
<ClInclude Include="..\..\inc\til\latch.h" />
<ClInclude Include="..\..\inc\til\math.h" />
Expand All @@ -66,6 +68,7 @@
<ClInclude Include="..\..\inc\til\string.h" />
<ClInclude Include="..\..\inc\til\throttled_func.h" />
<ClInclude Include="..\..\inc\til\ticket_lock.h" />
<ClInclude Include="..\..\inc\til\type_traits.h" />
<ClInclude Include="..\..\inc\til\u8u16convert.h" />
<ClInclude Include="..\..\inc\til\unicode.h" />
<ClInclude Include="..\precomp.h" />
Expand All @@ -85,4 +88,4 @@
<Import Project="$(SolutionDir)src\common.build.post.props" />
<Import Project="$(SolutionDir)src\common.build.tests.props" />
<Import Project="$(SolutionDir)src\common.nugetversions.targets" />
</Project>
</Project>
11 changes: 10 additions & 1 deletion src/til/ut_til/til.unit.tests.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,19 @@
<ClInclude Include="..\..\inc\til\unicode.h">
<Filter>inc</Filter>
</ClInclude>
<ClInclude Include="..\..\inc\til\bytes.h">
<Filter>inc</Filter>
</ClInclude>
<ClInclude Include="..\..\inc\til\flat_set.h">
<Filter>inc</Filter>
</ClInclude>
<ClInclude Include="..\..\inc\til\type_traits.h">
<Filter>inc</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="inc">
<UniqueIdentifier>{7cf29ba4-d33d-4c3b-82e3-ab73e5a79685}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>
10 changes: 10 additions & 0 deletions tools/ConsoleTypes.natvis
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,14 @@
<Item Name="[ptr]">_ptr</Item>
</Expand>
</Type>

<Type Name="til::linear_flat_set&lt;*,*&gt;">
<DisplayString>{{ size={_load / $T2} }}</DisplayString>
<Expand>
<ArrayItems>
<Size>_capacity</Size>
<ValuePointer>_map._Mypair._Myval2</ValuePointer>
</ArrayItems>
</Expand>
</Type>
</AutoVisualizer>

0 comments on commit 567550a

Please sign in to comment.