namespace til
{
    // A simple hash function for simple hash maps.
    //
    // As demonstrated in https://doi.org/10.14778/2850583.2850585, a simple "multiply and shift"
    // hash performs very well with linear probing hash maps. This function only does the
    // "multiply" part: linear_flat_set shifts the result down by an appropriate amount itself
    // and therefore only ever looks at the TOPMOST bits of the returned value.
    //
    // The two multipliers are the same as used by the PCG family of random number generators.
    // The 32-Bit version is described in https://doi.org/10.1090/S0025-5718-99-00996-5, Table 5.
    // The 64-Bit version is the multiplier as used by Donald Knuth for MMIX and found by C. E. Haynes.
    //
    // Arguments:
    // - v: the integer to hash.
    // Return Value:
    // - v multiplied by the width-appropriate constant, wrapping around on overflow.
    constexpr size_t flat_set_hash_integer(size_t v) noexcept
    {
        // Pick the multiplier by the actual width of size_t instead of by a platform macro.
        // Using the 32-Bit multiplier with a 64-Bit size_t would leave the upper bits of the
        // product zero for small inputs, and those upper bits are exactly what callers consume.
        // On Windows this selects the same branch that `#ifdef _WIN64` did.
#if SIZE_MAX > UINT32_MAX
        return v * UINT64_C(6364136223846793005);
#else
        return v * UINT32_C(747796405);
#endif
    }
}
+ // + // It performs best with: + // * small and cheap T + // * >= 50% successful lookups + // * <= 50% load factor (LoadFactor >= 2, which is the minimum anyways) + template + struct linear_flat_set + { + static_assert(LoadFactor >= 2); + + bool empty() const noexcept + { + return _load == 0; + } + + size_t size() const noexcept + { + return _load / LoadFactor; + } + + std::span container() const noexcept + { + return { _map.get(), _capacity }; + } + + template + std::pair insert(U&& key) + { + // Putting this into the lookup path is a little pessimistic, but it + // allows us to default-construct this hashmap with a size of 0. + if (_load >= _capacity) [[unlikely]] + { + _bumpSize(); + } + + // The most common, basic and performant hash function is to multiply the value + // by some prime number and divide by the number of slots. It's been shown + // many times in literature that such a scheme performs the best on average. + // As such, we perform the divide here to get the topmost bits down. + // See flat_set_hash_integer. + const auto hash = ::std::hash{}(key) >> _shift; + + for (auto i = hash;; ++i) + { + auto& slot = _map[i & _mask]; + if (!slot) + { + slot = std::forward(key); + _load += LoadFactor; + return { slot, true }; + } + if (slot == key) [[likely]] + { + return { slot, false }; + } + } + } + + private: + __declspec(noinline) void _bumpSize() + { + // A _shift of 0 would result in a newShift of 0xfffff... + // A _shift of 1 would result in a newCapacity of 0 + if (_shift < 2) + { + throw std::bad_array_new_length{}; + } + + const auto newShift = _shift - 1; + const auto newCapacity = size_t{ 1 } << (digits - newShift); + const auto newMask = newCapacity - 1; + auto newMap = std::make_unique(newCapacity); + + // This mirrors the insert() function, but without the lookup part. 
+ for (auto& oldSlot : container()) + { + if (!oldSlot) + { + continue; + } + + const auto hash = ::std::hash{}(oldSlot) >> newShift; + + for (auto i = hash;; ++i) + { + auto& slot = newMap[i & newMask]; + if (!slot) + { + slot = std::move_if_noexcept(oldSlot); + break; + } + } + } + + _map = std::move(newMap); + _capacity = newCapacity; + _shift = newShift; + _mask = newMask; + } + + static constexpr auto digits = std::numeric_limits::digits; + + std::unique_ptr _map; + size_t _capacity = 0; + size_t _load = 0; + // This results in an initial capacity of 8 items, independent of the LoadFactor. + size_t _shift = digits - LoadFactor - 1; + size_t _mask = 0; + }; +} + +#pragma warning(pop) diff --git a/src/til/ut_til/FlatSetTests.cpp b/src/til/ut_til/FlatSetTests.cpp new file mode 100644 index 00000000000..43e4c5fb2be --- /dev/null +++ b/src/til/ut_til/FlatSetTests.cpp @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#include "precomp.h" + +#include + +using namespace WEX::Common; +using namespace WEX::Logging; +using namespace WEX::TestExecution; + +struct Data +{ + static constexpr auto emptyMarker = std::numeric_limits::max(); + + constexpr operator bool() const noexcept + { + return value != emptyMarker; + } + + constexpr bool operator==(int key) const noexcept + { + return value == static_cast(key); + } + + constexpr Data& operator=(int key) noexcept + { + value = static_cast(key); + return *this; + } + + size_t value = emptyMarker; +}; + +template<> +struct ::std::hash +{ + constexpr size_t operator()(int key) const noexcept + { + return til::flat_set_hash_integer(static_cast(key)); + } + + constexpr size_t operator()(Data d) const noexcept + { + return til::flat_set_hash_integer(d.value); + } +}; + +class FlatSetTests +{ + TEST_CLASS(FlatSetTests); + + TEST_METHOD(Basic) + { + til::linear_flat_set set; + + // This simultaneously demonstrates how the class can't just do "heterogeneous lookups" + // like STL does, 
but also insert items with a different type. + const auto [entry1, inserted1] = set.insert(123); + VERIFY_IS_TRUE(inserted1); + + const auto [entry2, inserted2] = set.insert(123); + VERIFY_IS_FALSE(inserted2); + + VERIFY_ARE_EQUAL(&entry1, &entry2); + VERIFY_ARE_EQUAL(123u, entry2.value); + } +}; diff --git a/src/til/ut_til/til.unit.tests.vcxproj b/src/til/ut_til/til.unit.tests.vcxproj index 824bf40bd4d..04694e68a16 100644 --- a/src/til/ut_til/til.unit.tests.vcxproj +++ b/src/til/ut_til/til.unit.tests.vcxproj @@ -20,6 +20,7 @@ + diff --git a/src/til/ut_til/til.unit.tests.vcxproj.filters b/src/til/ut_til/til.unit.tests.vcxproj.filters index 545bc412f55..c25b0113980 100644 --- a/src/til/ut_til/til.unit.tests.vcxproj.filters +++ b/src/til/ut_til/til.unit.tests.vcxproj.filters @@ -29,6 +29,7 @@ + diff --git a/tools/ConsoleTypes.natvis b/tools/ConsoleTypes.natvis index 803bf5d3337..de0c91ffd23 100644 --- a/tools/ConsoleTypes.natvis +++ b/tools/ConsoleTypes.natvis @@ -112,4 +112,14 @@ _value + + + {{ size={_load / $T2} }} + + + _capacity + _map._Mypair._Myval2 + + +