Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster optimized frozen dictionary creation (6/6) #88093

Merged
merged 8 commits into from
Jul 5, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,10 @@ public void ICollection_Generic_Contains_ValidValueOnCollectionContainingThatVal
public void ICollection_Generic_Contains_DefaultValueOnCollectionNotContainingDefaultValue(int count)
{
ICollection<T> collection = GenericICollectionFactory(count);
if (DefaultValueAllowed)
if (DefaultValueAllowed && default(T) is null) // it's true only for reference types and for Nullable<T>
{
Assert.False(collection.Contains(default(T)));
}
}

[Theory]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public void IDictionary_Generic_ItemGet_MissingNonDefaultKey_ThrowsKeyNotFoundEx
[MemberData(nameof(ValidCollectionSizes))]
public void IDictionary_Generic_ItemGet_MissingDefaultKey_ThrowsKeyNotFoundException(int count)
{
if (DefaultValueAllowed)
if (DefaultValueAllowed && !IsReadOnly)
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
{
IDictionary<TKey, TValue> dictionary = GenericIDictionaryFactory(count);
TKey missingKey = default(TKey);
Expand Down Expand Up @@ -733,11 +733,14 @@ public void IDictionary_Generic_ContainsKey_DefaultKeyNotContainedInDictionary(i
IDictionary<TKey, TValue> dictionary = GenericIDictionaryFactory(count);
if (DefaultValueAllowed)
{
// returns false
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.ContainsKey(missingKey));
if (!IsReadOnly)
{
// returns false
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.ContainsKey(missingKey));
}
}
else
{
Expand Down Expand Up @@ -934,10 +937,13 @@ public void IDictionary_Generic_TryGetValue_DefaultKeyNotContainedInDictionary(i
TValue outValue;
if (DefaultValueAllowed)
{
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.TryGetValue(missingKey, out outValue));
if (!IsReadOnly)
{
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.TryGetValue(missingKey, out outValue));
}
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ The System.Collections.Immutable library is built-in as part of the shared frame
<Compile Include="System\Collections\Frozen\Int32\Int32FrozenSet.cs" />
<Compile Include="System\Collections\Frozen\String\Hashing.cs" />
<Compile Include="System\Collections\Frozen\String\KeyAnalyzer.cs" />
<Compile Include="System\Collections\Frozen\String\LengthBuckets.cs" />
<Compile Include="System\Collections\Frozen\String\LengthBucketsFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\String\LengthBucketsFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\String\OrdinalStringFrozenDictionary.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ public static bool IsKnownComparable<T>() =>
typeof(T) == typeof(uint) ||
typeof(T) == typeof(long) ||
typeof(T) == typeof(ulong) ||
typeof(T) == typeof(nint) ||
typeof(T) == typeof(nuint) ||
typeof(T) == typeof(decimal) ||
typeof(T) == typeof(float) ||
typeof(T) == typeof(double) ||
Expand All @@ -68,6 +66,8 @@ public static bool IsKnownComparable<T>() =>
#endif
#if NET5_0_OR_GREATER
typeof(T) == typeof(Half) ||
typeof(T) == typeof(nint) ||
typeof(T) == typeof(nuint) ||
#endif
#if NET6_0_OR_GREATER
typeof(T) == typeof(DateOnly) ||
Expand All @@ -78,5 +78,13 @@ public static bool IsKnownComparable<T>() =>
typeof(T) == typeof(UInt128) ||
#endif
typeof(T).IsEnum;

/// <summary>
/// Determines whether <typeparamref name="T"/> is one of the integer types for which
/// <c>GetHashCode</c> returns the value itself cast to <see cref="int"/>.
/// </summary>
/// <remarks>
/// When a Dictionary/HashSet is keyed by such a type, its keys are distinct and therefore so are
/// their hash codes, which lets later processing avoid some work.
/// <c>nint</c> and <c>nuint</c> qualify only when <see cref="IntPtr.Size"/> is 4.
/// </remarks>
/// <returns><see langword="true"/> if <typeparamref name="T"/> is one of the types described above; otherwise, <see langword="false"/>.</returns>
internal static bool KeysAreHashCodes<T>()
{
    // Native-sized integers: the answer depends solely on the pointer width of the current process.
    if (typeof(T) == typeof(nint) || typeof(T) == typeof(nuint))
    {
        return IntPtr.Size == 4;
    }

    // Fixed-size integers that are at most 32 bits wide.
    return typeof(T) == typeof(int)
        || typeof(T) == typeof(uint)
        || typeof(T) == typeof(short)
        || typeof(T) == typeof(ushort)
        || typeof(T) == typeof(byte)
        || typeof(T) == typeof(sbyte);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ internal abstract class ItemsFrozenSet<T, TThisWrapper> : FrozenSetInternalBase<
private protected readonly FrozenHashTable _hashTable;
private protected readonly T[] _items;

protected ItemsFrozenSet(HashSet<T> source) : base(source.Comparer)
protected ItemsFrozenSet(HashSet<T> source, bool keysAreHashCodes = false) : base(source.Comparer)
{
Debug.Assert(source.Count != 0);

Expand All @@ -30,7 +30,7 @@ protected ItemsFrozenSet(HashSet<T> source) : base(source.Comparer)
hashCodes[i] = entries[i] is T t ? Comparer.GetHashCode(t) : 0;
}

_hashTable = FrozenHashTable.Create(hashCodes);
_hashTable = FrozenHashTable.Create(hashCodes, keysAreHashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ internal abstract class KeysAndValuesFrozenDictionary<TKey, TValue> : FrozenDict
private protected readonly TKey[] _keys;
private protected readonly TValue[] _values;

protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source) : base(source.Comparer)
protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source, bool keysAreHashCodes = false) : base(source.Comparer)
{
Debug.Assert(source.Count != 0);

Expand All @@ -32,7 +32,7 @@ protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source) : base(
hashCodes[i] = Comparer.GetHashCode(entries[i].Key);
}

_hashTable = FrozenHashTable.Create(hashCodes);
_hashTable = FrozenHashTable.Create(hashCodes, keysAreHashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;

namespace System.Collections.Frozen
Expand All @@ -24,8 +25,8 @@ internal SmallFrozenDictionary(Dictionary<TKey, TValue> source) : base(source.Co
{
Debug.Assert(source.Count != 0);

_keys = source.Keys.ToArray(source.Count);
_values = source.Values.ToArray(source.Count);
_keys = source.Keys.ToArray();
_values = source.Values.ToArray();
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
}

private protected override TKey[] KeysCore => _keys;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ internal sealed class SmallFrozenSet<T> : FrozenSetInternalBase<T, SmallFrozenSe

internal SmallFrozenSet(HashSet<T> source) : base(source.Comparer)
{
_items = source.ToArray(source.Count);
_items = source.ToArray();
}

private protected override T[] ItemsCore => _items;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;

namespace System.Collections.Frozen
{
/// <summary>Builds the length-indexed lookup table used by the frozen collections that group string keys by their lengths.</summary>
internal static class LengthBuckets
{
    /// <summary>The maximum number of items allowed per bucket. The larger the value, the longer it can take to search a bucket, which is sequentially examined.</summary>
    internal const int MaxPerLength = 5;

    /// <summary>Allowed ratio between buckets with values and total buckets. Under this ratio, this implementation won't be used due to too much wasted space.</summary>
    private const double EmptyLengthsRatio = 0.2;

    /// <summary>
    /// Tries to create a flat lookup array that maps each key length to the indices of the keys having that length.
    /// </summary>
    /// <param name="keys">The keys to distribute into buckets. The array is only read.</param>
    /// <param name="comparer">The comparer associated with the keys; only validated by a debug assertion.</param>
    /// <param name="minLength">The length of the shortest key. Must be non-negative.</param>
    /// <param name="maxLength">The length of the longest key. Must be at least <paramref name="minLength"/>.</param>
    /// <returns>
    /// An array of <c>(maxLength - minLength + 1) * MaxPerLength</c> elements in which the bucket for a key of length
    /// <c>len</c> starts at index <c>(len - minLength) * MaxPerLength</c>. Each slot holds an index into
    /// <paramref name="keys"/>, or -1 for an unused slot. Returns <see langword="null"/> when the layout is not
    /// appropriate: some length has more than <see cref="MaxPerLength"/> keys, too few lengths are actually
    /// populated, or the array would be too large to allocate.
    /// </returns>
    internal static int[]? CreateLengthBucketsArrayIfAppropriate(string[] keys, IEqualityComparer<string> comparer, int minLength, int maxLength)
    {
        Debug.Assert(comparer == EqualityComparer<string>.Default || comparer == StringComparer.Ordinal || comparer == StringComparer.OrdinalIgnoreCase);
        Debug.Assert(minLength >= 0 && maxLength >= minLength);

        // If without even looking at the keys we know that some bucket will exceed the max per-bucket
        // limit (pigeon hole principle), we can early-exit out without doing any further work.
        int spread = maxLength - minLength + 1;
        if (keys.Length / spread > MaxPerLength)
        {
            return null;
        }

        // The number of non-empty buckets can never exceed the number of keys, so when even that upper
        // bound is below the required ratio the final sparsity check is guaranteed to fail. Bail now,
        // before renting and filling a scratch array that may be very large.
        if (keys.Length / (double)spread < EmptyLengthsRatio)
        {
            return null;
        }

        // Compute the size in 64-bit arithmetic: spread * MaxPerLength can exceed int.MaxValue,
        // and a wrapped 32-bit product would slip past the max-length guard below.
        long requiredSize = (long)spread * MaxPerLength;
#if NET6_0_OR_GREATER
        if (requiredSize > Array.MaxLength)
#else
        if (requiredSize > 0X7FFFFFC7)
#endif
        {
            // In the future we may lower the value, as it may be quite unlikely
            // to have a LOT of strings of different sizes.
            return null;
        }

        int arraySize = (int)requiredSize;

        // Instead of creating a dictionary of lists or a multi-dimensional array
        // we rent a single dimension array, where every bucket has MaxPerLength slots.
        // The bucket starts at (key.Length - minLength) * MaxPerLength.
        // Each value is an index of the key from the keys array
        // or just -1, which represents "null".
        int[] buckets = ArrayPool<int>.Shared.Rent(arraySize);
        buckets.AsSpan(0, arraySize).Fill(-1);

        int nonEmptyCount = 0;
        for (int i = 0; i < keys.Length; i++)
        {
            string key = keys[i];
            int startIndex = (key.Length - minLength) * MaxPerLength;
            int endIndex = startIndex + MaxPerLength;
            int index = startIndex;

            // Find the first free slot in this key's bucket.
            while (index < endIndex)
            {
                ref int bucket = ref buckets[index];
                if (bucket < 0)
                {
                    // Taking the very first slot means this length had no keys until now.
                    if (index == startIndex)
                    {
                        nonEmptyCount++;
                    }

                    bucket = i;
                    break;
                }

                index++;
            }

            if (index == endIndex)
            {
                // If we've already hit the max per-bucket limit, bail.
                ArrayPool<int>.Shared.Return(buckets);
                return null;
            }
        }

        // If there would be too much empty space in the lookup array, bail.
        if (nonEmptyCount / (double)spread < EmptyLengthsRatio)
        {
            ArrayPool<int>.Shared.Return(buckets);
            return null;
        }

        // The rented array may be longer than requested, so copy out exactly arraySize elements.
#if NET6_0_OR_GREATER
        // We don't need an array with every value initialized to zero if we are just about to overwrite every value anyway.
        int[] copy = GC.AllocateUninitializedArray<int>(arraySize);
        Array.Copy(buckets, copy, arraySize);
#else
        int[] copy = buckets.AsSpan(0, arraySize).ToArray();
#endif
        ArrayPool<int>.Shared.Return(buckets);

        return copy;
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
Expand All @@ -11,11 +10,6 @@ namespace System.Collections.Frozen
/// <summary>Provides a frozen dictionary implementation where strings are grouped by their lengths.</summary>
internal sealed class LengthBucketsFrozenDictionary<TValue> : FrozenDictionary<string, TValue>
{
/// <summary>Allowed ratio between buckets with values and total buckets. Under this ratio, this implementation won't be used due to too much wasted space.</summary>
private const double EmptyLengthsRatio = 0.2;
/// <summary>The maximum number of items allowed per bucket. The larger the value, the longer it can take to search a bucket, which is sequentially examined.</summary>
private const int MaxPerLength = 5;

private readonly int[] _lengthBuckets;
private readonly int _minLength;
private readonly string[] _keys;
Expand All @@ -39,87 +33,14 @@ private LengthBucketsFrozenDictionary(
string[] keys, TValue[] values, IEqualityComparer<string> comparer, int minLength, int maxLength)
{
Debug.Assert(keys.Length != 0 && keys.Length == values.Length);
Debug.Assert(comparer == EqualityComparer<string>.Default || comparer == StringComparer.Ordinal || comparer == StringComparer.OrdinalIgnoreCase);
Debug.Assert(minLength >= 0 && maxLength >= minLength);

// If without even looking at the keys we know that some bucket will exceed the max per-bucket
// limit (pigeon hole principle), we can early-exit out without doing any further work.
int spread = maxLength - minLength + 1;
if (keys.Length / spread > MaxPerLength)
int[]? lengthBuckets = LengthBuckets.CreateLengthBucketsArrayIfAppropriate(keys, comparer, minLength, maxLength);
if (lengthBuckets is null)
{
return null;
}

int arraySize = spread * MaxPerLength;
#if NET6_0_OR_GREATER
if (arraySize > Array.MaxLength)
#else
if (arraySize > 0X7FFFFFC7)
#endif
{
// In the future we may lower the value, as it may be quite unlikely
// to have a LOT of strings of different sizes.
return null;
}

// Instead of creating a dictionary of lists or a multi-dimensional array
// we rent a single dimension array, where every bucket has five slots.
// The bucket starts at (key.Length - minLength) * 5.
// Each value is an index of the key from _keys array
// or just -1, which represents "null".
int[] buckets = ArrayPool<int>.Shared.Rent(arraySize);
buckets.AsSpan(0, arraySize).Fill(-1);

int nonEmptyCount = 0;
for (int i = 0; i < keys.Length; i++)
{
string key = keys[i];
int startIndex = (key.Length - minLength) * MaxPerLength;
int endIndex = startIndex + MaxPerLength;
int index = startIndex;

while (index < endIndex)
{
ref int bucket = ref buckets[index];
if (bucket < 0)
{
if (index == startIndex)
{
nonEmptyCount++;
}

bucket = i;
break;
}

index++;
}

if (index == endIndex)
{
// If we've already hit the max per-bucket limit, bail.
ArrayPool<int>.Shared.Return(buckets);
return null;
}
}

// If there would be too much empty space in the lookup array, bail.
if (nonEmptyCount / (double)spread < EmptyLengthsRatio)
{
ArrayPool<int>.Shared.Return(buckets);
return null;
}

#if NET6_0_OR_GREATER
// We don't need an array with every value initialized to zero if we are just about to overwrite every value anyway.
int[] copy = GC.AllocateUninitializedArray<int>(arraySize);
Array.Copy(buckets, copy, arraySize);
#else
int[] copy = buckets.AsSpan(0, arraySize).ToArray();
#endif
ArrayPool<int>.Shared.Return(buckets);

return new LengthBucketsFrozenDictionary<TValue>(keys, values, copy, minLength, comparer);
return new LengthBucketsFrozenDictionary<TValue>(keys, values, lengthBuckets, minLength, comparer);
}

/// <inheritdoc />
Expand All @@ -138,8 +59,8 @@ private LengthBucketsFrozenDictionary(
private protected override ref readonly TValue GetValueRefOrNullRefCore(string key)
{
// If the length doesn't have an associated bucket, the key isn't in the dictionary.
int bucketIndex = (key.Length - _minLength) * MaxPerLength;
int bucketEndIndex = bucketIndex + MaxPerLength;
int bucketIndex = (key.Length - _minLength) * LengthBuckets.MaxPerLength;
int bucketEndIndex = bucketIndex + LengthBuckets.MaxPerLength;
int[] lengthBuckets = _lengthBuckets;
if (bucketIndex >= 0 && bucketEndIndex <= lengthBuckets.Length)
{
Expand Down
Loading