Initial HashTable implementation; tests still WIP
jpahm committed Oct 19, 2024
1 parent aa970f6 commit 7a1eb56
Showing 4 changed files with 340 additions and 38 deletions.
278 changes: 278 additions & 0 deletions DSA/Data Structures/HashTable.cs
@@ -0,0 +1,278 @@
using System.Collections;
using System.Collections.Immutable;
using System.Diagnostics.CodeAnalysis;

namespace DSA
{
/// <summary>
/// Represents a basic hash table implementation utilizing multiplicative hashing and chaining with linked lists.
/// </summary>
/// <typeparam name="KeyType">The type to key on.</typeparam>
/// <typeparam name="ValueType">The type to store.</typeparam>
public class HashTable<KeyType, ValueType> : IDictionary<KeyType, ValueType> where KeyType : notnull
{
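/// <summary>
/// Gets or sets the value associated with the given key. Note that the setter forwards to
/// Add, which (unlike the standard IDictionary contract) replaces the value when the key already exists.
/// </summary>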
public ValueType this[KeyType key] { get => GetValueAt(key); set => Add(key, value); }

public ICollection<KeyType> Keys => _buckets.SelectMany(bucket => bucket?.Select(x => x.Key) ?? []).ToImmutableArray();

public ICollection<ValueType> Values => _buckets.SelectMany(bucket => bucket?.Select(x => x.Value) ?? []).ToImmutableArray();

public int Count { get; private set; } = 0;

public int Capacity => 1 << _sizeLog2;

public bool IsReadOnly => false;

private LinkedList<KeyValuePair<KeyType, ValueType>>?[] _buckets;

/// <summary>
/// The maximum number of collisions allowed before the table attempts to resize.
/// </summary>
public int MaxAllowedCollisions { get; set; } = 10;

/// <summary>
/// The maximum clustering factor (see GetClustering) tolerated before a resize is triggered.
/// </summary>
public double MaxAllowedClustering { get; set; } = 2.0;
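
// Illustrative interplay of the two thresholds with the defaults above: once more than 10
// inserts have landed in already-occupied buckets, the table doubles its bucket count,
// provided GetClustering() also exceeds 2.0 at that point.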

/// <summary>
/// The default bucket capacity of the table, expressed as a base-2 logarithm (i.e. 2^5 = 32 buckets).
/// </summary>
public const int DEFAULT_SIZE_LOG2 = 5;

private int _sizeLog2 = DEFAULT_SIZE_LOG2;

private uint _collisions;

/// <summary>
/// Creates an empty hashtable.
/// </summary>
public HashTable()
{
_buckets = new LinkedList<KeyValuePair<KeyType, ValueType>>[1 << _sizeLog2];
}

/// <summary>
/// Creates a hashtable with the specified initial bucket capacity. Note this is NOT the element capacity; it is only the initial number of buckets.
/// </summary>
/// <param name="capacity">The initial bucket capacity.</param>
/// <exception cref="ArgumentOutOfRangeException"></exception>
public HashTable(uint capacity)
{
_sizeLog2 = Math.Clamp((int)Math.Ceiling(Math.Log2(capacity)), 2, 31);
_buckets = new LinkedList<KeyValuePair<KeyType, ValueType>>[1 << _sizeLog2];
}

/// <summary>
/// Computes the clustering level of the table; used to determine whether to resize the table or not.
/// See https://www.cs.cornell.edu/courses/cs3110/2008fa/lectures/lec21.html for some details.
/// </summary>
/// <returns>The clustering factor: 1 matches the spread expected of a uniform random hash, values below 1 indicate a better-than-random spread, and values above 1 indicate clustering.</returns>
public double GetClustering()
{
if (Count <= 1)
return 0;
double sumOfSizesSquared = _buckets.Sum(x => x is null ? 0.0 : (double)x.Count * x.Count);
// C = (m / (n - 1)) * (sum(x_i^2) / n - 1), where m = bucket count and n = element count;
// its expected value is 1 under uniform random hashing (see the Cornell notes linked above).
return ((double)Capacity / (Count - 1)) * ((sumOfSizesSquared / Count) - 1);
}
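
// Illustrative check of the metric: with Capacity = 32 and Count = 8, all 8 items in one
// bucket gives sumOfSizesSquared = 64 and clustering (32/7) * (64/8 - 1) = 32, while one
// item per bucket gives sumOfSizesSquared = 8 and clustering (32/7) * (8/8 - 1) = 0.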

/// <summary>
/// Doubles the table's bucket count when the clustering level is worse than MaxAllowedClustering, recursing further if rehashing itself collides excessively.
/// </summary>
/// <returns>Whether the resize attempt was successful.</returns>
private bool TryResize()
{
// Reset collision counter
_collisions = 0;

// Only resize if clustering is bad enough
if (GetClustering() <= MaxAllowedClustering)
return false;

// Cap growth at 2^30 buckets; shifting any further would overflow the int bucket count
if (_sizeLog2 >= 30)
return false;

_sizeLog2++;
var newBuckets = new LinkedList<KeyValuePair<KeyType, ValueType>>[1 << _sizeLog2];

// Re-hash old buckets to new buckets, keeping track of collisions and resizing more if needed
foreach (var bucket in _buckets)
{
if (bucket == null)
continue;
foreach (var kvp in bucket)
{
uint idx = GetBucketIndex(kvp.Key);
ref var newBucket = ref newBuckets[idx];
if (newBucket == null)
newBucket = [kvp];
else
{
newBucket.Add(kvp);
_collisions++;
// Recursively resize further if necessary; the recursive call rehashes from the
// original _buckets (not yet replaced), so we can safely discard newBuckets here
if (_collisions > MaxAllowedCollisions && TryResize())
return true;
}
}
}
_buckets = newBuckets;
return true;
}

/// <summary>
/// Knuth's suggested multiplier: 2^32 divided by Phi (the golden ratio), rounded to a nearby prime -- chosen for good distribution
/// </summary>
private const uint _hashMultiplier = 0x9E3779B1;

/// <summary>
/// Uses a multiplicative hash function to compute the bucket index of the specified key.
/// See https://www.cs.cornell.edu/courses/cs3110/2008fa/lectures/lec21.html for some details.
/// </summary>
/// <param name="key">The key to be hashed.</param>
/// <returns>The key's associated bucket index.</returns>
private uint GetBucketIndex(KeyType key)
{
return (uint)key.GetHashCode() * _hashMultiplier >> (32 - _sizeLog2); // Get top log2(_buckets.Length) bits of key * _hashMultiplier
}
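
// Worked example: with _sizeLog2 = 5 the shift keeps the top 5 bits of the product, so a
// key whose hash code is 1 maps to 0x9E3779B1 >> 27 == 0b10011 == bucket 19 of 32.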

private ValueType GetValueAt(KeyType key)
{
var bucket = _buckets[GetBucketIndex(key)];
if (bucket == null || !TryGetValueInBucket(key, bucket, out var value))
throw new KeyNotFoundException();
return value;
}

private bool TryGetValueInBucket(KeyType key, LinkedList<KeyValuePair<KeyType, ValueType>> bucket, [MaybeNullWhen(false)] out ValueType value)
{
// Scan the chain for the key; report absence instead of throwing like First() would
foreach (var kvp in bucket)
{
if (EqualityComparer<KeyType>.Default.Equals(kvp.Key, key))
{
value = kvp.Value;
return true;
}
}
value = default;
return false;
}

public void Add(KeyType key, ValueType value)
{
uint idx = GetBucketIndex(key);
ref var bucket = ref _buckets[idx];
if (bucket == null)
{
bucket = [new(key, value)];
Count++;
}
else
{
// Check to see whether we're just replacing an existing value or if there's a collision
bool replaced = false;
for (Node<KeyValuePair<KeyType, ValueType>>? node = bucket.Head; node != null; node = node.Next)
{
if (EqualityComparer<KeyType>.Default.Equals(key, node.Value.Key))
{
node.Value = new(key, value);
replaced = true;
break;
}
}
if (!replaced)
{
// Add a new item to the bucket if it's a collision
bucket.Add(new(key, value));
_collisions++;
Count++;

// Try to resize if necessary
if (_collisions > MaxAllowedCollisions)
TryResize();
}
}
}

public void Add(KeyValuePair<KeyType, ValueType> item)
{
Add(item.Key, item.Value);
}

public void Clear()
{
_sizeLog2 = DEFAULT_SIZE_LOG2;
_buckets = new LinkedList<KeyValuePair<KeyType, ValueType>>[1 << _sizeLog2];
_collisions = 0;
Count = 0;
}

public bool Contains(KeyValuePair<KeyType, ValueType> item)
{
var bucket = _buckets[GetBucketIndex(item.Key)];
return bucket != null && bucket.Contains(item);
}

public bool ContainsKey(KeyType key)
{
var bucket = _buckets[GetBucketIndex(key)];
return bucket != null && bucket.Any(x => EqualityComparer<KeyType>.Default.Equals(x.Key, key));
}

public void CopyTo(KeyValuePair<KeyType, ValueType>[] array, int arrayIndex)
{
foreach (var bucket in _buckets)
{
if (bucket == null)
continue;
bucket.CopyTo(array, arrayIndex);
arrayIndex += bucket.Count;
}
}

public IEnumerator<KeyValuePair<KeyType, ValueType>> GetEnumerator()
{
foreach (var bucket in _buckets.Where(bucket => bucket != null))
foreach (KeyValuePair<KeyType, ValueType> kvp in bucket!)
yield return kvp;
}

public bool Remove(KeyType key)
{
ref var bucket = ref _buckets[GetBucketIndex(key)];
if (bucket == null)
return false;

// Find the entry for this key, if any; return false instead of letting First() throw
foreach (var kvp in bucket)
{
if (EqualityComparer<KeyType>.Default.Equals(kvp.Key, key))
{
bucket.Remove(kvp);
if (bucket.Count == 0)
bucket = null;
Count--;
return true;
}
}
return false;
}

public bool Remove(KeyValuePair<KeyType, ValueType> item)
{
ref var bucket = ref _buckets[GetBucketIndex(item.Key)];
if (bucket == null)
return false;

if (bucket.Remove(item))
{
if (bucket.Count == 0)
bucket = null;
Count--;
return true;
}
return false;
}

public bool TryGetValue(KeyType key, [MaybeNullWhen(false)] out ValueType value)
{
var bucket = _buckets[GetBucketIndex(key)];
if (bucket == null)
{
value = default;
return false;
}

// Returns false (rather than throwing) when the bucket exists but only holds colliding keys
return TryGetValueInBucket(key, bucket, out value);
}

IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
}
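
A minimal usage sketch (illustrative only; the commit message notes tests are still WIP, and this assumes the DSA project's supporting LinkedList<T>/Node<T> types used above):

using DSA;

var table = new HashTable<string, int>();
table.Add("one", 1);
table["two"] = 2; // the indexer setter forwards to Add, which replaces on duplicate keys
Console.WriteLine(table["one"]); // 1
Console.WriteLine(table.ContainsKey("two")); // True
table.Remove("one");
Console.WriteLine(table.Count); // 1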