Skip to content

Commit

Permalink
Improve performance by improving the handling of hashes
Browse files Browse the repository at this point in the history
Performance is improved in the following ways:
  1. Eliminate Hash.ComputeHash() that returns a string, and
     Hash.Sum() that converts the string to an array of hash bytes
  2. Avoid the use of Enumerable.Skip() and Enumerable.Take()
  3. Avoid the use of Array.Reverse() on little-endian architectures

Change library to use HashAlgorithm directly.  Remove Hash.cs, Hash128.cs
and their corresponding tests.

Add Utils.HashBytesToUInt32() and Utils.HashBytesToUInt64() functions
that convert a set of .NET hash bytes into a uint or ulong regardless
of the endianness of the architecture.

Fix Utils.HashKernel128() to conform to how HashKernel() works,
and fix corresponding unit tests.

Add TestProbabilisticDataStructures.TestHashKernelFNV1()
that confirms that the HashKernel() function returns
the same values as the HashKernel() function in
https://github.com/tylertreat/BoomFilters running in Go.

Move the original Hash.ComputeHash() that returns a string
to Utils.ComputeHashAsString().  This function is no longer
used by the library, but the unit tests that use it remain.
  • Loading branch information
dferreyra committed May 29, 2018
1 parent 630fced commit e600eae
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 414 deletions.
21 changes: 3 additions & 18 deletions ProbabilisticDataStructures/CuckooBloomFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -384,38 +384,23 @@ private bool Insert(uint i1, uint i2, byte[] f)
/// fingerprint for the given data</returns>
private Components GetComponents(byte[] data)
{
var hash = this.ComputeHash(data);
var hash = Hash.ComputeHash(data);
// The fingerprint is the leading F bytes of the hash.
var f = hash.Take((int)this.F).ToArray();
// i1 is obtained by passing the hash bytes to ComputeHashSum32, and i2 by
// passing the fingerprint bytes to it.
var i1 = this.ComputeHashSum32(hash);
var i2 = this.ComputeHashSum32(f);

return Components.Create(f, i1, i2);
}

/// <summary>
/// Returns the hash of the given data as an array of bytes.
/// </summary>
/// <param name="data">Data to hash</param>
/// <returns>The byte array produced by Sum() after hashing the data</returns>
private byte[] ComputeHash(byte[] data)
{
var hash = new Hash(this.Hash);
hash.ComputeHash(data);
var sum = hash.Sum();
return sum;
}

/// <summary>
/// Hashes the given data and returns the result as a 32-bit unsigned
/// integer.
/// </summary>
/// <param name="data">Data to hash</param>
/// <returns>32-bit hash value</returns>
private uint ComputeHashSum32(byte[] data)
{
var hash = new Hash(this.Hash);
hash.ComputeHash(data);
var sum = hash.Sum();
return Utils.ToBigEndianUInt32(sum);
var sum = Hash.ComputeHash(data);
return Utils.HashBytesToUInt32(sum);
}

/// <summary>
Expand Down
78 changes: 0 additions & 78 deletions ProbabilisticDataStructures/Hash.cs

This file was deleted.

100 changes: 0 additions & 100 deletions ProbabilisticDataStructures/Hash128.cs

This file was deleted.

6 changes: 2 additions & 4 deletions ProbabilisticDataStructures/HyperLogLog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,8 @@ public void SetHash(HashAlgorithm h)
/// <returns>32-bit hash value</returns>
private uint CalculateHash(byte[] data)
{
// Hash the data, then convert the resulting hash bytes into a 32-bit
// unsigned integer.
var hash = new Hash(this.Hash);
hash.ComputeHash(data);
var sum = hash.Sum();
return Utils.ToBigEndianUInt32(sum);
var sum = Hash.ComputeHash(data);
return Utils.HashBytesToUInt32(sum);
}

/// <summary>
Expand Down
6 changes: 2 additions & 4 deletions ProbabilisticDataStructures/InverseBloomFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,8 @@ private uint Index(byte[] data)
/// <returns>32-bit hash value</returns>
private uint ComputeHashSum32(byte[] data)
{
// Hash the data, then convert the resulting hash bytes into a 32-bit
// unsigned integer.
var hash = new Hash(this.Hash);
hash.ComputeHash(data);
var sum = hash.Sum();
return Utils.ToBigEndianUInt32(sum);
var sum = Hash.ComputeHash(data);
return Utils.HashBytesToUInt32(sum);
}

/// <summary>
Expand Down
99 changes: 78 additions & 21 deletions ProbabilisticDataStructures/Utils.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System;
using System.Linq;
using System.Security.Cryptography;
using System.Text;

namespace ProbabilisticDataStructures
{
Expand Down Expand Up @@ -48,19 +49,32 @@ public static uint OptimalK(double fpRate)

/// <summary>
/// Returns the upper and lower base hash values from which the k hashes are
/// derived. The result will be the same regardless of the endianness of the
/// architecture.
/// </summary>
/// <param name="data">The data bytes to hash.</param>
/// <param name="algorithm">The hashing algorithm to use.</param>
/// <returns>A HashKernelReturnValue computed from the hash bytes of the
/// data.</returns>
public static HashKernelReturnValue HashKernel(byte[] data, HashAlgorithm algorithm)
{
var hash = new Hash(algorithm);
hash.ComputeHash(data);
var sum = hash.Sum();
var sum = algorithm.ComputeHash(data);
return HashKernelFromHashBytes(sum);
}

/// <summary>
/// Returns the upper and lower base hash values from which the k hashes are
/// derived using the given hash bytes directly. The result will be the
/// same regardless of the endianness of the architecture. Used by a unit
/// test to confirm the calculation is compatible with the HashKernel from
/// https://github.com/tylertreat/BoomFilters running in Go.
/// </summary>
/// <param name="hashBytes">The hash bytes. Bytes 0-3 and 4-7 are read, so
/// at least eight bytes are required.</param>
/// <returns>A HashKernelReturnValue created from the two 32-bit values read
/// at offsets 0 and 4.</returns>
public static HashKernelReturnValue HashKernelFromHashBytes(byte[] hashBytes)
{
return HashKernelReturnValue.Create(
ToBigEndianUInt32(sum.Skip(4).Take(4).ToArray()),
ToBigEndianUInt32(sum.Take(4).ToArray())
HashBytesToUInt32(hashBytes, 0),
HashBytesToUInt32(hashBytes, 4)
);
}
}

Expand All @@ -73,30 +87,73 @@ public static HashKernelReturnValue HashKernel(byte[] data, HashAlgorithm algori
/// <returns>A HashKernel</returns>
public static HashKernel128ReturnValue HashKernel128(byte[] data, HashAlgorithm algorithm)
{
// Two 64-bit values are read from the hash at offsets 0 and 8, so the
// algorithm must produce at least 16 hash bytes.
var hash = new Hash128(algorithm);
var sum = hash.ComputeHashAndSum(data);
var sum = algorithm.ComputeHash(data);
return HashKernel128ReturnValue.Create(
ToBigEndianUInt64(sum, 8),
ToBigEndianUInt64(sum, 0)
HashBytesToUInt64(sum, 0),
HashBytesToUInt64(sum, 8)
);
}

public static uint ToBigEndianUInt32(byte[] bytes)
/// <summary>
/// Returns the uint represented by the given hash bytes, starting at
/// byte <paramref name="offset"/>. The result will be the same
/// regardless of the endianness of the architecture.
/// </summary>
/// <param name="hashBytes">The hash bytes to read from. Four bytes are
/// read, with the byte at <paramref name="offset"/> becoming the least
/// significant byte of the result.</param>
/// <param name="offset">Index of the first byte to read. Defaults to
/// 0.</param>
/// <returns>The 32-bit unsigned integer assembled from the four
/// bytes.</returns>
public static uint HashBytesToUInt32(byte[] hashBytes, int offset = 0)
{
if (BitConverter.IsLittleEndian)
Array.Reverse(bytes);
return
((uint)hashBytes[offset]) |
((uint)hashBytes[offset + 1]) << 8 |
((uint)hashBytes[offset + 2]) << 16 |
((uint)hashBytes[offset + 3]) << 24;
}

uint i = BitConverter.ToUInt32(bytes, 0);
return i;
/// <summary>
/// Assembles a ulong from eight consecutive hash bytes, beginning at
/// <paramref name="offset"/>. The bytes are combined with explicit shifts,
/// so the value does not depend on the endianness of the architecture.
/// </summary>
/// <param name="hashBytes">The hash bytes to read from. The byte at
/// <paramref name="offset"/> becomes the least significant byte of the
/// result.</param>
/// <param name="offset">Index of the first byte to read. Defaults to
/// 0.</param>
/// <returns>The 64-bit unsigned integer assembled from the eight
/// bytes.</returns>
public static ulong HashBytesToUInt64(byte[] hashBytes, int offset = 0)
{
    ulong result = 0UL;

    // Byte k (counted from offset) lands in bits [8k, 8k + 7] of the result.
    for (int k = 0; k < sizeof(ulong); k++)
    {
        result |= (ulong)hashBytes[offset + k] << (8 * k);
    }

    return result;
}

public static ulong ToBigEndianUInt64(byte[] bytes, int offset)
/// <summary>
/// Compute the hash for the provided bytes and return it as a string of
/// uppercase hexadecimal digits, two per hash byte.
/// </summary>
/// <param name="inputBytes">The bytes to hash.</param>
/// <param name="hashAlgorithm">The hashing algorithm used to compute the
/// hash.</param>
/// <returns>The hash string of the bytes.</returns>
public static string ComputeHashAsString(byte[] inputBytes, HashAlgorithm hashAlgorithm)
{
if (BitConverter.IsLittleEndian)
Array.Reverse(bytes, offset, 8);
// Compute the hash of the input byte array.
byte[] data = hashAlgorithm.ComputeHash(inputBytes);

// Create a new StringBuilder to collect the bytes and create a string.
StringBuilder sb = new StringBuilder();

// Loop through each byte of the hashed data and format each one as a
// two-digit uppercase hexadecimal string.
for (int i = 0; i < data.Length; i++)
{
sb.Append(data[i].ToString("X2"));
}

ulong i = BitConverter.ToUInt64(bytes, offset);
return i;
// Return the hexadecimal string.
return sb.ToString();
}
}

Expand Down
Loading

0 comments on commit e600eae

Please sign in to comment.