Skip to content

Commit

Permalink
Ensure that NULL and 0 hash to different numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
kszucs committed Dec 16, 2024
1 parent 4b0c0bc commit 0b98dfc
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
18 changes: 18 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_hash_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,24 @@ TEST_F(TestScalarHash, Null) {
CheckDeterminisic("hash64", arr);
}

// Checks, for both "hash64" and "hash32", that the first output slot (a null
// input) is exactly zero, while the slots for the inputs 0 and 1 are non-zero
// and differ from each other.
TEST_F(TestScalarHash, NullHashIsZero) {
  // 64-bit hash over an int32 input of [null, 0, 1].
  auto int32_input = ArrayFromJSON(int32(), R"([null, 0, 1])");
  ASSERT_OK_AND_ASSIGN(auto hashed64, CallFunction("hash64", {int32_input}));
  // Hash values are read from buffer index 1 of the resulting array data.
  const auto* hashes64 = hashed64.array()->GetValues<uint64_t>(1);
  ASSERT_EQ(hashes64[0], 0);            // null slot
  ASSERT_NE(hashes64[1], 0);            // value 0 must not look like a null
  ASSERT_NE(hashes64[2], 0);            // value 1
  ASSERT_NE(hashes64[1], hashes64[2]);  // 0 and 1 stay distinguishable

  // 32-bit hash over an int8 input of [null, 0, 1]; same expectations.
  auto int8_input = ArrayFromJSON(int8(), R"([null, 0, 1])");
  ASSERT_OK_AND_ASSIGN(auto hashed32, CallFunction("hash32", {int8_input}));
  const auto* hashes32 = hashed32.array()->GetValues<uint32_t>(1);
  ASSERT_EQ(hashes32[0], 0);
  ASSERT_NE(hashes32[1], 0);
  ASSERT_NE(hashes32[2], 0);
  ASSERT_NE(hashes32[1], hashes32[2]);
}

TEST_F(TestScalarHash, Boolean) {
auto arr = ArrayFromJSON(boolean(), R"([true, false, null, true, null])");
CheckDeterminisic("hash32", arr);
Expand Down
14 changes: 8 additions & 6 deletions cpp/src/arrow/compute/key_hash_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,11 @@ void Hashing32::HashBit(bool combine_hashes, int64_t bit_offset, uint32_t num_ke

template <bool T_COMBINE_HASHES, typename T>
void Hashing32::HashIntImp(uint32_t num_keys, const T* keys, uint32_t* hashes) {
constexpr uint64_t multiplier = 11400714785074694791ULL;
constexpr uint64_t kMultiplier = 11400714785074694791ULL;
constexpr uint64_t kAddend = 9756277977048271785ULL;
for (uint32_t ikey = 0; ikey < num_keys; ++ikey) {
uint64_t x = static_cast<uint64_t>(keys[ikey]);
uint32_t hash = static_cast<uint32_t>(BYTESWAP(x * multiplier));
uint64_t x = static_cast<uint64_t>(keys[ikey]) + kAddend;
uint32_t hash = static_cast<uint32_t>(BYTESWAP(x * kMultiplier));

if (T_COMBINE_HASHES) {
hashes[ikey] = CombineHashesImp(hashes[ikey], hash);
Expand Down Expand Up @@ -751,10 +752,11 @@ void Hashing64::HashBit(bool combine_hashes, int64_t bit_offset, uint32_t num_ke

template <bool T_COMBINE_HASHES, typename T>
void Hashing64::HashIntImp(uint32_t num_keys, const T* keys, uint64_t* hashes) {
constexpr uint64_t multiplier = 11400714785074694791ULL;
constexpr uint64_t kMultiplier = 11400714785074694791ULL;
constexpr uint64_t kAddend = 9756277977048271785ULL;
for (uint32_t ikey = 0; ikey < num_keys; ++ikey) {
uint64_t x = static_cast<uint64_t>(keys[ikey]);
uint64_t hash = static_cast<uint64_t>(BYTESWAP(x * multiplier));
uint64_t x = static_cast<uint64_t>(keys[ikey]) + kAddend;
uint64_t hash = static_cast<uint64_t>(BYTESWAP(x * kMultiplier));

if (T_COMBINE_HASHES) {
hashes[ikey] = CombineHashesImp(hashes[ikey], hash);
Expand Down

0 comments on commit 0b98dfc

Please sign in to comment.