Skip to content

Commit

Permalink
nmhash32: add minimal test-case for the hash implementation being broken
Browse files Browse the repository at this point in the history
It's not 100% clear to me if it's UB, CC bug or lack of some kind
of memory barrier, but the very same quite innocent-looking code leads
to different results on macos-arm64 platform and breaks nmhash32

See rurban#294
  • Loading branch information
darkk committed Sep 27, 2024
1 parent 96dbb84 commit 076285a
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 4 deletions.
39 changes: 37 additions & 2 deletions Hashes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1278,8 +1278,43 @@ void crc32c_pclmul_test(const void *key, int len, uint32_t seed, void *out)
#endif

#include "hash-garage/nmhash.h"
const char * const nmhash32_desc("nmhash32, le:" MACRO_ITOA(NMHASH_LITTLE_ENDIAN) ", vector:" MACRO_ITOA(NMH_VECTOR) ", align:" MACRO_ITOA(NMH_ACC_ALIGN));
const char * const nmhash32x_desc("nmhash32x, le:" MACRO_ITOA(NMHASH_LITTLE_ENDIAN) ", vector:" MACRO_ITOA(NMH_VECTOR) ", align:" MACRO_ITOA(NMH_ACC_ALIGN));
#define NMHASH32_DESC_STR "NMHASH_LITTLE_ENDIAN:" MACRO_ITOA(NMHASH_LITTLE_ENDIAN) ", " \
"NMH_VECTOR:" MACRO_ITOA(NMH_VECTOR) ", " \
"NMH_ACC_ALIGN:" MACRO_ITOA(NMH_ACC_ALIGN) ", " \
"NMH_RESTRICT:" MACRO_ITOA(NMH_RESTRICT)
const char * const nmhash32_desc("nmhash32, " NMHASH32_DESC_STR);
const char * const nmhash32x_desc("nmhash32x, " NMHASH32_DESC_STR);

bool nmhash32_broken( void ) {
static bool done = false, result;
if (done)
return result;

const char entropy[] = "rwgk8M1uxM6XX6c3teQX2yaw8FQWArmcWUSBJ8dcQQJWHYC9Wt2BmpvETxwhYcJTheTbjf49SVRaDJhbEZCq7ki1D6KxpKQSjgwqsiHGSgHLxvPG5kcRnBhjJ1YC8kuh";

NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)];
static_assert(sizeof(entropy) >= sizeof(accX), "Need more entropy in entropy[]");
memcpy(accX, entropy, sizeof(accX));

const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT);
size_t i;

for (i = 0; i < nbGroups * 2; ++i) {
((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i];
}

// XXX: no memory barrier takes place here, just like in NMHASH32_long_round_scalar()
// and it affects the `acc` result.

uint32_t acc = 0;
for (i = 0; i < nbGroups; ++i)
acc += accX[i];

result = (acc != UINT32_C(0x249abaee));
done = true;
return result;
}

// objsize: 4202f0-420c7d: 2445
void nmhash32_test ( const void * key, int len, uint32_t seed, void * out ) {
*(uint32_t*)out = NMHASH32 (key, (const size_t) len, seed);
Expand Down
1 change: 1 addition & 0 deletions Hashes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,7 @@ extern "C" {

extern const char * const nmhash32_desc;
extern const char * const nmhash32x_desc;
bool nmhash32_broken ( void );
void nmhash32_test ( const void * key, int len, uint32_t seed, void * out );
void nmhash32x_test ( const void * key, int len, uint32_t seed, void * out );

Expand Down
4 changes: 2 additions & 2 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -754,8 +754,8 @@ HashInfo g_hashes[] =
{ rapidhash_test, 64, 0xAF404C4B, "rapidhash", "rapidhash v1", GOOD, {}},
{ rapidhash_unrolled_test, 64, 0xAF404C4B, "rapidhash_unrolled", "rapidhash v1 - unrolled", GOOD, {}},
#endif
{ nmhash32_test, 32, 0x12A30553, "nmhash32", nmhash32_desc, GOOD, {}},
{ nmhash32x_test, 32, 0xA8580227, "nmhash32x", nmhash32x_desc, GOOD, {}},
{ nmhash32_test, 32, nmhash32_broken() ? 0U : 0x12A30553, "nmhash32", nmhash32_desc, GOOD, {}},
{ nmhash32x_test, 32, nmhash32_broken() ? 0U : 0xA8580227, "nmhash32x", nmhash32x_desc, GOOD, {}},
#ifdef HAVE_KHASHV
#ifdef __clang__ // also gcc 9.4
#define KHASHV32_VERIF 0xB69DF8EB
Expand Down

0 comments on commit 076285a

Please sign in to comment.