diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8ce876418..8abd437156 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -355,6 +355,7 @@ target_sources(mold PRIVATE
   common/malloc.cc
   common/multi-glob.cc
   common/perf.cc
+  common/siphash.cc
   common/tar.cc
   elf/arch-alpha.cc
   elf/arch-arm32.cc
diff --git a/common/common.h b/common/common.h
index ec6d3ac71f..dde1f913b8 100644
--- a/common/common.h
+++ b/common/common.h
@@ -720,6 +720,25 @@ class ConcurrentMap {
   i64 nbuckets = 0;
 };
 
+//
+// siphash.cc
+//
+
+class SipHash {  // incremental keyed hash producing a 16-byte digest; defined in siphash.cc
+public:
+  SipHash(u8 *key);  // reads 16 bytes from key: key[0..7] and key[8..15]
+  void update(u8 *msg, i64 msglen);  // absorb msglen bytes of msg; may be called repeatedly
+  void finish(u8 *out);  // writes 16 bytes of hash output to out
+
+private:
+  u64 k0, k1;  // the two 64-bit key words
+  u64 v0, v1, v2, v3;  // internal state words mixed by every round
+
+  i64 total_bytes = 0;  // sum of every msglen passed to update()
+  u8 buf[8] = {};  // bytes of a not-yet-complete 8-byte word
+  i64 buflen = 0;  // number of valid bytes in buf (0..7 between calls)
+};
+
 //
 // output-file.h
 //
diff --git a/common/siphash.cc b/common/siphash.cc
new file mode 100644
index 0000000000..f142c046e8
--- /dev/null
+++ b/common/siphash.cc
@@ -0,0 +1,103 @@
+// This file is based on the SipHash reference implementation which is
+// in the public domain.
+//
+// SipHash is a keyed hash function designed to be collision-resistant
+// as long as the key is not known to an attacker. That is, as long as we
+// use some random number as a key, we can assume that there's no hash
+// collision. I don't think SipHash is thoroughly tested as a secure hash
+// function as others such as SHA256 or Blake3 are, but for our purpose,
+// I believe the hash function is robust enough. SipHash is much faster
+// than SHA256 or Blake3.
+//
+// Our implementation always outputs a 128 bit hash value.
+
+#include "common.h"
+
+namespace mold {
+// ROUND mixes the four state words v0..v3 in place using add, rotate and xor.
+#define ROUND \
+  do { \
+    v0 += v1; \
+    v1 = std::rotl(v1, 13); \
+    v1 ^= v0; \
+    v0 = std::rotl(v0, 32); \
+    v2 += v3; \
+    v3 = std::rotl(v3, 16); \
+    v3 ^= v2; \
+    v0 += v3; \
+    v3 = std::rotl(v3, 21); \
+    v3 ^= v0; \
+    v2 += v1; \
+    v1 = std::rotl(v1, 17); \
+    v1 ^= v2; \
+    v2 = std::rotl(v2, 32); \
+  } while (0)
+
+// SipHash-1-3: C_ROUND is one ROUND per absorbed 8-byte word, D_ROUND is three ROUNDs per output half
+#define C_ROUND ROUND
+#define D_ROUND for (i64 i = 0; i < 3; i++) ROUND
+
+SipHash::SipHash(u8 *key) {  // key must point to at least 16 readable bytes
+  k0 = *(ul64 *)key;  // first 8 key bytes
+  k1 = *(ul64 *)(key + 8);  // next 8 key bytes
+  v0 = 0x736f6d6570736575 ^ k0;  // constant is ASCII "somepseu"
+  v1 = 0x646f72616e646f6d ^ k1 ^ 0xee;  // "dorandom"; the extra ^ 0xee is presumably the reference code's 128-bit-output tweak - confirm
+  v2 = 0x6c7967656e657261 ^ k0;  // "lygenera"
+  v3 = 0x7465646279746573 ^ k1;  // "tedbytes"
+}
+
+void SipHash::update(u8 *msg, i64 msglen) {  // absorb msglen bytes starting at msg
+  total_bytes += msglen;  // finish() folds the total length into the last word
+  // NOTE(review): msg reaches memcpy even when msglen == 0; confirm callers never pass null
+  if (buflen) {  // a previous call left a partial word in buf
+    if (buflen + msglen < 8) {  // still short of a full word: append and wait for more
+      memcpy(buf + buflen, msg, msglen);
+      buflen += msglen;
+      return;
+    }
+
+    i64 j = 8 - buflen;  // bytes needed to complete the buffered word
+    memcpy(buf + buflen, msg, j);
+
+    u64 m = *(ul64 *)buf;
+    v3 ^= m;  // absorb one word: xor into v3, one round, xor into v0
+    C_ROUND;
+    v0 ^= m;
+
+    msg += j;
+    msglen -= j;
+    buflen = 0;
+  }
+
+  while (msglen >= 8) {  // absorb whole 8-byte words straight from msg
+    u64 m = *(ul64 *)msg;
+    v3 ^= m;
+    C_ROUND;
+    v0 ^= m;
+
+    msg += 8;
+    msglen -= 8;
+  }
+
+  memcpy(buf, msg, msglen);  // keep the remaining 0..7 bytes for the next call
+  buflen = msglen;
+}
+
+void SipHash::finish(u8 *out) {  // absorb the padded tail, then write 16 bytes to out
+  memset(buf + buflen, 0, 8 - buflen);  // zero-pad the buffered tail to 8 bytes
+  u64 b = (total_bytes << 56) | *(ul64 *)buf;  // low 8 bits of the total length land in bits 56..63
+
+  v3 ^= b;
+  C_ROUND;
+  v0 ^= b;
+
+  v2 ^= 0xee;
+  D_ROUND;
+  *(ul64 *)out = v0 ^ v1 ^ v2 ^ v3;  // first 8 output bytes
+
+  v1 ^= 0xdd;
+  D_ROUND;
+  *(ul64 *)(out + 8) = v0 ^ v1 ^ v2 ^ v3;  // second 8 output bytes
+}
+
+} // namespace mold
diff --git a/elf/icf.cc b/elf/icf.cc
index 9b1c8f58ef..ec3a0142a4 100644
--- a/elf/icf.cc
+++ b/elf/icf.cc
@@ -65,10 +65,10 @@
 // conditions.
 
 #include "mold.h"
-#include "blake3.h"
 
 #include
 #include
+#include  // NOTE(review): header name is missing here; std::random_device below needs <random> - confirm
 #include
 #include
 #include
@@ -90,6 +90,14 @@ template <> struct hash {
 
 namespace mold::elf {
 
+static u8 hmac_key[16];  // 16-byte key handed to every SipHash constructed in this file
+
+static void init_hmac_key() {  // fill hmac_key with four 32-bit values from std::random_device
+  std::random_device rand;
+  u32 tmp[4] = { rand(), rand(), rand(), rand() };
+  memcpy(hmac_key, tmp, 16);
+}
+
 template
 static void uniquify_cies(Context &ctx) {
   Timer t(ctx, "uniquify_cies");
@@ -130,17 +138,6 @@ static bool is_eligible(Context &ctx, InputSection &isec) {
   }
 }
 
-static Digest digest_final(blake3_hasher *hasher) {
-  assert(HASH_SIZE <= BLAKE3_OUT_LEN);
-
-  u8 buf[BLAKE3_OUT_LEN];
-  blake3_hasher_finalize(hasher, buf, BLAKE3_OUT_LEN);
-
-  Digest digest;
-  memcpy(digest.data(), buf, HASH_SIZE);
-  return digest;
-}
-
 template
 static bool is_leaf(Context &ctx, InputSection &isec) {
   if (!isec.get_rels(ctx).empty())
@@ -234,16 +231,15 @@ static void merge_leaf_nodes(Context &ctx) {
 
 template
 static Digest compute_digest(Context &ctx, InputSection &isec) {
-  blake3_hasher hasher;
-  blake3_hasher_init(&hasher);
+  SipHash hasher(hmac_key);  // keyed with the file-static hmac_key
 
   auto hash = [&](auto val) {
-    blake3_hasher_update(&hasher, (u8 *)&val, sizeof(val));
+    hasher.update((u8 *)&val, sizeof(val));  // feed the raw bytes of val
   };
 
   auto hash_string = [&](std::string_view str) {
     hash(str.size());
-    blake3_hasher_update(&hasher, (u8 *)str.data(), str.size());
+    hasher.update((u8 *)str.data(), str.size());  // length was hashed just above, then the bytes
   };
 
   auto hash_symbol = [&](Symbol &sym) {
@@ -299,7 +295,9 @@ static Digest compute_digest(Context &ctx, InputSection &isec) {
     hash_symbol(*isec.file.symbols[rel.r_sym]);
   }
 
-  return digest_final(&hasher);
+  Digest digest;
+  hasher.finish(digest.data());  // NOTE(review): finish() always writes 16 bytes; assumes Digest is exactly 16 bytes - confirm HASH_SIZE
+  return digest;
 }
 
 template
@@ -412,17 +410,16 @@ static i64 propagate(std::span> digests,
     if (converged.get(i))
       return;
 
-    blake3_hasher hasher;
-    blake3_hasher_init(&hasher);
-    blake3_hasher_update(&hasher, digests[2][i].data(), HASH_SIZE);
+    SipHash hasher(hmac_key);  // fresh hasher per node, same key as compute_digest
+    hasher.update(digests[2][i].data(), HASH_SIZE);  // start from node i's entry in digests[2]
 
     i64 begin = edge_indices[i];
     i64 end = (i + 1 == num_digests) ? edges.size() : edge_indices[i + 1];
 
     for (i64 j : edges.subspan(begin, end - begin))
-      blake3_hasher_update(&hasher, digests[slot][j].data(), HASH_SIZE);
+      hasher.update(digests[slot][j].data(), HASH_SIZE);  // mix in the current-slot digest of each edge target
 
-    digests[!slot][i] = digest_final(&hasher);
+    hasher.finish(digests[!slot][i].data());  // write the new digest straight into the other slot
 
     if (digests[slot][i] == digests[!slot][i]) {
       // This node has converged. Skip further iterations as it will
@@ -498,6 +495,7 @@ void icf_sections(Context &ctx) {
   if (ctx.objs.empty())
     return;
 
+  init_hmac_key();  // re-randomize the SipHash key on each icf_sections() call
   uniquify_cies(ctx);
   merge_leaf_nodes(ctx);