From 8de59be93302781390491666409c35e60664c3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Fri, 28 Jan 2022 17:10:59 +0100 Subject: [PATCH] Compress amount of hashed bytes for `isize` values in StableHasher --- .../src/stable_hasher.rs | 30 ++++++++++++++++++- .../src/stable_hasher/tests.rs | 24 +++++++++++++-- src/test/debuginfo/function-names.rs | 4 +-- src/test/ui/symbol-names/basic.legacy.stderr | 4 +-- .../ui/symbol-names/issue-60925.legacy.stderr | 4 +-- 5 files changed, 57 insertions(+), 9 deletions(-) diff --git a/compiler/rustc_data_structures/src/stable_hasher.rs b/compiler/rustc_data_structures/src/stable_hasher.rs index 9c09a7f5f822e..1495521ddbb54 100644 --- a/compiler/rustc_data_structures/src/stable_hasher.rs +++ b/compiler/rustc_data_structures/src/stable_hasher.rs @@ -137,7 +137,35 @@ impl Hasher for StableHasher { // platforms. This is important for symbol hashes when cross compiling, // for example. Sign extending here is preferable as it means that the // same negative number hashes the same on both 32 and 64 bit platforms. - self.state.write_i64((i as i64).to_le()); + let value = (i as i64).to_le() as u64; + + // Cold path + #[cold] + #[inline(never)] + fn hash_value(state: &mut SipHasher128, value: u64) { + state.write_u8(0xFF); + state.write_u64(value); + } + + // `isize` values often seem to have a small (positive) numeric value in practice. + // To exploit this, if the value is small, we will hash fewer bytes. + // However, we cannot just skip the leading zero bytes, as that would produce the same hash + // e.g. for `(0xAA, 0xAAAA)` and `(0xAAAA, 0xAA)`: both would hash the bytes `AA AA AA`. + // See https://github.com/rust-lang/rust/pull/93014 for context. + // + // Therefore, we employ the following strategy: + // 1) When we encounter a value that fits within a single byte (the most common case, and + // the one this optimization targets), we hash just that byte. 
However, we do + // not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8). + // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding + // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two + // `isize`s that fit within a different number of bytes, they should always produce a different + // byte stream for the hasher. + if value < 0xFF { + self.state.write_u8(value as u8); + } else { + hash_value(&mut self.state, value); + } } } diff --git a/compiler/rustc_data_structures/src/stable_hasher/tests.rs b/compiler/rustc_data_structures/src/stable_hasher/tests.rs index 31190363eb61b..a84ee3da438c7 100644 --- a/compiler/rustc_data_structures/src/stable_hasher/tests.rs +++ b/compiler/rustc_data_structures/src/stable_hasher/tests.rs @@ -39,7 +39,7 @@ fn test_hash_integers() { test_isize.hash(&mut h); // This depends on the hashing algorithm. See note at top of file. - let expected = (2736651863462566372, 8121090595289675650); + let expected = (1784307454142909076, 11471672289340283879); assert_eq!(h.finalize(), expected); } @@ -67,7 +67,7 @@ fn test_hash_isize() { test_isize.hash(&mut h); // This depends on the hashing algorithm. See note at top of file. - let expected = (14721296605626097289, 11385941877786388409); + let expected = (2789913510339652884, 674280939192711005); assert_eq!(h.finalize(), expected); } @@ -140,3 +140,23 @@ fn test_attribute_permutation() { test_type!(i64); test_type!(i128); } + +// Check that the `isize` hashing optimization does not produce the same hash when permuting two +// values. 
+#[test] +fn test_isize_compression() { + fn check_hash(a: u64, b: u64) { + let hash_a = hash(&(a as isize, b as isize)); + let hash_b = hash(&(b as isize, a as isize)); + assert_ne!( + hash_a, hash_b, + "The hash stayed the same when permuting values `{a}` and `{b}!", + ); + } + + check_hash(0xAA, 0xAAAA); + check_hash(0xFF, 0xFFFF); + check_hash(0xAAAA, 0xAAAAAA); + check_hash(0xAAAAAA, 0xAAAAAAAA); + check_hash(0xFF, 0xFFFFFFFFFFFFFFFF); +} diff --git a/src/test/debuginfo/function-names.rs b/src/test/debuginfo/function-names.rs index 61d5fc93cd2ad..ac9a02cce0481 100644 --- a/src/test/debuginfo/function-names.rs +++ b/src/test/debuginfo/function-names.rs @@ -37,7 +37,7 @@ // Const generic parameter // gdb-command:info functions -q function_names::const_generic_fn.* // gdb-check:[...]static fn function_names::const_generic_fn_bool<false>(); -// gdb-check:[...]static fn function_names::const_generic_fn_non_int<{CONST#fe3cfa0214ac55c7}>(); +// gdb-check:[...]static fn function_names::const_generic_fn_non_int<{CONST#3fcd7c34c1555be6}>(); // gdb-check:[...]static fn function_names::const_generic_fn_signed_int<-7>(); // gdb-check:[...]static fn function_names::const_generic_fn_unsigned_int<14>(); @@ -76,7 +76,7 @@ // Const generic parameter // cdb-command:x a!function_names::const_generic_fn* // cdb-check:[...] a!function_names::const_generic_fn_bool<false> (void) -// cdb-check:[...] a!function_names::const_generic_fn_non_int<CONST$fe3cfa0214ac55c7> (void) +// cdb-check:[...] a!function_names::const_generic_fn_non_int<CONST$3fcd7c34c1555be6> (void) // cdb-check:[...] a!function_names::const_generic_fn_unsigned_int<14> (void) // cdb-check:[...] 
a!function_names::const_generic_fn_signed_int<-7> (void) diff --git a/src/test/ui/symbol-names/basic.legacy.stderr b/src/test/ui/symbol-names/basic.legacy.stderr index 5b343b637c316..429e28e5241dc 100644 --- a/src/test/ui/symbol-names/basic.legacy.stderr +++ b/src/test/ui/symbol-names/basic.legacy.stderr @@ -1,10 +1,10 @@ -error: symbol-name(_ZN5basic4main17h7c2c715a9b77648bE) +error: symbol-name(_ZN5basic4main17h611df9c6948c15f7E) --> $DIR/basic.rs:8:1 | LL | #[rustc_symbol_name] | ^^^^^^^^^^^^^^^^^^^^ -error: demangling(basic::main::h7c2c715a9b77648b) +error: demangling(basic::main::h611df9c6948c15f7) --> $DIR/basic.rs:8:1 | LL | #[rustc_symbol_name] diff --git a/src/test/ui/symbol-names/issue-60925.legacy.stderr b/src/test/ui/symbol-names/issue-60925.legacy.stderr index dbeab457194ea..0e36747fb8071 100644 --- a/src/test/ui/symbol-names/issue-60925.legacy.stderr +++ b/src/test/ui/symbol-names/issue-60925.legacy.stderr @@ -1,10 +1,10 @@ -error: symbol-name(_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h419983d0842a72aeE) +error: symbol-name(_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h5425dadb5b1e5fb6E) --> $DIR/issue-60925.rs:21:9 | LL | #[rustc_symbol_name] | ^^^^^^^^^^^^^^^^^^^^ -error: demangling(issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo::h419983d0842a72ae) +error: demangling(issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo::h5425dadb5b1e5fb6) --> $DIR/issue-60925.rs:21:9 | LL | #[rustc_symbol_name]