Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let a portion of DefPathHash uniquely identify the DefPath's crate. #81635

Merged
merged 5 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion compiler/rustc_ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ pub mod util {

pub mod ast;
pub mod attr;
pub mod crate_disambiguator;
pub mod entry;
pub mod expand;
pub mod mut_visit;
Expand Down
28 changes: 25 additions & 3 deletions compiler/rustc_data_structures/src/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,30 @@ use std::hash::{Hash, Hasher};
use std::mem::{self, MaybeUninit};

#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)]
#[repr(C)]
pub struct Fingerprint(u64, u64);

impl Fingerprint {
pub const ZERO: Fingerprint = Fingerprint(0, 0);

#[inline]
pub fn new(_0: u64, _1: u64) -> Fingerprint {
Fingerprint(_0, _1)
}

#[inline]
pub fn from_smaller_hash(hash: u64) -> Fingerprint {
Fingerprint(hash, hash)
}

#[inline]
pub fn to_smaller_hash(&self) -> u64 {
self.0
// Even though both halves of the fingerprint are expected to be good
// quality hash values, let's still combine the two values because the
// Fingerprints in DefPathHash have the StableCrateId portion which is
// the same for all DefPathHashes from the same crate. Combining the
// two halfs makes sure we get a good quality hash in such cases too.
self.0.wrapping_mul(3).wrapping_add(self.1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. The previous version of the code had an invariant where for all u64 h, Fingerprint::from_smaller_hash(h).to_smaller_hash() == h, and this new version does not support that invariant (if I understand correctly).

But I suppose that is also ... the point? (One workaround would be to make from_smaller_hash set self.0 to 0. But I would expect that to have bad fallout elsewhere...)

I don't know if that invariant matters, but I'd be careful about breaking it, since its the kind of thing I would expect to hold...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay I reviewed the code, and from_smaller_hash is only called in one place, on local_id.as_u32(). I don't think we're making use of the invariant that I wrote above.

}

#[inline]
Expand Down Expand Up @@ -92,8 +103,19 @@ impl<H: Hasher> FingerprintHasher for H {
impl FingerprintHasher for crate::unhash::Unhasher {
#[inline]
fn write_fingerprint(&mut self, fingerprint: &Fingerprint) {
// `Unhasher` only wants a single `u64`
self.write_u64(fingerprint.0);
// Even though both halves of the fingerprint are expected to be good
// quality hash values, let's still combine the two values because the
// Fingerprints in DefPathHash have the StableCrateId portion which is
// the same for all DefPathHashes from the same crate. Combining the
// two halfs makes sure we get a good quality hash in such cases too.
//
// Since `Unhasher` is used only in the context of HashMaps, it is OK
// to combine the two components in an order-independent way (which is
// cheaper than the more robust Fingerprint::to_smaller_hash()). For
// HashMaps we don't really care if Fingerprint(x,y) and
// Fingerprint(y, x) result in the same hash value. Collision
// probability will still be much better than with FxHash.
self.write_u64(fingerprint.0.wrapping_add(fingerprint.1));
wesleywiser marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
71 changes: 51 additions & 20 deletions compiler/rustc_hir/src/definitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
//! expressions) that are mostly just leftovers.

pub use crate::def_id::DefPathHash;
use crate::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
use crate::def_id::{
CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, CRATE_DEF_INDEX, LOCAL_CRATE,
};
use crate::hir;

use rustc_ast::crate_disambiguator::CrateDisambiguator;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::StableHasher;
use rustc_data_structures::unhash::UnhashMap;
use rustc_index::vec::IndexVec;
use rustc_span::crate_disambiguator::CrateDisambiguator;
use rustc_span::hygiene::ExpnId;
use rustc_span::symbol::{kw, sym, Symbol};

Expand All @@ -27,6 +30,7 @@ use tracing::debug;
pub struct DefPathTable {
index_to_key: IndexVec<DefIndex, DefKey>,
def_path_hashes: IndexVec<DefIndex, DefPathHash>,
def_path_hash_to_index: UnhashMap<DefPathHash, DefIndex>,
}

impl DefPathTable {
Expand All @@ -39,6 +43,35 @@ impl DefPathTable {
};
self.def_path_hashes.push(def_path_hash);
debug_assert!(self.def_path_hashes.len() == self.index_to_key.len());

// Check for hash collisions of DefPathHashes. These should be
// exceedingly rare.
if let Some(existing) = self.def_path_hash_to_index.insert(def_path_hash, index) {
let def_path1 = DefPath::make(LOCAL_CRATE, existing, |idx| self.def_key(idx));
let def_path2 = DefPath::make(LOCAL_CRATE, index, |idx| self.def_key(idx));

// Continuing with colliding DefPathHashes can lead to correctness
// issues. We must abort compilation.
wesleywiser marked this conversation as resolved.
Show resolved Hide resolved
//
// The likelyhood of such a collision is very small, so actually
// running into one could be indicative of a poor hash function
// being used.
//
// See the documentation for DefPathHash for more information.
panic!(
"found DefPathHash collsion between {:?} and {:?}. \
Compilation cannot continue.",
def_path1, def_path2
);
}

// Assert that all DefPathHashes correctly contain the local crate's
// StableCrateId
#[cfg(debug_assertions)]
if let Some(root) = self.def_path_hashes.get(CRATE_DEF_INDEX) {
assert!(def_path_hash.stable_crate_id() == root.stable_crate_id());
}

index
}

Expand Down Expand Up @@ -108,13 +141,10 @@ pub struct DefKey {
}

impl DefKey {
fn compute_stable_hash(&self, parent_hash: DefPathHash) -> DefPathHash {
pub(crate) fn compute_stable_hash(&self, parent: DefPathHash) -> DefPathHash {
let mut hasher = StableHasher::new();

// We hash a `0u8` here to disambiguate between regular `DefPath` hashes,
// and the special "root_parent" below.
0u8.hash(&mut hasher);
parent_hash.hash(&mut hasher);
parent.hash(&mut hasher);

let DisambiguatedDefPathData { ref data, disambiguator } = self.disambiguated_data;

Expand All @@ -127,19 +157,13 @@ impl DefKey {

disambiguator.hash(&mut hasher);

DefPathHash(hasher.finish())
}
let local_hash: u64 = hasher.finish();

fn root_parent_stable_hash(
crate_name: &str,
crate_disambiguator: CrateDisambiguator,
) -> DefPathHash {
let mut hasher = StableHasher::new();
// Disambiguate this from a regular `DefPath` hash; see `compute_stable_hash()` above.
1u8.hash(&mut hasher);
crate_name.hash(&mut hasher);
crate_disambiguator.hash(&mut hasher);
DefPathHash(hasher.finish())
// Construct the new DefPathHash, making sure that the `crate_id`
// portion of the hash is properly copied from the parent. This way the
// `crate_id` part will be recursively propagated from the root to all
// DefPathHashes in this DefPathTable.
DefPathHash::new(parent.stable_crate_id(), local_hash)
}
}

Expand Down Expand Up @@ -295,6 +319,12 @@ impl Definitions {
self.table.def_path_hash(id.local_def_index)
}

#[inline]
pub fn def_path_hash_to_def_id(&self, def_path_hash: DefPathHash) -> LocalDefId {
let local_def_index = self.table.def_path_hash_to_index[&def_path_hash];
LocalDefId { local_def_index }
}

/// Returns the path from the crate root to `index`. The root
/// nodes are not included in the path (i.e., this will be an
/// empty vector for the crate root). For an inlined item, this
Expand Down Expand Up @@ -332,7 +362,8 @@ impl Definitions {
},
};

let parent_hash = DefKey::root_parent_stable_hash(crate_name, crate_disambiguator);
let stable_crate_id = StableCrateId::new(crate_name, crate_disambiguator);
let parent_hash = DefPathHash::new(stable_crate_id, 0);
let def_path_hash = key.compute_stable_hash(parent_hash);

// Create the root definition.
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_hir/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ mod stable_hash_impls;
mod target;
pub mod weak_lang_items;

#[cfg(test)]
mod tests;

pub use hir::*;
pub use hir_id::*;
pub use lang_items::{LangItem, LanguageItems};
Expand Down
39 changes: 39 additions & 0 deletions compiler/rustc_hir/src/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use crate::definitions::{DefKey, DefPathData, DisambiguatedDefPathData};
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_span::crate_disambiguator::CrateDisambiguator;
use rustc_span::def_id::{DefPathHash, StableCrateId};

#[test]
fn def_path_hash_depends_on_crate_id() {
// This test makes sure that *both* halves of a DefPathHash depend on
// the crate-id of the defining crate. This is a desirable property
// because the crate-id can be more easily changed than the DefPath
// of an item, so, in the case of a crate-local DefPathHash collision,
// the user can simply "role the dice again" for all DefPathHashes in
// the crate by changing the crate disambiguator (e.g. via bumping the
// crate's version number).

let d0 = CrateDisambiguator::from(Fingerprint::new(12, 34));
let d1 = CrateDisambiguator::from(Fingerprint::new(56, 78));

let h0 = mk_test_hash("foo", d0);
let h1 = mk_test_hash("foo", d1);

assert_ne!(h0.stable_crate_id(), h1.stable_crate_id());
assert_ne!(h0.local_hash(), h1.local_hash());

fn mk_test_hash(crate_name: &str, crate_disambiguator: CrateDisambiguator) -> DefPathHash {
let stable_crate_id = StableCrateId::new(crate_name, crate_disambiguator);
let parent_hash = DefPathHash::new(stable_crate_id, 0);

let key = DefKey {
parent: None,
disambiguated_data: DisambiguatedDefPathData {
data: DefPathData::CrateRoot,
disambiguator: 0,
},
};

key.compute_stable_hash(parent_hash)
}
}
30 changes: 28 additions & 2 deletions compiler/rustc_metadata/src/creader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ use crate::rmeta::{CrateDep, CrateMetadata, CrateNumMap, CrateRoot, MetadataBlob

use rustc_ast::expand::allocator::AllocatorKind;
use rustc_ast::{self as ast, *};
use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::svh::Svh;
use rustc_data_structures::sync::Lrc;
use rustc_expand::base::SyntaxExtension;
use rustc_hir::def_id::{CrateNum, LocalDefId, LOCAL_CRATE};
use rustc_hir::def_id::{CrateNum, LocalDefId, StableCrateId, LOCAL_CRATE};
use rustc_hir::definitions::Definitions;
use rustc_index::vec::IndexVec;
use rustc_middle::middle::cstore::{CrateDepKind, CrateSource, ExternCrate};
Expand Down Expand Up @@ -40,6 +40,10 @@ pub struct CStore {
allocator_kind: Option<AllocatorKind>,
/// This crate has a `#[global_allocator]` item.
has_global_allocator: bool,

/// This map is used to verify we get no hash conflicts between
/// `StableCrateId` values.
stable_crate_ids: FxHashMap<StableCrateId, CrateNum>,
}

pub struct CrateLoader<'a> {
Expand Down Expand Up @@ -192,6 +196,11 @@ impl<'a> CrateLoader<'a> {
metadata_loader: &'a MetadataLoaderDyn,
local_crate_name: &str,
) -> Self {
let local_crate_stable_id =
StableCrateId::new(local_crate_name, sess.local_crate_disambiguator());
let mut stable_crate_ids = FxHashMap::default();
stable_crate_ids.insert(local_crate_stable_id, LOCAL_CRATE);

CrateLoader {
sess,
metadata_loader,
Expand All @@ -205,6 +214,7 @@ impl<'a> CrateLoader<'a> {
injected_panic_runtime: None,
allocator_kind: None,
has_global_allocator: false,
stable_crate_ids,
},
used_extern_options: Default::default(),
}
Expand Down Expand Up @@ -311,6 +321,20 @@ impl<'a> CrateLoader<'a> {
res
}

fn verify_no_stable_crate_id_hash_conflicts(
&mut self,
root: &CrateRoot<'_>,
cnum: CrateNum,
) -> Result<(), CrateError> {
if let Some(existing) = self.cstore.stable_crate_ids.insert(root.stable_crate_id(), cnum) {
let crate_name0 = root.name();
let crate_name1 = self.cstore.get_crate_data(existing).name();
return Err(CrateError::StableCrateIdCollision(crate_name0, crate_name1));
}

Ok(())
}

fn register_crate(
&mut self,
host_lib: Option<Library>,
Expand All @@ -332,6 +356,8 @@ impl<'a> CrateLoader<'a> {
// Claim this crate number and cache it
let cnum = self.cstore.alloc_new_crate_num();

self.verify_no_stable_crate_id_hash_conflicts(&crate_root, cnum)?;

info!(
"register crate `{}` (cnum = {}. private_dep = {})",
crate_root.name(),
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_metadata/src/locator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,7 @@ crate enum CrateError {
MultipleMatchingCrates(Symbol, FxHashMap<Svh, Library>),
SymbolConflictsCurrent(Symbol),
SymbolConflictsOthers(Symbol),
StableCrateIdCollision(Symbol, Symbol),
DlOpen(String),
DlSym(String),
LocatorCombined(CombinedLocatorError),
Expand Down Expand Up @@ -970,6 +971,13 @@ impl CrateError {
`-C metadata`. This will result in symbol conflicts between the two.",
root_name,
),
CrateError::StableCrateIdCollision(crate_name0, crate_name1) => {
let msg = format!(
"found crates (`{}` and `{}`) with colliding StableCrateId values.",
crate_name0, crate_name1
);
sess.struct_span_err(span, &msg)
}
CrateError::DlOpen(s) | CrateError::DlSym(s) => sess.struct_span_err(span, &s),
CrateError::LocatorCombined(locator) => {
let crate_name = locator.crate_name;
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,10 @@ impl CrateRoot<'_> {
self.hash
}

crate fn stable_crate_id(&self) -> StableCrateId {
self.stable_crate_id
}

crate fn triple(&self) -> &TargetTriple {
&self.triple
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
disambiguator: tcx.sess.local_crate_disambiguator(),
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
panic_strategy: tcx.sess.panic_strategy(),
edition: tcx.sess.edition(),
has_global_allocator: tcx.has_global_allocator(LOCAL_CRATE),
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use rustc_data_structures::svh::Svh;
use rustc_data_structures::sync::MetadataRef;
use rustc_hir as hir;
use rustc_hir::def::{CtorKind, DefKind};
use rustc_hir::def_id::{DefId, DefIndex, DefPathHash};
use rustc_hir::def_id::{DefId, DefIndex, DefPathHash, StableCrateId};
use rustc_hir::definitions::DefKey;
use rustc_hir::lang_items;
use rustc_index::{bit_set::FiniteBitSet, vec::IndexVec};
Expand Down Expand Up @@ -203,6 +203,7 @@ crate struct CrateRoot<'tcx> {
extra_filename: String,
hash: Svh,
disambiguator: CrateDisambiguator,
stable_crate_id: StableCrateId,
panic_strategy: PanicStrategy,
edition: Edition,
has_global_allocator: bool,
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_session/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use crate::parse::ParseSess;
use crate::search_paths::{PathKind, SearchPath};

pub use rustc_ast::attr::MarkedAttrs;
pub use rustc_ast::crate_disambiguator::CrateDisambiguator;
pub use rustc_ast::Attribute;
use rustc_data_structures::flock;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
Expand All @@ -23,6 +22,7 @@ use rustc_errors::json::JsonEmitter;
use rustc_errors::registry::Registry;
use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, DiagnosticId, ErrorReported};
use rustc_lint_defs::FutureBreakage;
pub use rustc_span::crate_disambiguator::CrateDisambiguator;
use rustc_span::edition::Edition;
use rustc_span::source_map::{FileLoader, MultiSpan, RealFileLoader, SourceMap, Span};
use rustc_span::{sym, SourceFileHashAlgorithm, Symbol};
Expand Down
Loading