diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index f1e7f87f56767..deae9686b622f 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -1056,11 +1056,12 @@ pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) -> // know that later). If we are not doing LTO, there is only one optimized // version of each module, so we re-use that. let dep_node = cgu.codegen_dep_node(tcx); - assert!( - !tcx.dep_graph.dep_node_exists(&dep_node), - "CompileCodegenUnit dep-node for CGU `{}` already exists before marking.", - cgu.name() - ); + tcx.dep_graph.assert_nonexistent_node(dep_node, || { + format!( + "CompileCodegenUnit dep-node for CGU `{}` already exists before marking.", + cgu.name() + ) + }); if tcx.try_mark_green(&dep_node) { // We can re-use either the pre- or the post-thinlto state. If no LTO is diff --git a/compiler/rustc_incremental/src/persist/save.rs b/compiler/rustc_incremental/src/persist/save.rs index 58a03cb8b30cd..ea1d07cbdc5b4 100644 --- a/compiler/rustc_incremental/src/persist/save.rs +++ b/compiler/rustc_incremental/src/persist/save.rs @@ -173,7 +173,7 @@ pub(crate) fn build_dep_graph( sess.opts.dep_tracking_hash(false).encode(&mut encoder); Some(DepGraph::new( - &sess.prof, + sess, prev_graph, prev_work_products, encoder, diff --git a/compiler/rustc_query_system/src/dep_graph/edges.rs b/compiler/rustc_query_system/src/dep_graph/edges.rs index 9a3763bd4eeb4..41ce64f11b78e 100644 --- a/compiler/rustc_query_system/src/dep_graph/edges.rs +++ b/compiler/rustc_query_system/src/dep_graph/edges.rs @@ -26,6 +26,13 @@ impl EdgesVec { Self::default() } + #[inline] + pub(crate) fn eval_always() -> Self { + let mut vec = EdgesVec::new(); + vec.push(DepNodeIndex::FOREVER_RED_NODE); + vec + } + #[inline] pub(crate) fn push(&mut self, edge: DepNodeIndex) { self.max = self.max.max(edge.as_u32()); diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index b6aa1d5a43bb8..fb97b1471af1d 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use rustc_data_structures::profiling::{QueryInvocationId, SelfProfilerRef}; +use rustc_data_structures::profiling::QueryInvocationId; use rustc_data_structures::sharded::{self, Sharded}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc}; @@ -16,6 +16,7 @@ use rustc_data_structures::unord::UnordMap; use rustc_index::IndexVec; use rustc_macros::{Decodable, Encodable}; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; +use rustc_session::Session; use tracing::{debug, instrument}; #[cfg(debug_assertions)] use {super::debug::EdgeFilter, std::env}; @@ -119,7 +120,7 @@ where impl DepGraph { pub fn new( - profiler: &SelfProfilerRef, + session: &Session, prev_graph: Arc, prev_work_products: WorkProductMap, encoder: FileEncoder, @@ -129,7 +130,7 @@ impl DepGraph { let prev_graph_node_count = prev_graph.node_count(); let current = CurrentDepGraph::new( - profiler, + session, prev_graph_node_count, encoder, record_graph, @@ -345,21 +346,17 @@ impl DepGraphData { task: fn(Ctxt, A) -> R, hash_result: Option, &R) -> Fingerprint>, ) -> (R, DepNodeIndex) { - // If the following assertion triggers, it can have two reasons: - // 1. Something is wrong with DepNode creation, either here or - // in `DepGraph::try_mark_green()`. - // 2. Two distinct query keys get mapped to the same `DepNode` - // (see for example #48923). - assert!( - !self.dep_node_exists(&key), - "forcing query with already existing `DepNode`\n\ - - query-key: {arg:?}\n\ - - dep-node: {key:?}" - ); + self.assert_nonexistent_node(key, || { + format!( + "forcing query with already existing `DepNode`\n\ + - query-key: {arg:?}\n\ + - dep-node: {key:?}" + ) + }); let with_deps = |task_deps| D::with_deps(task_deps, || task(cx, arg)); let (result, edges) = if cx.dep_context().is_eval_always(key.kind) { - (with_deps(TaskDepsRef::EvalAlways), EdgesVec::new()) + (with_deps(TaskDepsRef::EvalAlways), EdgesVec::eval_always()) } else { let task_deps = Lock::new(TaskDeps { #[cfg(debug_assertions)] @@ -443,7 +440,31 @@ impl DepGraphData { hash: self.current.anon_id_seed.combine(hasher.finish()).into(), }; - self.current.intern_new_node(target_dep_node, task_deps, Fingerprint::ZERO) + // The DepNodes generated by the process above are not unique. 2 queries could + // have exactly the same dependencies. However, deserialization does not handle + // duplicated nodes, so we do the deduplication here directly. + // + // As anonymous nodes are a small quantity compared to the full dep-graph, the + // memory impact of this `anon_node_to_index` map remains tolerable, and helps + // us avoid useless growth of the graph with almost-equivalent nodes. + match self + .current + .anon_node_to_index + .get_shard_by_value(&target_dep_node) + .lock() + .entry(target_dep_node) + { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let dep_node_index = self.current.intern_new_node( + target_dep_node, + task_deps, + Fingerprint::ZERO, + ); + entry.insert(dep_node_index); + dep_node_index + } + } } }; @@ -607,20 +628,18 @@ impl DepGraph { } impl DepGraphData { - #[inline] - fn dep_node_index_of_opt(&self, dep_node: &DepNode) -> Option { - if let Some(prev_index) = self.previous.node_to_index_opt(dep_node) { - self.current.prev_index_to_index.lock()[prev_index] - } else { - self.current.new_node_to_index.lock_shard_by_value(dep_node).get(dep_node).copied() + fn assert_nonexistent_node( + &self, + _dep_node: DepNode, + _msg: impl FnOnce() -> S, + ) { + #[cfg(debug_assertions)] + if let Some(seen_dep_nodes) = &self.current.seen_dep_nodes { + let seen = seen_dep_nodes.lock().contains(&_dep_node); + assert!(!seen, "{}", _msg()); } } - #[inline] - fn dep_node_exists(&self, dep_node: &DepNode) -> bool { - self.dep_node_index_of_opt(dep_node).is_some() - } - fn node_color(&self, dep_node: &DepNode) -> Option { if let Some(prev_index) = self.previous.node_to_index_opt(dep_node) { self.colors.get(prev_index) @@ -653,11 +672,6 @@ impl DepGraphData { } impl DepGraph { - #[inline] - pub fn dep_node_exists(&self, dep_node: &DepNode) -> bool { - self.data.as_ref().is_some_and(|data| data.dep_node_exists(dep_node)) - } - /// Checks whether a previous work product exists for `v` and, if /// so, return the path that leads to it. Used to skip doing work. pub fn previous_work_product(&self, v: &WorkProductId) -> Option { @@ -734,7 +748,7 @@ impl DepGraphData { // in the previous compilation session too, so we can try to // mark it as green by recursively marking all of its // dependencies green. - self.try_mark_previous_green(qcx, prev_index, dep_node, None) + self.try_mark_previous_green(qcx, prev_index, None) .map(|dep_node_index| (prev_index, dep_node_index)) } } @@ -748,14 +762,14 @@ impl DepGraphData { frame: Option<&MarkFrame<'_>>, ) -> Option<()> { let dep_dep_node_color = self.colors.get(parent_dep_node_index); - let dep_dep_node = &self.previous.index_to_node(parent_dep_node_index); + let dep_dep_node = || self.previous.index_to_node(parent_dep_node_index); match dep_dep_node_color { Some(DepNodeColor::Green(_)) => { // This dependency has been marked as green before, we are // still fine and can continue with checking the other // dependencies. - debug!("dependency {dep_dep_node:?} was immediately green"); + debug!("dependency {:?} was immediately green", dep_dep_node()); return Some(()); } Some(DepNodeColor::Red) => { @@ -763,32 +777,25 @@ impl DepGraphData { // compared to the previous compilation session. We cannot // mark the DepNode as green and also don't need to bother // with checking any of the other dependencies. - debug!("dependency {dep_dep_node:?} was immediately red"); + debug!("dependency {:?} was immediately red", dep_dep_node()); return None; } None => {} } - // We don't know the state of this dependency. If it isn't - // an eval_always node, let's try to mark it green recursively. - if !qcx.dep_context().is_eval_always(dep_dep_node.kind) { - debug!( - "state of dependency {:?} ({}) is unknown, trying to mark it green", - dep_dep_node, dep_dep_node.hash, - ); + // We don't know the state of this dependency. Let's try to mark it green recursively. + debug!("state of dependency {:?} is unknown, trying to mark it green", dep_dep_node()); + let node_index = self.try_mark_previous_green(qcx, parent_dep_node_index, frame); - let node_index = - self.try_mark_previous_green(qcx, parent_dep_node_index, dep_dep_node, frame); - - if node_index.is_some() { - debug!("managed to MARK dependency {dep_dep_node:?} as green",); - return Some(()); - } + if node_index.is_some() { + debug!("managed to MARK dependency {:?} as green", dep_dep_node()); + return Some(()); } // We failed to mark it green, so we try to force the query. + let dep_dep_node = dep_dep_node(); debug!("trying to force dependency {dep_dep_node:?}"); - if !qcx.dep_context().try_force_from_dep_node(*dep_dep_node, frame) { + if !qcx.dep_context().try_force_from_dep_node(dep_dep_node, frame) { // The DepNode could not be forced. debug!("dependency {dep_dep_node:?} could not be forced"); return None; @@ -832,21 +839,17 @@ impl DepGraphData { &self, qcx: Qcx, prev_dep_node_index: SerializedDepNodeIndex, - dep_node: &DepNode, frame: Option<&MarkFrame<'_>>, ) -> Option { let frame = MarkFrame { index: prev_dep_node_index, parent: frame }; + let dep_node = || self.previous.index_to_node(prev_dep_node_index); #[cfg(not(parallel_compiler))] - { - debug_assert!(!self.dep_node_exists(dep_node)); - debug_assert!(self.colors.get(prev_dep_node_index).is_none()); - } - - // We never try to mark eval_always nodes as green - debug_assert!(!qcx.dep_context().is_eval_always(dep_node.kind)); - - debug_assert_eq!(self.previous.index_to_node(prev_dep_node_index), *dep_node); + self.assert_nonexistent_node(dep_node(), || { + format!("trying to mark existing {:?} as green", dep_node()) + }); + #[cfg(not(parallel_compiler))] + debug_assert!(self.colors.get(prev_dep_node_index).is_none()); let prev_deps = self.previous.edge_targets_from(prev_dep_node_index); @@ -874,8 +877,8 @@ impl DepGraphData { #[cfg(not(parallel_compiler))] debug_assert!( self.colors.get(prev_dep_node_index).is_none(), - "DepGraph::try_mark_previous_green() - Duplicate DepNodeColor \ - insertion for {dep_node:?}" + "DepGraph::try_mark_previous_green() - Duplicate DepNodeColor insertion for {:?}", + dep_node() ); if side_effects.maybe_any() { @@ -888,7 +891,7 @@ impl DepGraphData { // Multiple threads can all write the same color here self.colors.insert(prev_dep_node_index, DepNodeColor::Green(dep_node_index)); - debug!("successfully marked {dep_node:?} as green"); + debug!("successfully marked {:?} as green", dep_node()); Some(dep_node_index) } @@ -933,6 +936,18 @@ impl DepGraph { self.node_color(dep_node).is_some_and(|c| c.is_green()) } + pub fn assert_nonexistent_node( + &self, + dep_node: DepNode, + msg: impl FnOnce() -> S, + ) { + if cfg!(debug_assertions) + && let Some(data) = &self.data + { + data.assert_nonexistent_node(dep_node, msg) + } + } + /// This method loads all on-disk cacheable query results into memory, so /// they can be written out to the new cache file again. Most query results /// will already be in memory but in the case where we marked something as @@ -1038,24 +1053,24 @@ rustc_index::newtype_index! { /// largest in the compiler. /// /// For this reason, we avoid storing `DepNode`s more than once as map -/// keys. The `new_node_to_index` map only contains nodes not in the previous +/// keys. The `anon_node_to_index` map only contains nodes of anonymous queries not in the previous /// graph, and we map nodes in the previous graph to indices via a two-step /// mapping. `SerializedDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`, /// and the `prev_index_to_index` vector (which is more compact and faster than /// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`. /// -/// This struct uses three locks internally. The `data`, `new_node_to_index`, +/// This struct uses three locks internally. The `data`, `anon_node_to_index`, /// and `prev_index_to_index` fields are locked separately. Operations that take /// a `DepNodeIndex` typically just access the `data` field. /// /// We only need to manipulate at most two locks simultaneously: -/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. When -/// manipulating both, we acquire `new_node_to_index` or `prev_index_to_index` +/// `anon_node_to_index` and `data`, or `prev_index_to_index` and `data`. When +/// manipulating both, we acquire `anon_node_to_index` or `prev_index_to_index` /// first, and `data` second. pub(super) struct CurrentDepGraph { encoder: GraphEncoder, - new_node_to_index: Sharded>, prev_index_to_index: Lock>>, + anon_node_to_index: Sharded>, /// This is used to verify that fingerprints do not change between the creation of a node /// and its recomputation. @@ -1067,6 +1082,11 @@ pub(super) struct CurrentDepGraph { #[cfg(debug_assertions)] forbidden_edge: Option, + /// Used to verify the absence of hash collisions among DepNodes. + /// This field is only `Some` if the `-Z incremental_verify_ich` option is present. + #[cfg(debug_assertions)] + seen_dep_nodes: Option>>, + /// Anonymous `DepNode`s are nodes whose IDs we compute from the list of /// their edges. This has the beneficial side-effect that multiple anonymous /// nodes can be coalesced into one without changing the semantics of the @@ -1088,7 +1108,7 @@ pub(super) struct CurrentDepGraph { impl CurrentDepGraph { fn new( - profiler: &SelfProfilerRef, + session: &Session, prev_graph_node_count: usize, encoder: FileEncoder, record_graph: bool, @@ -1120,10 +1140,10 @@ impl CurrentDepGraph { prev_graph_node_count, record_graph, record_stats, - profiler, + &session.prof, previous, ), - new_node_to_index: Sharded::new(|| { + anon_node_to_index: Sharded::new(|| { FxHashMap::with_capacity_and_hasher( new_node_count_estimate / sharded::shards(), Default::default(), @@ -1135,6 +1155,13 @@ impl CurrentDepGraph { forbidden_edge, #[cfg(debug_assertions)] fingerprints: Lock::new(IndexVec::from_elem_n(None, new_node_count_estimate)), + #[cfg(debug_assertions)] + seen_dep_nodes: session.opts.unstable_opts.incremental_verify_ich.then(|| { + Lock::new(FxHashSet::with_capacity_and_hasher( + new_node_count_estimate, + Default::default(), + )) + }), total_read_count: AtomicU64::new(0), total_duplicate_read_count: AtomicU64::new(0), } @@ -1158,18 +1185,18 @@ impl CurrentDepGraph { edges: EdgesVec, current_fingerprint: Fingerprint, ) -> DepNodeIndex { - let dep_node_index = match self.new_node_to_index.lock_shard_by_value(&key).entry(key) { - Entry::Occupied(entry) => *entry.get(), - Entry::Vacant(entry) => { - let dep_node_index = self.encoder.send(key, current_fingerprint, edges); - entry.insert(dep_node_index); - dep_node_index - } - }; + let dep_node_index = self.encoder.send(key, current_fingerprint, edges); #[cfg(debug_assertions)] self.record_edge(dep_node_index, key, current_fingerprint); + #[cfg(debug_assertions)] + if let Some(ref seen_dep_nodes) = self.seen_dep_nodes { + if !seen_dep_nodes.lock().insert(key) { + panic!("Found duplicate dep-node {key:?}"); + } + } + dep_node_index } @@ -1232,11 +1259,9 @@ impl CurrentDepGraph { fn promote_node_and_deps_to_current( &self, - prev_graph: &SerializedDepGraph, + _prev_graph: &SerializedDepGraph, prev_index: SerializedDepNodeIndex, ) -> DepNodeIndex { - self.debug_assert_not_in_new_nodes(prev_graph, prev_index); - let mut prev_index_to_index = self.prev_index_to_index.lock(); match prev_index_to_index[prev_index] { @@ -1247,26 +1272,13 @@ impl CurrentDepGraph { #[cfg(debug_assertions)] self.record_edge( dep_node_index, - prev_graph.index_to_node(prev_index), - prev_graph.fingerprint_by_index(prev_index), + _prev_graph.index_to_node(prev_index), + _prev_graph.fingerprint_by_index(prev_index), ); dep_node_index } } } - - #[inline] - fn debug_assert_not_in_new_nodes( - &self, - prev_graph: &SerializedDepGraph, - prev_index: SerializedDepNodeIndex, - ) { - let node = &prev_graph.index_to_node(prev_index); - debug_assert!( - !self.new_node_to_index.lock_shard_by_value(node).contains_key(node), - "node from previous graph present in new node collection" - ); - } } #[derive(Debug, Clone, Copy)] @@ -1388,7 +1400,7 @@ fn panic_on_forbidden_read(data: &DepGraphData, dep_node_index: DepN if dep_node.is_none() { // Try to find it among the new nodes - for shard in data.current.new_node_to_index.lock_shards() { + for shard in data.current.anon_node_to_index.lock_shards() { if let Some((node, _)) = shard.iter().find(|(_, index)| **index == dep_node_index) { dep_node = Some(*node); break; diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index ff1c3431b7c52..d7b6065d78a64 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -252,8 +252,27 @@ impl SerializedDepGraph { .map(|_| UnhashMap::with_capacity_and_hasher(d.read_u32() as usize, Default::default())) .collect(); + let mut duplicates = Vec::new(); for (idx, node) in nodes.iter_enumerated() { - index[node.kind.as_usize()].insert(node.hash, idx); + if index[node.kind.as_usize()].insert(node.hash, idx).is_some() { + duplicates.push(node); + } + } + + // Creating the index detected a duplicated DepNode. + // + // If the new session presents us with a DepNode among those, we have no + // way to know which SerializedDepNodeIndex it corresponds to. To avoid + // making the wrong connection between a DepNodeIndex and a SerializedDepNodeIndex, + // we remove all the duplicates from the index. + // + // This way, when the new session presents us with a DepNode among the duplicates, + // we just create a new node with no counterpart in the previous graph. + // + // Red/green marking still works for those nodes, as that algorithm does not + // need to know about DepNode at all. + for node in duplicates { + index[node.kind.as_usize()].remove(&node.hash); } Arc::new(SerializedDepGraph { diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 4b87f5d62b21e..106629921c795 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1762,7 +1762,8 @@ options! { incremental_verify_ich: bool = (false, parse_bool, [UNTRACKED], "verify extended properties for incr. comp. (default: no): - hashes of green query instances - - hash collisions of query keys"), + - hash collisions of query keys + - hash collisions when creating dep-nodes"), inline_in_all_cgus: Option = (None, parse_opt_bool, [TRACKED], "control whether `#[inline]` functions are in all CGUs"), inline_llvm: bool = (true, parse_bool, [TRACKED], diff --git a/tests/incremental/issue-101518.rs b/tests/incremental/issue-101518.rs index 02f4f5d42e74d..d626b4eea1c17 100644 --- a/tests/incremental/issue-101518.rs +++ b/tests/incremental/issue-101518.rs @@ -1,4 +1,11 @@ -//@ revisions: cpass +// This test creates 2 constants that have the same contents but a different AllocId. +// When hashed, those two contants would have the same stable hash. From the point of view +// of the query system, the two calls to `try_destructure_mir_constant` would have the same +// `DepNode` but with different inputs. +// +// This test verifies that the query system does not ICE in such cases. +// +//@ revisions: cpass1 cpass2 #[derive(PartialEq, Eq)] struct Id<'a> { @@ -21,4 +28,7 @@ fn visit_struct2() { } } -fn main() {} +fn main() { + visit_struct(); + visit_struct2(); +}