diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 4eb24d71009aa..66a7a2e090ae6 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -195,6 +195,7 @@ pub enum CodegenErrors { EmptyVersionNumber, EncodingVersionMismatch { version_array: String, rlink_version: u32 }, RustcVersionMismatch { rustc_version: String }, + CorruptFile, } pub fn provide(providers: &mut Providers) { @@ -265,7 +266,9 @@ impl CodegenResults { }); } - let mut decoder = MemDecoder::new(&data[4..], 0); + let Ok(mut decoder) = MemDecoder::new(&data[4..], 0) else { + return Err(CodegenErrors::CorruptFile); + }; let rustc_version = decoder.read_str(); if rustc_version != sess.cfg_version { return Err(CodegenErrors::RustcVersionMismatch { diff --git a/compiler/rustc_driver_impl/messages.ftl b/compiler/rustc_driver_impl/messages.ftl index 5b39248302e7a..31837e0176433 100644 --- a/compiler/rustc_driver_impl/messages.ftl +++ b/compiler/rustc_driver_impl/messages.ftl @@ -10,6 +10,8 @@ driver_impl_ice_path_error = the ICE couldn't be written to `{$path}`: {$error} driver_impl_ice_path_error_env = the environment variable `RUSTC_ICE` is set to `{$env_var}` driver_impl_ice_version = rustc {$version} running on {$triple} +driver_impl_rlink_corrupt_file = corrupt metadata encountered in `{$file}` + driver_impl_rlink_empty_version_number = The input does not contain version number driver_impl_rlink_encoding_version_mismatch = .rlink file was produced with encoding version `{$version_array}`, but the current version is `{$rlink_version}` diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs index ba6b9ef078467..5532eff7be661 100644 --- a/compiler/rustc_driver_impl/src/lib.rs +++ b/compiler/rustc_driver_impl/src/lib.rs @@ -96,7 +96,7 @@ mod signal_handler { use crate::session_diagnostics::{ RLinkEmptyVersionNumber, RLinkEncodingVersionMismatch, RLinkRustcVersionMismatch, - RLinkWrongFileType, RlinkNotAFile, RlinkUnableToRead, + RLinkWrongFileType, RlinkCorruptFile, RlinkNotAFile, RlinkUnableToRead, }; rustc_fluent_macro::fluent_messages! { "../messages.ftl" } @@ -645,8 +645,7 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) { match err { CodegenErrors::WrongFileType => dcx.emit_fatal(RLinkWrongFileType), CodegenErrors::EmptyVersionNumber => dcx.emit_fatal(RLinkEmptyVersionNumber), - CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => sess - .dcx() + CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => dcx .emit_fatal(RLinkEncodingVersionMismatch { version_array, rlink_version }), CodegenErrors::RustcVersionMismatch { rustc_version } => { dcx.emit_fatal(RLinkRustcVersionMismatch { @@ -654,6 +653,9 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) { current_version: sess.cfg_version, }) } + CodegenErrors::CorruptFile => { + dcx.emit_fatal(RlinkCorruptFile { file }); + } }; } }; diff --git a/compiler/rustc_driver_impl/src/session_diagnostics.rs b/compiler/rustc_driver_impl/src/session_diagnostics.rs index 1a9683e840afd..449878f28c4e4 100644 --- a/compiler/rustc_driver_impl/src/session_diagnostics.rs +++ b/compiler/rustc_driver_impl/src/session_diagnostics.rs @@ -32,6 +32,12 @@ pub(crate) struct RLinkRustcVersionMismatch<'a> { #[diag(driver_impl_rlink_no_a_file)] pub(crate) struct RlinkNotAFile; +#[derive(Diagnostic)] +#[diag(driver_impl_rlink_corrupt_file)] +pub(crate) struct RlinkCorruptFile<'a> { + pub file: &'a std::path::Path, +} + #[derive(Diagnostic)] #[diag(driver_impl_ice)] pub(crate) struct Ice; diff --git a/compiler/rustc_incremental/messages.ftl b/compiler/rustc_incremental/messages.ftl index e74173b24a97b..de2177ebb6e0f 100644 --- a/compiler/rustc_incremental/messages.ftl +++ b/compiler/rustc_incremental/messages.ftl @@ -21,6 +21,8 @@ incremental_cargo_help_2 = incremental_copy_workproduct_to_cache = error copying object file `{$from}` to incremental directory as `{$to}`: {$err} +incremental_corrupt_file = corrupt incremental compilation artifact found at `{$path}`. This file will automatically be ignored and deleted. If you see this message repeatedly or can provoke it without manually manipulating the compiler's artifacts, please file an issue. The incremental compilation system relies on hardlinks and filesystem locks behaving correctly, and may not deal well with OS crashes, so whatever information you can provide about your filesystem or other state may be very relevant. + incremental_create_dep_graph = failed to create dependency graph at `{$path}`: {$err} incremental_create_incr_comp_dir = diff --git a/compiler/rustc_incremental/src/errors.rs b/compiler/rustc_incremental/src/errors.rs index 61bb0353a9f4b..e94a7fb876bbe 100644 --- a/compiler/rustc_incremental/src/errors.rs +++ b/compiler/rustc_incremental/src/errors.rs @@ -306,3 +306,9 @@ pub struct DeleteWorkProduct<'a> { pub path: &'a Path, pub err: std::io::Error, } + +#[derive(Diagnostic)] +#[diag(incremental_corrupt_file)] +pub struct CorruptFile<'a> { + pub path: &'a Path, +} diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs index 26aaa24771fe8..9e6ce06678513 100644 --- a/compiler/rustc_incremental/src/persist/load.rs +++ b/compiler/rustc_incremental/src/persist/load.rs @@ -115,7 +115,12 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc, WorkPr if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result { // Decode the list of work_products - let mut work_product_decoder = MemDecoder::new(&work_products_data[..], start_pos); + let Ok(mut work_product_decoder) = + MemDecoder::new(&work_products_data[..], start_pos) + else { + sess.dcx().emit_warn(errors::CorruptFile { path: &work_products_path }); + return LoadResult::DataOutOfDate; + }; let work_products: Vec = Decodable::decode(&mut work_product_decoder); @@ -145,7 +150,10 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc, WorkPr LoadResult::DataOutOfDate => LoadResult::DataOutOfDate, LoadResult::LoadDepGraph(path, err) => LoadResult::LoadDepGraph(path, err), LoadResult::Ok { data: (bytes, start_pos) } => { - let mut decoder = MemDecoder::new(&bytes, start_pos); + let Ok(mut decoder) = MemDecoder::new(&bytes, start_pos) else { + sess.dcx().emit_warn(errors::CorruptFile { path: &path }); + return LoadResult::DataOutOfDate; + }; let prev_commandline_args_hash = u64::decode(&mut decoder); if prev_commandline_args_hash != expected_hash { @@ -181,9 +189,14 @@ pub fn load_query_result_cache(sess: &Session) -> Option> { let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache"); - match load_data(&query_cache_path(sess), sess) { + let path = query_cache_path(sess); + match load_data(&path, sess) { LoadResult::Ok { data: (bytes, start_pos) } => { - Some(OnDiskCache::new(sess, bytes, start_pos)) + let cache = OnDiskCache::new(sess, bytes, start_pos).unwrap_or_else(|()| { + sess.dcx().emit_warn(errors::CorruptFile { path: &path }); + OnDiskCache::new_empty(sess.source_map()) + }); + Some(cache) } _ => Some(OnDiskCache::new_empty(sess.source_map())), } diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs index 7de03be6da627..6ff19974c1e79 100644 --- a/compiler/rustc_metadata/src/locator.rs +++ b/compiler/rustc_metadata/src/locator.rs @@ -853,7 +853,12 @@ fn get_metadata_section<'p>( slice_owned(mmap, Deref::deref) } }; - let blob = MetadataBlob(raw_bytes); + let Ok(blob) = MetadataBlob::new(raw_bytes) else { + return Err(MetadataError::LoadFailure(format!( + "corrupt metadata encountered in {}", + filename.display() + ))); + }; match blob.check_compatibility(cfg_version) { Ok(()) => Ok(blob), Err(None) => Err(MetadataError::LoadFailure(format!( diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index bb68c6eaf092e..f91e121a240ed 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -40,10 +40,9 @@ use rustc_span::hygiene::HygieneDecodeContext; mod cstore_impl; /// A reference to the raw binary version of crate metadata. -/// A `MetadataBlob` internally is just a reference counted pointer to -/// the actual data, so cloning it is cheap. -#[derive(Clone)] -pub(crate) struct MetadataBlob(pub(crate) OwnedSlice); +/// This struct applies [`MemDecoder`]'s validation when constructed +/// so that later constructions are guaranteed to succeed. +pub(crate) struct MetadataBlob(OwnedSlice); impl std::ops::Deref for MetadataBlob { type Target = [u8]; @@ -54,6 +53,19 @@ impl std::ops::Deref for MetadataBlob { } } +impl MetadataBlob { + /// Runs the [`MemDecoder`] validation and if it passes, constructs a new [`MetadataBlob`]. + pub fn new(slice: OwnedSlice) -> Result { + if MemDecoder::new(&slice, 0).is_ok() { Ok(Self(slice)) } else { Err(()) } + } + + /// Since this has passed the validation of [`MetadataBlob::new`], this returns bytes which are + /// known to pass the [`MemDecoder`] validation. + pub fn bytes(&self) -> &OwnedSlice { + &self.0 + } +} + /// A map from external crate numbers (as decoded from some crate file) to /// local crate numbers (as generated during this session). Each external /// crate may refer to types in other external crates, and each has their @@ -165,7 +177,14 @@ pub(super) trait Metadata<'a, 'tcx>: Copy { fn decoder(self, pos: usize) -> DecodeContext<'a, 'tcx> { let tcx = self.tcx(); DecodeContext { - opaque: MemDecoder::new(self.blob(), pos), + // FIXME: This unwrap should never panic because we check that it won't when creating + // `MetadataBlob`. Ideally we'd just have a `MetadataDecoder` and hand out subslices of + // it as we do elsewhere in the compiler using `MetadataDecoder::split_at`. But we own + // the data for the decoder so holding onto the `MemDecoder` too would make us a + // self-referential struct which is downright goofy because `MetadataBlob` is already + // self-referential. Probably `MemDecoder` should contain an `OwnedSlice`, but that + // demands a significant refactoring due to our crate graph. + opaque: MemDecoder::new(self.blob(), pos).unwrap(), cdata: self.cdata(), blob: self.blob(), sess: self.sess().or(tcx.map(|tcx| tcx.sess)), @@ -393,7 +412,7 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> { where F: FnOnce(&mut Self) -> R, { - let new_opaque = MemDecoder::new(self.opaque.data(), pos); + let new_opaque = self.opaque.split_at(pos); let old_opaque = mem::replace(&mut self.opaque, new_opaque); let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode); let r = f(self); diff --git a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs index 9950bc1c31f70..861bf6b2769e9 100644 --- a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs +++ b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs @@ -48,7 +48,7 @@ impl<'a, 'tcx> Decodable> for DefPathHashMapRef<'static> fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> { let len = d.read_usize(); let pos = d.position(); - let o = d.blob().clone().0.slice(|blob| &blob[pos..pos + len]); + let o = d.blob().bytes().clone().slice(|blob| &blob[pos..pos + len]); // Although we already have the data we need via the `OwnedSlice`, we still need // to advance the `DecodeContext`'s position so it's in a valid state after diff --git a/compiler/rustc_middle/src/query/on_disk_cache.rs b/compiler/rustc_middle/src/query/on_disk_cache.rs index 2dcb58729ff96..941911c223090 100644 --- a/compiler/rustc_middle/src/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/query/on_disk_cache.rs @@ -154,24 +154,25 @@ impl EncodedSourceFileId { impl<'sess> OnDiskCache<'sess> { /// Creates a new `OnDiskCache` instance from the serialized data in `data`. - pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self { - debug_assert!(sess.opts.incremental.is_some()); - - // Wrap in a scope so we can borrow `data`. - let footer: Footer = { - let mut decoder = MemDecoder::new(&data, start_pos); - - // Decode the *position* of the footer, which can be found in the - // last 8 bytes of the file. - let footer_pos = decoder - .with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| { - IntEncodedWithFixedSize::decode(decoder).0 as usize - }); - // Decode the file footer, which contains all the lookup tables, etc. - decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER)) - }; + /// + /// The serialized cache has some basic integrity checks, if those checks indicate that the + /// on-disk data is corrupt, an error is returned. + pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Result { + assert!(sess.opts.incremental.is_some()); + + let mut decoder = MemDecoder::new(&data, start_pos)?; + + // Decode the *position* of the footer, which can be found in the + // last 8 bytes of the file. + let footer_pos = decoder + .with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| { + IntEncodedWithFixedSize::decode(decoder).0 as usize + }); + // Decode the file footer, which contains all the lookup tables, etc. + let footer: Footer = + decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER)); - Self { + Ok(Self { serialized_data: RwLock::new(Some(data)), file_index_to_stable_id: footer.file_index_to_stable_id, file_index_to_file: Default::default(), @@ -184,7 +185,7 @@ impl<'sess> OnDiskCache<'sess> { expn_data: footer.expn_data, foreign_expn_data: footer.foreign_expn_data, hygiene_context: Default::default(), - } + }) } pub fn new_empty(source_map: &'sess SourceMap) -> Self { @@ -437,7 +438,8 @@ impl<'sess> OnDiskCache<'sess> { let serialized_data = self.serialized_data.read(); let mut decoder = CacheDecoder { tcx, - opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()), + opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()) + .unwrap(), source_map: self.source_map, file_index_to_file: &self.file_index_to_file, file_index_to_stable_id: &self.file_index_to_stable_id, @@ -558,7 +560,7 @@ impl<'a, 'tcx> TyDecoder for CacheDecoder<'a, 'tcx> { { debug_assert!(pos < self.opaque.len()); - let new_opaque = MemDecoder::new(self.opaque.data(), pos); + let new_opaque = self.opaque.split_at(pos); let old_opaque = mem::replace(&mut self.opaque, new_opaque); let r = f(self); self.opaque = old_opaque; diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index b426bb888f460..8e91d9dd60b3c 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -182,15 +182,13 @@ impl SerializedDepGraph { pub fn decode(d: &mut MemDecoder<'_>) -> Arc { // The last 16 bytes are the node count and edge count. debug!("position: {:?}", d.position()); - let (node_count, edge_count, graph_size) = - d.with_position(d.len() - 3 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| { + let (node_count, edge_count) = + d.with_position(d.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| { debug!("position: {:?}", d.position()); let node_count = IntEncodedWithFixedSize::decode(d).0 as usize; let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize; - let graph_size = IntEncodedWithFixedSize::decode(d).0 as usize; - (node_count, edge_count, graph_size) + (node_count, edge_count) }); - assert_eq!(d.len(), graph_size); debug!("position: {:?}", d.position()); debug!(?node_count, ?edge_count); @@ -606,8 +604,6 @@ impl EncoderState { debug!("position: {:?}", encoder.position()); IntEncodedWithFixedSize(node_count).encode(&mut encoder); IntEncodedWithFixedSize(edge_count).encode(&mut encoder); - let graph_size = encoder.position() + IntEncodedWithFixedSize::ENCODED_SIZE; - IntEncodedWithFixedSize(graph_size as u64).encode(&mut encoder); debug!("position: {:?}", encoder.position()); // Drop the encoder so that nothing is written after the counts. let result = encoder.finish(); diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index eec83c02d3537..1dcb69920d73e 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -17,6 +17,8 @@ use crate::int_overflow::DebugStrictAdd; pub type FileEncodeResult = Result; +pub const MAGIC_END_BYTES: &[u8] = b"rust-end-file"; + /// The size of the buffer in `FileEncoder`. const BUF_SIZE: usize = 8192; @@ -181,6 +183,7 @@ impl FileEncoder { } pub fn finish(&mut self) -> FileEncodeResult { + self.write_all(MAGIC_END_BYTES); self.flush(); #[cfg(debug_assertions)] { @@ -261,15 +264,18 @@ pub struct MemDecoder<'a> { impl<'a> MemDecoder<'a> { #[inline] - pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> { + pub fn new(data: &'a [u8], position: usize) -> Result, ()> { + let data = data.strip_suffix(MAGIC_END_BYTES).ok_or(())?; let Range { start, end } = data.as_ptr_range(); - MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData } + Ok(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }) } #[inline] - pub fn data(&self) -> &'a [u8] { - // SAFETY: This recovers the original slice, only using members we never modify. - unsafe { std::slice::from_raw_parts(self.start, self.len()) } + pub fn split_at(&self, position: usize) -> MemDecoder<'a> { + assert!(position <= self.len()); + // SAFETY: We checked above that this offset is within the original slice + let current = unsafe { self.start.add(position) }; + MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData } } #[inline] diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs index dc9b32a968b52..fafe4b91a95d3 100644 --- a/compiler/rustc_serialize/tests/leb128.rs +++ b/compiler/rustc_serialize/tests/leb128.rs @@ -1,4 +1,6 @@ use rustc_serialize::leb128::*; +use rustc_serialize::opaque::MemDecoder; +use rustc_serialize::opaque::MAGIC_END_BYTES; use rustc_serialize::Decoder; macro_rules! impl_test_unsigned_leb128 { @@ -25,13 +27,15 @@ macro_rules! impl_test_unsigned_leb128 { let n = $write_fn_name(&mut buf, x); stream.extend(&buf[..n]); } + let stream_end = stream.len(); + stream.extend(MAGIC_END_BYTES); - let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0); + let mut decoder = MemDecoder::new(&stream, 0).unwrap(); for &expected in &values { let actual = $read_fn_name(&mut decoder); assert_eq!(expected, actual); } - assert_eq!(stream.len(), decoder.position()); + assert_eq!(stream_end, decoder.position()); } }; } @@ -72,13 +76,15 @@ macro_rules! impl_test_signed_leb128 { let n = $write_fn_name(&mut buf, x); stream.extend(&buf[..n]); } + let stream_end = stream.len(); + stream.extend(MAGIC_END_BYTES); - let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0); + let mut decoder = MemDecoder::new(&stream, 0).unwrap(); for &expected in &values { let actual = $read_fn_name(&mut decoder); assert_eq!(expected, actual); } - assert_eq!(stream.len(), decoder.position()); + assert_eq!(stream_end, decoder.position()); } }; } diff --git a/compiler/rustc_serialize/tests/opaque.rs b/compiler/rustc_serialize/tests/opaque.rs index 45ff85f38d2bf..833151d82be35 100644 --- a/compiler/rustc_serialize/tests/opaque.rs +++ b/compiler/rustc_serialize/tests/opaque.rs @@ -42,7 +42,7 @@ fn check_round_trip< encoder.finish().unwrap(); let data = fs::read(&tmpfile).unwrap(); - let mut decoder = MemDecoder::new(&data[..], 0); + let mut decoder = MemDecoder::new(&data[..], 0).unwrap(); for value in values { let decoded = Decodable::decode(&mut decoder); assert_eq!(value, decoded); diff --git a/src/librustdoc/scrape_examples.rs b/src/librustdoc/scrape_examples.rs index 9c9b386edda7c..e9b380fdeac66 100644 --- a/src/librustdoc/scrape_examples.rs +++ b/src/librustdoc/scrape_examples.rs @@ -344,7 +344,9 @@ pub(crate) fn load_call_locations( Ok(bytes) => bytes, Err(e) => dcx.fatal(format!("failed to load examples: {e}")), }; - let mut decoder = MemDecoder::new(&bytes, 0); + let Ok(mut decoder) = MemDecoder::new(&bytes, 0) else { + dcx.fatal(format!("Corrupt metadata encountered in {path}")) + }; let calls = AllCallLocations::decode(&mut decoder); for (function, fn_calls) in calls.into_iter() {