Skip to content

Commit

Permalink
Rollup merge of #126930 - Xaeroxe:file-checksum-hint, r=chenyukang
Browse files Browse the repository at this point in the history
Add unstable support for outputting file checksums for use in cargo

Adds an unstable option that appends file checksums and expected lengths to the end of the dep-info file such that `cargo` can read and use these values as an alternative to file mtimes.

This PR powers the changes made in this cargo PR rust-lang/cargo#14137

Here's the tracking issue for the cargo feature rust-lang/cargo#14136.
  • Loading branch information
workingjubilee authored Oct 3, 2024
2 parents ad9c494 + 58c5ac4 commit 44f6275
Show file tree
Hide file tree
Showing 20 changed files with 337 additions and 28 deletions.
26 changes: 26 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,12 @@ dependencies = [
"object 0.36.4",
]

[[package]]
name = "arrayref"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"

[[package]]
name = "arrayvec"
version = "0.7.6"
Expand Down Expand Up @@ -262,6 +268,19 @@ version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"

[[package]]
name = "blake3"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d08263faac5cde2a4d52b513dadb80846023aade56fcd8fc99ba73ba8050e92"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
]

[[package]]
name = "block-buffer"
version = "0.10.4"
Expand Down Expand Up @@ -722,6 +741,12 @@ dependencies = [
"windows-sys 0.52.0",
]

[[package]]
name = "constant_time_eq"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"

[[package]]
name = "core-foundation-sys"
version = "0.8.7"
Expand Down Expand Up @@ -4378,6 +4403,7 @@ dependencies = [
name = "rustc_span"
version = "0.0.0"
dependencies = [
"blake3",
"derive-where",
"indexmap",
"itoa",
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,7 @@ pub(crate) fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFi
rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
rustc_span::SourceFileHashAlgorithm::Blake3 => llvm::ChecksumKind::None,
};
let hash_value = hex_encode(source_file.src_hash.hash_bytes());

Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_interface/src/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,12 +389,13 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
let file_loader = config.file_loader.unwrap_or_else(|| Box::new(RealFileLoader));
let path_mapping = config.opts.file_path_mapping();
let hash_kind = config.opts.unstable_opts.src_hash_algorithm(&target);
let checksum_hash_kind = config.opts.unstable_opts.checksum_hash_algorithm();

util::run_in_thread_pool_with_globals(
&early_dcx,
config.opts.edition,
config.opts.unstable_opts.threads,
SourceMapInputs { file_loader, path_mapping, hash_kind },
SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind },
|current_gcx| {
// The previous `early_dcx` can't be reused here because it doesn't
// impl `Send`. Creating a new one is fine.
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_interface/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// tidy-alphabetical-start
#![feature(decl_macro)]
#![feature(file_buffered)]
#![feature(iter_intersperse)]
#![feature(let_chains)]
#![feature(try_blocks)]
#![warn(unreachable_pub)]
Expand Down
106 changes: 92 additions & 14 deletions compiler/rustc_interface/src/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ use rustc_session::cstore::Untracked;
use rustc_session::output::{collect_crate_types, filename_for_input, find_crate_name};
use rustc_session::search_paths::PathKind;
use rustc_session::{Limit, Session};
use rustc_span::FileName;
use rustc_span::symbol::{Symbol, sym};
use rustc_span::{FileName, SourceFileHash, SourceFileHashAlgorithm};
use rustc_target::spec::PanicStrategy;
use rustc_trait_selection::traits;
use tracing::{info, instrument};
Expand Down Expand Up @@ -417,15 +417,23 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
let result: io::Result<()> = try {
// Build a list of files used to compile the output and
// write Makefile-compatible dependency rules
let mut files: Vec<String> = sess
let mut files: Vec<(String, u64, Option<SourceFileHash>)> = sess
.source_map()
.files()
.iter()
.filter(|fmap| fmap.is_real_file())
.filter(|fmap| !fmap.is_imported())
.map(|fmap| escape_dep_filename(&fmap.name.prefer_local().to_string()))
.map(|fmap| {
(
escape_dep_filename(&fmap.name.prefer_local().to_string()),
fmap.source_len.0 as u64,
fmap.checksum_hash,
)
})
.collect();

let checksum_hash_algo = sess.opts.unstable_opts.checksum_hash_algorithm;

// Account for explicitly marked-to-track files
// (e.g. accessed in proc macros).
let file_depinfo = sess.psess.file_depinfo.borrow();
Expand All @@ -437,56 +445,113 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P

// The entries will be used to declare dependencies between files in a
// Makefile-like output, so the iteration order does not matter.
fn hash_iter_files<P: AsRef<Path>>(
it: impl Iterator<Item = P>,
checksum_hash_algo: Option<SourceFileHashAlgorithm>,
) -> impl Iterator<Item = (P, u64, Option<SourceFileHash>)> {
it.map(move |path| {
match checksum_hash_algo.and_then(|algo| {
fs::File::open(path.as_ref())
.and_then(|mut file| {
SourceFileHash::new(algo, &mut file).map(|h| (file, h))
})
.and_then(|(file, h)| file.metadata().map(|m| (m.len(), h)))
.map_err(|e| {
tracing::error!(
"failed to compute checksum, omitting it from dep-info {} {e}",
path.as_ref().display()
)
})
.ok()
}) {
Some((file_len, checksum)) => (path, file_len, Some(checksum)),
None => (path, 0, None),
}
})
}

#[allow(rustc::potential_query_instability)]
let extra_tracked_files =
file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str())));
let extra_tracked_files = hash_iter_files(
file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str()))),
checksum_hash_algo,
);
files.extend(extra_tracked_files);

// We also need to track used PGO profile files
if let Some(ref profile_instr) = sess.opts.cg.profile_use {
files.push(normalize_path(profile_instr.as_path().to_path_buf()));
files.extend(hash_iter_files(
iter::once(normalize_path(profile_instr.as_path().to_path_buf())),
checksum_hash_algo,
));
}
if let Some(ref profile_sample) = sess.opts.unstable_opts.profile_sample_use {
files.push(normalize_path(profile_sample.as_path().to_path_buf()));
files.extend(hash_iter_files(
iter::once(normalize_path(profile_sample.as_path().to_path_buf())),
checksum_hash_algo,
));
}

// Debugger visualizer files
for debugger_visualizer in tcx.debugger_visualizers(LOCAL_CRATE) {
files.push(normalize_path(debugger_visualizer.path.clone().unwrap()));
files.extend(hash_iter_files(
iter::once(normalize_path(debugger_visualizer.path.clone().unwrap())),
checksum_hash_algo,
));
}

if sess.binary_dep_depinfo() {
if let Some(ref backend) = sess.opts.unstable_opts.codegen_backend {
if backend.contains('.') {
// If the backend name contain a `.`, it is the path to an external dynamic
// library. If not, it is not a path.
files.push(backend.to_string());
files.extend(hash_iter_files(
iter::once(backend.to_string()),
checksum_hash_algo,
));
}
}

for &cnum in tcx.crates(()) {
let source = tcx.used_crate_source(cnum);
if let Some((path, _)) = &source.dylib {
files.push(escape_dep_filename(&path.display().to_string()));
files.extend(hash_iter_files(
iter::once(escape_dep_filename(&path.display().to_string())),
checksum_hash_algo,
));
}
if let Some((path, _)) = &source.rlib {
files.push(escape_dep_filename(&path.display().to_string()));
files.extend(hash_iter_files(
iter::once(escape_dep_filename(&path.display().to_string())),
checksum_hash_algo,
));
}
if let Some((path, _)) = &source.rmeta {
files.push(escape_dep_filename(&path.display().to_string()));
files.extend(hash_iter_files(
iter::once(escape_dep_filename(&path.display().to_string())),
checksum_hash_algo,
));
}
}
}

let write_deps_to_file = |file: &mut dyn Write| -> io::Result<()> {
for path in out_filenames {
writeln!(file, "{}: {}\n", path.display(), files.join(" "))?;
writeln!(
file,
"{}: {}\n",
path.display(),
files
.iter()
.map(|(path, _file_len, _checksum_hash_algo)| path.as_str())
.intersperse(" ")
.collect::<String>()
)?;
}

// Emit a fake target for each input file to the compilation. This
// prevents `make` from spitting out an error if a file is later
// deleted. For more info see #28735
for path in files {
for (path, _file_len, _checksum_hash_algo) in &files {
writeln!(file, "{path}:")?;
}

Expand All @@ -510,6 +575,19 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
}
}

// If caller requested this information, add special comments about source file checksums.
// These are not necessarily the same checksums as was used in the debug files.
if sess.opts.unstable_opts.checksum_hash_algorithm().is_some() {
files
.iter()
.filter_map(|(path, file_len, hash_algo)| {
hash_algo.map(|hash_algo| (path, file_len, hash_algo))
})
.try_for_each(|(path, file_len, checksum_hash)| {
writeln!(file, "# checksum:{checksum_hash} file_len:{file_len} {path}")
})?;
}

Ok(())
};

Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@ where
let sysroot = filesearch::materialize_sysroot(sessopts.maybe_sysroot.clone());
let target = rustc_session::config::build_target_config(&early_dcx, &sessopts, &sysroot);
let hash_kind = sessopts.unstable_opts.src_hash_algorithm(&target);
let checksum_hash_kind = sessopts.unstable_opts.checksum_hash_algorithm();
let sm_inputs = Some(SourceMapInputs {
file_loader: Box::new(RealFileLoader) as _,
path_mapping: sessopts.file_path_mapping(),
hash_kind,
checksum_hash_kind,
});

rustc_span::create_session_globals_then(DEFAULT_EDITION, sm_inputs, || {
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1702,6 +1702,7 @@ impl<'a> CrateMetadataRef<'a> {
let rustc_span::SourceFile {
mut name,
src_hash,
checksum_hash,
start_pos: original_start_pos,
source_len,
lines,
Expand Down Expand Up @@ -1752,6 +1753,7 @@ impl<'a> CrateMetadataRef<'a> {
let local_version = sess.source_map().new_imported_source_file(
name,
src_hash,
checksum_hash,
stable_id,
source_len.to_u32(),
self.cnum,
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_query_system/src/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
// Do not hash the source as it is not encoded
src: _,
ref src_hash,
// Already includes src_hash, this is redundant
checksum_hash: _,
external_src: _,
start_pos: _,
source_len: _,
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_session/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1242,6 +1242,10 @@ impl UnstableOptions {
}
})
}

pub fn checksum_hash_algorithm(&self) -> Option<SourceFileHashAlgorithm> {
self.checksum_hash_algorithm
}
}

// The type of entry function, so users can have their own entry functions
Expand Down
19 changes: 18 additions & 1 deletion compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ mod desc {
"one of: `legacy`, `v0` (RFC 2603), or `hashed`";
pub(crate) const parse_opt_symbol_visibility: &str =
"one of: `hidden`, `protected`, or `interposable`";
pub(crate) const parse_src_file_hash: &str = "either `md5` or `sha1`";
pub(crate) const parse_cargo_src_file_hash: &str =
"one of `blake3`, `md5`, `sha1`, or `sha256`";
pub(crate) const parse_src_file_hash: &str = "one of `md5`, `sha1`, or `sha256`";
pub(crate) const parse_relocation_model: &str =
"one of supported relocation models (`rustc --print relocation-models`)";
pub(crate) const parse_code_model: &str =
Expand Down Expand Up @@ -1288,6 +1290,19 @@ mod parse {
true
}

pub(crate) fn parse_cargo_src_file_hash(
slot: &mut Option<SourceFileHashAlgorithm>,
v: Option<&str>,
) -> bool {
match v.and_then(|s| SourceFileHashAlgorithm::from_str(s).ok()) {
Some(hash_kind) => {
*slot = Some(hash_kind);
}
_ => return false,
}
true
}

pub(crate) fn parse_target_feature(slot: &mut String, v: Option<&str>) -> bool {
match v {
Some(s) => {
Expand Down Expand Up @@ -1688,6 +1703,8 @@ options! {
"instrument control-flow architecture protection"),
check_cfg_all_expected: bool = (false, parse_bool, [UNTRACKED],
"show all expected values in check-cfg diagnostics (default: no)"),
checksum_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_cargo_src_file_hash, [TRACKED],
"hash algorithm of source files used to check freshness in cargo (`blake3` or `sha256`)"),
codegen_backend: Option<String> = (None, parse_opt_string, [TRACKED],
"the backend to use"),
combine_cgu: bool = (false, parse_bool, [TRACKED],
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ edition = "2021"

[dependencies]
# tidy-alphabetical-start
blake3 = "1.5.2"
derive-where = "1.2.7"
indexmap = { version = "2.0.0" }
itoa = "1.0"
Expand Down
Loading

0 comments on commit 44f6275

Please sign in to comment.