From 65cfb497e85a833a573d77237b306270861e5677 Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Wed, 14 Jun 2023 16:21:14 +0200 Subject: [PATCH] Codegen/IDL 1: add more build tools (#2362) Adds a bunch of build tools to help with dependency hashing when managing implicit build graphs via `build.rs`. Prerequisites for `re_types_builder` & `re_types`. --- Codegen/IDL PR series: - #2362 - #2363 - #2369 - #2370 - #2374 - #2375 - #2410 - #2432 --- .github/workflows/labels.yml | 2 +- .gitignore | 2 +- Cargo.lock | 3 + crates/re_build_tools/Cargo.toml | 3 + crates/re_build_tools/src/hashing.rs | 181 ++++++++++++++++++ crates/re_build_tools/src/lib.rs | 9 +- crates/re_build_tools/src/rebuild_detector.rs | 2 +- 7 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 crates/re_build_tools/src/hashing.rs diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index aed2272b2b4b..2c039b36df3a 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -29,4 +29,4 @@ jobs: with: mode: minimum count: 1 - labels: "πŸ“Š analytics, πŸͺ³ bug, πŸ§‘β€πŸ’» dev experience, dependencies, πŸ“– documentation, πŸ’¬ discussion, examples, πŸ“‰ performance, 🐍 python API, ⛃ re_datastore, πŸ“Ί re_viewer, πŸ”Ί re_renderer, 🚜 refactor, β›΄ release, πŸ¦€ rust SDK, πŸ”¨ testing, ui, πŸ•ΈοΈ web" + labels: "πŸ“Š analytics, πŸͺ³ bug, codegen/idl, πŸ§‘β€πŸ’» dev experience, dependencies, πŸ“– documentation, πŸ’¬ discussion, examples, πŸ“‰ performance, 🐍 python API, ⛃ re_datastore, πŸ“Ί re_viewer, πŸ”Ί re_renderer, 🚜 refactor, β›΄ release, πŸ¦€ rust SDK, πŸ”¨ testing, ui, πŸ•ΈοΈ web" diff --git a/.gitignore b/.gitignore index 9b321b2cd062..47d369e1ebff 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ **/target_wasm # Python virtual environment: -/venv +**/venv # Python build artifacts: __pycache__ diff --git a/Cargo.lock b/Cargo.lock index 537281eabb89..f368a1fec87e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3917,7 +3917,10 @@ dependencies = 
[ "anyhow", "cargo_metadata", "glob", + "sha2", "time", + "unindent", + "walkdir", ] [[package]] diff --git a/crates/re_build_tools/Cargo.toml b/crates/re_build_tools/Cargo.toml index bf439458fb7f..73211452eeb1 100644 --- a/crates/re_build_tools/Cargo.toml +++ b/crates/re_build_tools/Cargo.toml @@ -20,4 +20,7 @@ all-features = true anyhow.workspace = true cargo_metadata = "0.15" glob = "0.3" +sha2 = "0.10" time = { workspace = true, features = ["formatting"] } +unindent = "0.1" +walkdir = "2.0" diff --git a/crates/re_build_tools/src/hashing.rs b/crates/re_build_tools/src/hashing.rs new file mode 100644 index 000000000000..f734651b8fa4 --- /dev/null +++ b/crates/re_build_tools/src/hashing.rs @@ -0,0 +1,181 @@ +use std::fmt::Write; +use std::path::{Path, PathBuf}; +use std::{fs, io}; + +use anyhow::Context as _; +use sha2::{Digest, Sha256}; + +use crate::{rerun_if_changed, rerun_if_changed_or_doesnt_exist}; + +// --- + +fn encode_hex(bytes: &[u8]) -> String { + let mut s = String::with_capacity(bytes.len() * 2); + for &b in bytes { + write!(&mut s, "{b:02x}").unwrap(); + } + s +} + +/// Walks the directory at `path` in filename order. +/// +/// If `extensions` is specified, only files with the right extensions will be iterated. +/// Specified extensions should _not_ include the leading dot, e.g. `fbs` rather than `.fbs`. 
+pub fn iter_dir<'a>(
+    path: impl AsRef<Path>,
+    extensions: Option<&'a [&'a str]>,
+) -> impl Iterator<Item = PathBuf> + 'a {
+    fn filter(entry: &walkdir::DirEntry, extensions: Option<&[&str]>) -> bool {
+        let is_dir = entry.file_type().is_dir();
+        let is_interesting = extensions.map_or(true, |extensions| {
+            extensions.iter().any(|ext| {
+                entry
+                    .path()
+                    .extension()
+                    .map_or(false, |ext2| *ext == ext2.to_string_lossy())
+            })
+        });
+        is_dir || is_interesting
+    }
+
+    let path = path.as_ref();
+    walkdir::WalkDir::new(path)
+        .sort_by_file_name()
+        .into_iter()
+        .filter_entry(move |entry| filter(entry, extensions))
+        .filter_map(|entry| entry.ok())
+        .filter_map(|entry| entry.file_type().is_file().then(|| entry.into_path()))
+}
+
+/// Given a file path, computes the sha256 hash of its contents and returns an hexadecimal string
+/// for it.
+///
+/// This will automatically emit a `rerun-if-changed` clause for the specified file.
+///
+/// Panics if the file doesn't exist.
+pub fn compute_file_hash(path: impl AsRef<Path>) -> String {
+    let mut hasher = Sha256::new();
+
+    let path = path.as_ref();
+    let mut file = fs::File::open(path)
+        .with_context(|| format!("couldn't open {path:?}"))
+        .unwrap();
+    io::copy(&mut file, &mut hasher)
+        .with_context(|| format!("couldn't copy from {path:?}"))
+        .unwrap();
+
+    rerun_if_changed(path);
+
+    encode_hex(hasher.finalize().as_slice())
+}
+
+/// Given a directory path, computes the sha256 hash of the accumulated contents of all of its
+/// files (ordered by filename), and returns an hexadecimal string for it.
+///
+/// This includes files in sub-directories (i.e. it's recursive).
+///
+/// This will automatically emit a `rerun-if-changed` clause for all the files that were hashed.
+///
+/// If `extensions` is specified, only files with the right extensions will be iterated.
+/// Specified extensions should _not_ include the leading dot, e.g. `fbs` rather than `.fbs`.
+pub fn compute_dir_hash<'a>(path: impl AsRef<Path>, extensions: Option<&'a [&'a str]>) -> String {
+    let mut hasher = Sha256::new();
+
+    let path = path.as_ref();
+    for filepath in iter_dir(path, extensions) {
+        let mut file = fs::File::open(&filepath)
+            .with_context(|| format!("couldn't open {filepath:?}"))
+            .unwrap();
+        io::copy(&mut file, &mut hasher)
+            .with_context(|| format!("couldn't copy from {filepath:?}"))
+            .unwrap();
+
+        rerun_if_changed(path);
+    }
+
+    encode_hex(hasher.finalize().as_slice())
+}
+
+/// Given a crate name, computes the sha256 hash of its source code (ordered by filename) and
+/// returns an hexadecimal string for it.
+///
+/// This includes the source code of all its direct and indirect dependencies.
+///
+/// This will automatically emit a `rerun-if-changed` clause for all the files that were hashed.
+pub fn compute_crate_hash(pkg_name: impl AsRef<str>) -> String {
+    use cargo_metadata::{CargoOpt, MetadataCommand};
+    let metadata = MetadataCommand::new()
+        .features(CargoOpt::AllFeatures)
+        .exec()
+        .unwrap();
+
+    let pkg_name = pkg_name.as_ref();
+    let mut files = Default::default();
+
+    let pkgs = crate::Packages::from_metadata(&metadata);
+    pkgs.track_implicit_dep(pkg_name, &mut files);
+
+    let mut files = files.into_iter().collect::<Vec<_>>();
+    files.sort();
+
+    let hashes = files.into_iter().map(compute_file_hash).collect::<Vec<_>>();
+    let hashes = hashes.iter().map(|s| s.as_str()).collect::<Vec<_>>();
+
+    compute_strings_hash(&hashes)
+}
+
+/// Given a bunch of strings, computes the sha256 hash of their contents (in the order they
+/// were passed in) and returns an hexadecimal string for it.
+pub fn compute_strings_hash(strs: &[&str]) -> String {
+    let mut hasher = Sha256::new();
+
+    for s in strs {
+        hasher.update(s);
+    }
+
+    encode_hex(hasher.finalize().as_slice())
+}
+
+/// Writes the given `hash` at the specified `path`.
+///
+/// `hash` should have been computed using one of the methods in this module: [`compute_file_hash`],
+/// [`compute_dir_hash`], [`compute_crate_hash`].
+///
+/// Panics on I/O errors.
+///
+/// Use [`read_versioning_hash`] to read it back.
+pub fn write_versioning_hash(path: impl AsRef<Path>, hash: impl AsRef<str>) {
+    let path = path.as_ref();
+    let hash = hash.as_ref();
+
+    let contents = unindent::unindent(&format!(
+        "
+        # This is a sha256 hash for all direct and indirect dependencies of this crate's build script.
+        # It can be safely removed at anytime to force the build script to run again.
+        # Check out build.rs to see how it's computed.
+        {hash}
+        "
+    ));
+    std::fs::write(path, contents.trim())
+        .with_context(|| format!("couldn't write to {path:?}"))
+        .unwrap();
+}
+
+/// Reads back a versioning hash that was written with [`write_versioning_hash`].
+///
+/// This will automatically emit a `rerun-if-changed` clause for the specified filepath.
+///
+/// Returns `None` on error.
+pub fn read_versioning_hash(path: impl AsRef<Path>) -> Option<String> {
+    let path = path.as_ref();
+
+    // NOTE: It's important we trigger if the file doesn't exist, as this means the user explicitly
+    // deleted the versioning file, i.e. they're trying to force a rebuild.
+ rerun_if_changed_or_doesnt_exist(path); + + std::fs::read_to_string(path).ok().and_then(|contents| { + contents + .lines() + .find_map(|line| (!line.trim().starts_with('#')).then(|| line.trim().to_owned())) + }) +} diff --git a/crates/re_build_tools/src/lib.rs b/crates/re_build_tools/src/lib.rs index 487ea8ae9adb..9f36c8f4e80a 100644 --- a/crates/re_build_tools/src/lib.rs +++ b/crates/re_build_tools/src/lib.rs @@ -8,9 +8,16 @@ use anyhow::Context as _; use std::process::Command; +mod hashing; mod rebuild_detector; -pub use rebuild_detector::{ +pub(crate) use self::rebuild_detector::Packages; + +pub use self::hashing::{ + compute_crate_hash, compute_dir_hash, compute_file_hash, compute_strings_hash, iter_dir, + read_versioning_hash, write_versioning_hash, +}; +pub use self::rebuild_detector::{ get_and_track_env_var, is_tracked_env_var_set, rebuild_if_crate_changed, rerun_if_changed, rerun_if_changed_glob, rerun_if_changed_or_doesnt_exist, write_file_if_necessary, }; diff --git a/crates/re_build_tools/src/rebuild_detector.rs b/crates/re_build_tools/src/rebuild_detector.rs index 102b09ced200..caa9605a34c9 100644 --- a/crates/re_build_tools/src/rebuild_detector.rs +++ b/crates/re_build_tools/src/rebuild_detector.rs @@ -105,7 +105,7 @@ pub fn write_file_if_necessary( // --- -struct Packages<'a> { +pub struct Packages<'a> { pkgs: HashMap<&'a str, &'a Package>, }