diff --git a/Cargo.lock b/Cargo.lock index 421ae66f65734b..2cac06d4727d81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2388,22 +2388,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "ruff_shrinking" -version = "0.4.2" -dependencies = [ - "anyhow", - "clap", - "fs-err", - "regex", - "ruff_python_ast", - "ruff_python_parser", - "ruff_text_size", - "shlex", - "tracing", - "tracing-subscriber", -] - [[package]] name = "ruff_source_file" version = "0.0.0" @@ -2729,12 +2713,6 @@ dependencies = [ "dirs 5.0.1", ] -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - [[package]] name = "similar" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 77b49be430f939..b1540ec438132f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,7 +91,6 @@ serde_json = { version = "1.0.113" } serde_test = { version = "1.0.152" } serde_with = { version = "3.6.0", default-features = false, features = ["macros"] } shellexpand = { version = "3.0.0" } -shlex = { version = "1.3.0" } similar = { version = "2.4.0", features = ["inline"] } smallvec = { version = "1.13.2" } static_assertions = "1.1.0" diff --git a/crates/ruff_shrinking/Cargo.toml b/crates/ruff_shrinking/Cargo.toml deleted file mode 100644 index 087fdcff294132..00000000000000 --- a/crates/ruff_shrinking/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "ruff_shrinking" -version = "0.4.2" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -anyhow = { workspace = true } -clap = { workspace = true } -fs-err = { workspace = true } -regex = { workspace = true } -ruff_python_ast = { path = "../ruff_python_ast" } -ruff_python_parser = { path = "../ruff_python_parser" } -ruff_text_size = { path = "../ruff_text_size" } -shlex = { workspace = true } -tracing = { workspace = true } -tracing-subscriber = { workspace = true, features = ["env-filter"] } - -[lints] -workspace = true diff --git a/crates/ruff_shrinking/src/main.rs b/crates/ruff_shrinking/src/main.rs deleted file mode 100644 index 4e7b173088c91b..00000000000000 --- a/crates/ruff_shrinking/src/main.rs +++ /dev/null @@ -1,491 +0,0 @@ -//! Minimize a failing python file, a process known as [shrinking](https://www.educative.io/answers/what-is-shrinking) -//! -//! See `--help` (or the [Args] docs) for usage. -//! -//! ## Example -//! -//! Shrink a file with unstable formatting -//! ```shell -//! cargo run --bin ruff-minimizer -- target/cpython/Lib/test/inspect_fodder.py target/minirepo/a.py "Unstable formatting" "target/debug/ruff_dev format-dev --stability-check target/minirepo" -//! ``` -//! This could emit -//! ```python -//! class WhichComments: # before f return 1 # end f # after f # before asyncf - line 108 -//! async def asyncf(self): return 2 -//! # end asyncf # end of WhichComments - line 114# a closing parenthesis with the opening paren being in another line -//! ( -//! ); -//! ``` -//! -//! Shrink a file with a syntax error -//! ```shell -//! cargo run --bin ruff_shrinking -- target/checkouts/jhnnsrs:mikro-napari/mikro_napari/models/representation.py target/minirepo/code.py "invalid syntax" "target/debug/ruff_dev format-dev --stability-check target/minirepo" -//! ``` -//! This could emit -//! ```python -//! class RepresentationQtModel(): -//! data[:,] = rep.data -//! ``` - -#![allow(clippy::print_stdout, clippy::print_stderr)] - -use anyhow::{Context, Result}; -use clap::Parser; -use fs_err as fs; -use regex::Regex; -use ruff_python_ast::statement_visitor::{walk_body, walk_stmt, StatementVisitor}; -use ruff_python_ast::visitor::{walk_expr, Visitor}; -use ruff_python_ast::{Expr, Stmt, Suite}; -use ruff_python_parser::Mode; -use ruff_text_size::{Ranged, TextRange}; -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::process::{Command, ExitCode}; -use std::str; -use std::time::Instant; -use tracing::debug; - -const STRATEGIES: &[&dyn Strategy] = &[ - (&StrategyRemoveModuleMember), - (&StrategyRemoveStatement), - (&StrategyRemoveExpression), - (&StrategyRemoveLine), - (&StrategyRemoveNewline), - (&StrategyRemoveToken), - (&StrategyRemoveChar), -]; - -/// Each strategy is a way of producing possible minimizations -trait Strategy { - fn name(&self) -> &'static str; - - fn candidates<'a>( - &self, - input: &'a str, - ast: &'a Suite, - ) -> Result>; -} - -/// vtable surrogate trait that rust wants -trait ExactSizeStringIter: Iterator + ExactSizeIterator {} - -impl ExactSizeStringIter for T where T: Iterator + ExactSizeIterator {} - -/// Remove a top level member from a module. Generally the most effective strategy since most -/// top level items will be unrelated to the error. -struct StrategyRemoveModuleMember; - -impl Strategy for StrategyRemoveModuleMember { - fn name(&self) -> &'static str { - "remove module member" - } - - fn candidates<'a>( - &self, - input: &'a str, - ast: &'a Suite, - ) -> Result> { - let iter = ast.iter().map(|stmt| { - // trim the newlines the range misses - input[..stmt.start().to_usize()].trim_end().to_string() - + input[stmt.end().to_usize()..].trim_start() - }); - Ok(Box::new(iter)) - } -} - -/// Finds the ranges of all statements. -#[derive(Default)] -struct StatementCollector { - /// The ranges of all statements - ranges: Vec, -} - -impl StatementVisitor<'_> for StatementCollector { - fn visit_body(&mut self, body: &[Stmt]) { - if let (Some(first), Some(last)) = (body.first(), body.last()) { - if !(first == last && matches!(first, Stmt::Pass(_))) { - self.ranges.push(TextRange::new(first.start(), last.end())); - } - } - walk_body(self, body); - } - - fn visit_stmt(&mut self, stmt: &Stmt) { - if !matches!(stmt, Stmt::Pass(_)) { - self.ranges.push(stmt.range()); - } - walk_stmt(self, stmt); - } -} - -/// Try to remove each statement or replace it statement with `pass` -fn strategy_statement<'a>( - input: &'a str, - ast: &'a Suite, - pass_dummy: bool, -) -> Box { - let mut visitor = StatementCollector::default(); - visitor.visit_body(ast); - - // Remove the largest statements first - let mut ranges = visitor.ranges; - ranges.sort_by_key(|range| range.len()); - ranges.reverse(); - - let iter = ranges.into_iter().map(move |range| { - let mut without_stmt = String::new(); - // trim the newlines the range misses - without_stmt.push_str(input[..range.start().to_usize()].trim_end()); - if pass_dummy { - without_stmt.push_str("pass"); - } - without_stmt.push_str(&input[range.end().to_usize()..]); - without_stmt - }); - Box::new(iter) -} - -struct StrategyRemoveStatement; - -impl Strategy for StrategyRemoveStatement { - fn name(&self) -> &'static str { - "remove statement" - } - fn candidates<'a>( - &self, - input: &'a str, - ast: &'a Suite, - ) -> Result> { - Ok(strategy_statement(input, ast, false)) - } -} - -/// Finds the ranges of all expressions. -#[derive(Default)] -struct ExpressionCollector { - /// The ranges of all expressions - ranges: Vec, -} - -impl Visitor<'_> for ExpressionCollector { - fn visit_expr(&mut self, expr: &Expr) { - self.ranges.push(expr.range()); - walk_expr(self, expr); - } -} - -struct StrategyRemoveExpression; - -impl Strategy for StrategyRemoveExpression { - fn name(&self) -> &'static str { - "remove expression" - } - - /// Try to remove each expression - fn candidates<'a>( - &self, - input: &'a str, - ast: &'a Suite, - ) -> Result> { - let mut visitor = ExpressionCollector::default(); - visitor.visit_body(ast); - let iter = visitor.ranges.into_iter().map(move |range| { - input[TextRange::up_to(range.start())].to_string() + &input[range.end().to_usize()..] - }); - Ok(Box::new(iter)) - } -} - -/// Remove each line (physical lines, not logical lines). -struct StrategyRemoveLine; - -impl Strategy for StrategyRemoveLine { - fn name(&self) -> &'static str { - "remove line" - } - - /// Returns the number of permutations and each permutation - fn candidates<'a>( - &self, - input: &'a str, - _ast: &'a Suite, - ) -> Result> { - let lines: Vec<_> = input.lines().collect(); - let iter = (0..lines.len()).map(move |removed_line| { - let mut result = String::new(); - result.push_str(&lines[..removed_line].join("\n")); - if removed_line > 0 { - result.push('\n'); - } - result.push_str(&lines[removed_line + 1..].join("\n")); - result - }); - Ok(Box::new(iter)) - } -} - -/// Try removing newline characters -struct StrategyRemoveNewline; - -impl Strategy for StrategyRemoveNewline { - fn name(&self) -> &'static str { - "remove newline" - } - - /// Returns the number of permutations and each permutation - fn candidates<'a>( - &self, - input: &'a str, - _ast: &'a Suite, - ) -> Result> { - let newline_positions: Vec<_> = input - .char_indices() - .filter_map(|(pos, char)| { - // Don't remove newlines after `:`. Indexing is save because pos > 0 is checked and - // pos - 1 is behind a position we know exists and we're indexing into bytes instead - // of chars - if char == '\n' && pos > 0 && input.as_bytes()[pos - 1] != b':' { - Some(pos) - } else { - None - } - }) - .collect(); - let iter = newline_positions.into_iter().map(move |newline_position| { - // trim to remove the indentation - input[..newline_position].to_string() - + input[newline_position + '\n'.len_utf8()..].trim_start() - }); - Ok(Box::new(iter)) - } -} - -/// Try removing each python token. This is really slow and runs at the end -struct StrategyRemoveToken; - -impl Strategy for StrategyRemoveToken { - fn name(&self) -> &'static str { - "remove token" - } - - fn candidates<'a>( - &self, - input: &'a str, - _ast: &'a Suite, - ) -> Result> { - let token_ranges: Vec<_> = ruff_python_parser::tokenize(input, Mode::Module) - .into_iter() - // At this point we know we have valid python code - .map(Result::unwrap) - .filter(|token| token.1.len().to_usize() > 0) - .map(|token| token.1) - .collect(); - - let iter = token_ranges.into_iter().map(move |range| { - input[..range.start().to_usize()].to_string() + &input[range.end().to_usize()..] - }); - Ok(Box::new(iter)) - } -} - -/// Try removing each individual character in the file. This is really slow and runs at the end -struct StrategyRemoveChar; - -impl Strategy for StrategyRemoveChar { - fn name(&self) -> &'static str { - "remove character" - } - - fn candidates<'a>( - &self, - input: &'a str, - _ast: &'a Suite, - ) -> Result> { - let char_indices: Vec<_> = input.char_indices().collect(); - let iter = char_indices - .into_iter() - .map(move |(pos, char)| input[..pos].to_string() + &input[pos + char.len_utf8()..]); - Ok(Box::new(iter)) - } -} - -/// Returns strategy, posing in the iteration (so they can be skipped in the next attempt) and -/// minimized code. -fn minimization_step( - input: &str, - location: &Path, - command_args: &[String], - pattern: &Regex, - last_strategy_and_idx: Option<(&'static dyn Strategy, usize)>, -) -> Result> { - let tokens = ruff_python_parser::tokenize(input, Mode::Module); - let ast = ruff_python_parser::parse_program_tokens(tokens, input, false) - .context("not valid python")?; - - // Try the last succeeding strategy first, skipping all that failed last time - if let Some((last_strategy, last_idx)) = last_strategy_and_idx { - let iter = last_strategy.candidates(input, &ast)?; - println!( - "Trying {} ({last_idx} skipped) {} candidates", - iter.len() - last_idx, - last_strategy.name() - ); - for (idx, entry) in iter.enumerate().skip(last_idx) { - if is_failing(&entry, location, command_args, pattern)? { - // This one is still failing in the right way - return Ok(Some((last_strategy, idx, entry))); - } - } - } - - // Try all strategies in order, including the last successful one without skipping inputs - for strategy in STRATEGIES { - let iter = strategy.candidates(input, &ast)?; - println!("Trying {} {} candidates", iter.len(), strategy.name()); - for (idx, entry) in iter.enumerate() { - if is_failing(&entry, location, command_args, pattern)? { - // This one is still failing in the right way - return Ok(Some((*strategy, idx, entry))); - } - } - } - - // None of the minimizations worked - Ok(None) -} - -/// Does the candidate still produce the expected error? -fn is_failing( - input: &str, - location: &Path, - command_args: &[String], - pattern: &Regex, -) -> Result { - fs::write(location, input).context("Invalid location")?; - - let output = Command::new(&command_args[0]) - .args(&command_args[1..]) - .output() - .context("Failed to launch command")?; - - let stdout = str::from_utf8(&output.stdout).context("stdout was not utf8")?; - let stderr = str::from_utf8(&output.stderr).context("stderr was not utf8")?; - if pattern.is_match(stdout) { - debug!("stdout matches"); - Ok(true) - } else if pattern.is_match(stderr) { - debug!("stderr matches"); - Ok(true) - } else { - Ok(false) - } -} - -/// You specify an input file that fails. The minimizer will write minimization candidates to the -/// the file given as second argument. It will run the command and if the output still matches the -/// error pattern, the candidate will be considered a successful minimization step, otherwise it's -/// rolled back. -/// -/// ## Example -/// ```shell -/// cargo run --bin ruff_shrinking -- target/checkouts/jhnnsrs:mikro-napari/mikro_napari/models/representation.py target/minirepo/code.py "invalid syntax" "target/debug/ruff_dev format-dev --stability-check target/minirepo" -/// ``` -/// This could emit (if it wasn't fixed): -/// ```python -/// class RepresentationQtModel(): -/// data[:,] = rep.data -/// ``` -#[derive(Parser)] -struct Args { - /// The input file that fails - input_file: PathBuf, - /// The minimization attempt is written to this location - output_file: PathBuf, - /// Continue this path of the minimization if either stderr or stdout match this regex - error_pattern: String, - /// The command to run to test if the smaller version still emits the same error - /// - /// TODO(konstin): Move this to some form of trailing args so we don't need shlex - command: String, -} - -fn run() -> Result<()> { - // e.g. `RUST_LOG=ruff_shrinking=debug` - tracing_subscriber::fmt::init(); - - let args: Args = Args::parse(); - let pattern = Regex::new(&args.error_pattern).context("Invalid error_pattern")?; - let command_args = shlex::split(&args.command).context("Couldn't split command input")?; - - let loop_start = Instant::now(); - let mut stats = HashMap::new(); - - // Normalize line endings for the remove newline dependent rules - let mut input = fs::read_to_string(args.input_file)?.replace('\r', ""); - - // This can happen e.g. when main changed between collecting the errors list and running this - // script - if !is_failing(&input, &args.output_file, &command_args, &pattern)? { - println!("Input doesn't match"); - fs::write(&args.output_file, "")?; - return Ok(()); - } - - let mut num_iterations = 0; - let mut last_strategy_and_idx = None; - loop { - let start = Instant::now(); - num_iterations += 1; - let smaller_failure = minimization_step( - &input, - &args.output_file, - &command_args, - &pattern, - last_strategy_and_idx, - )?; - let duration = start.elapsed(); - if let Some((strategy, idx, smaller_failure)) = smaller_failure { - println!( - "Match found with {} {idx} in {:.2}s, {} bytes remaining", - strategy.name(), - duration.as_secs_f32(), - smaller_failure.len() - ); - *stats.entry(strategy.name()).or_insert(0) += 1; - input = smaller_failure; - last_strategy_and_idx = Some((strategy, idx)); - } else { - // The last minimization failed, write back the original content - fs::write(&args.output_file, input.as_bytes())?; - println!( - "Last iteration in {:.2}s, {} bytes remaining", - duration.as_secs_f32(), - input.as_bytes().len() - ); - break; - } - } - - println!("Strategies taken: {stats:?}"); - println!( - "Done with {num_iterations} iterations in {:.2}s. Find your minimized example in {}:\n---\n{}\n---\n", - loop_start.elapsed().as_secs_f32(), - args.output_file.display(), - input - ); - - Ok(()) -} - -fn main() -> ExitCode { - if let Err(e) = run() { - eprintln!("💥 Minimizer failed"); - for cause in e.chain() { - eprintln!(" Cause: {cause}"); - } - ExitCode::FAILURE - } else { - ExitCode::SUCCESS - } -}