diff --git a/Cargo.lock b/Cargo.lock index af6cb8c8da312..e6d8308ed7fd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2319,6 +2319,7 @@ dependencies = [ "thiserror", "toml", "typed-arena", + "unicode-normalization", "unicode-width", "unicode_names2", "url", diff --git a/crates/ruff_linter/Cargo.toml b/crates/ruff_linter/Cargo.toml index c11ba0b9eee79..b98ee74d72f57 100644 --- a/crates/ruff_linter/Cargo.toml +++ b/crates/ruff_linter/Cargo.toml @@ -69,6 +69,7 @@ toml = { workspace = true } typed-arena = { workspace = true } unicode-width = { workspace = true } unicode_names2 = { workspace = true } +unicode-normalization = { workspace = true } url = { workspace = true } [dev-dependencies] diff --git a/crates/ruff_linter/resources/test/fixtures/pyflakes/F401_30.py b/crates/ruff_linter/resources/test/fixtures/pyflakes/F401_30.py new file mode 100644 index 0000000000000..5d2e4bd119adc --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pyflakes/F401_30.py @@ -0,0 +1,6 @@ +""" +Test: ensure we're able to correctly remove unused imports +even if they have characters in them that undergo NFKC normalization +""" + +from .main import MaµToMan diff --git a/crates/ruff_linter/src/fix/codemods.rs b/crates/ruff_linter/src/fix/codemods.rs index c3c7691726967..70a928856c3b3 100644 --- a/crates/ruff_linter/src/fix/codemods.rs +++ b/crates/ruff_linter/src/fix/codemods.rs @@ -1,13 +1,16 @@ //! Interface for editing code snippets. These functions take statements or expressions as input, //! and return the modified code snippet as output. +use std::borrow::Cow; + use anyhow::{bail, Result}; use libcst_native::{ Codegen, CodegenState, Expression, ImportNames, NameOrAttribute, ParenthesizableWhitespace, SmallStatement, Statement, }; -use ruff_python_ast::name::UnqualifiedName; use smallvec::{smallvec, SmallVec}; +use unicode_normalization::UnicodeNormalization; +use ruff_python_ast::name::UnqualifiedName; use ruff_python_ast::Stmt; use ruff_python_codegen::Stylist; use ruff_source_file::Locator; @@ -167,39 +170,55 @@ pub(crate) fn retain_imports( Ok(tree.codegen_stylist(stylist)) } -fn collect_segments<'a>(expr: &'a Expression, parts: &mut SmallVec<[&'a str; 8]>) { - match expr { - Expression::Call(expr) => { - collect_segments(&expr.func, parts); - } - Expression::Attribute(expr) => { - collect_segments(&expr.value, parts); - parts.push(expr.attr.value); - } - Expression::Name(expr) => { - parts.push(expr.value); +/// Create an NFKC-normalized qualified name from a libCST node. +fn qualified_name_from_name_or_attribute(module: &NameOrAttribute) -> String { + fn collect_segments<'a>(expr: &'a Expression, parts: &mut SmallVec<[&'a str; 8]>) { + match expr { + Expression::Call(expr) => { + collect_segments(&expr.func, parts); + } + Expression::Attribute(expr) => { + collect_segments(&expr.value, parts); + parts.push(expr.attr.value); + } + Expression::Name(expr) => { + parts.push(expr.value); + } + _ => {} } - _ => {} } -} -fn unqualified_name_from_expression<'a>(expr: &'a Expression<'a>) -> Option> { - let mut segments = smallvec![]; - collect_segments(expr, &mut segments); - if segments.is_empty() { - None - } else { - Some(segments.into_iter().collect()) + /// Attempt to create an [`UnqualifiedName`] from a libCST expression. + /// + /// Strictly speaking, the `UnqualifiedName` returned by this function may be invalid, + /// since it hasn't been NFKC-normalized. In order for an `UnqualifiedName` to be + /// comparable to one constructed from a `ruff_python_ast` node, it has to undergo + /// NFKC normalization. As a local function, however, this is fine; + /// the outer function always performs NFKC normalization before returning the + /// qualified name to the caller. + fn unqualified_name_from_expression<'a>( + expr: &'a Expression<'a>, + ) -> Option> { + let mut segments = smallvec![]; + collect_segments(expr, &mut segments); + if segments.is_empty() { + None + } else { + Some(segments.into_iter().collect()) + } } -} -fn qualified_name_from_name_or_attribute(module: &NameOrAttribute) -> String { - match module { - NameOrAttribute::N(name) => name.value.to_string(), + let unnormalized = match module { + NameOrAttribute::N(name) => Cow::Borrowed(name.value), NameOrAttribute::A(attr) => { let name = attr.attr.value; let prefix = unqualified_name_from_expression(&attr.value); - prefix.map_or_else(|| name.to_string(), |prefix| format!("{prefix}.{name}")) + prefix.map_or_else( + || Cow::Borrowed(name), + |prefix| Cow::Owned(format!("{prefix}.{name}")), + ) } - } + }; + + unnormalized.nfkc().collect() } diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs index c4c0f25d1e47f..a0b048aaad8a0 100644 --- a/crates/ruff_linter/src/rules/pyflakes/mod.rs +++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs @@ -258,6 +258,7 @@ mod tests { #[test_case(Rule::UnusedImport, Path::new("F401_27__all_mistyped/__init__.py"))] #[test_case(Rule::UnusedImport, Path::new("F401_28__all_multiple/__init__.py"))] #[test_case(Rule::UnusedImport, Path::new("F401_29__all_conditional/__init__.py"))] + #[test_case(Rule::UnusedImport, Path::new("F401_30.py"))] fn f401_deprecated_option(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!( "{}_deprecated_option_{}", diff --git a/crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F401_deprecated_option_F401_30.py.snap b/crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F401_deprecated_option_F401_30.py.snap new file mode 100644 index 0000000000000..7b42b5f341833 --- /dev/null +++ b/crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F401_deprecated_option_F401_30.py.snap @@ -0,0 +1,17 @@ +--- +source: crates/ruff_linter/src/rules/pyflakes/mod.rs +--- +F401_30.py:6:19: F401 [*] `.main.MaμToMan` imported but unused + | +4 | """ +5 | +6 | from .main import MaµToMan + | ^^^^^^^^ F401 + | + = help: Remove unused import: `.main.MaμToMan` + +ℹ Safe fix +3 3 | even if they have characters in them that undergo NFKC normalization +4 4 | """ +5 5 | +6 |-from .main import MaµToMan