forked from astral-sh/ruff
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement
UnnecessaryListAllocationForFirstElement
(astral-sh#5549)
## Summary Fixes astral-sh#5503. Ready for final review as the `mkdocs` issue involving SSH keys is fixed. Note that this will only throw on a `Name` - it will be refactorable once we have a type-checker. This means that this is the only sort of input that will throw. ```python x = range(10) list(x)[0] ``` I thought it'd be confusing if we supported direct function results. Consider this example, assuming we support direct results: ```python # throws list(range(10))[0] def createRange(bound): return range(bound) # "why doesn't this throw, but a direct `range(10)` call does?" list(createRange(10))[0] ``` If it's necessary, I can go through the list of built-ins and find those which produce iterables, then add them to the throwing list. ## Test Plan Added a new fixture, then ran `cargo t`
- Loading branch information
1 parent
3562d80
commit 28fe2d3
Showing
8 changed files
with
630 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
x = range(10) | ||
|
||
# RUF015 | ||
list(x)[0] | ||
list(x)[:1] | ||
list(x)[:1:1] | ||
list(x)[:1:2] | ||
tuple(x)[0] | ||
tuple(x)[:1] | ||
tuple(x)[:1:1] | ||
tuple(x)[:1:2] | ||
list(i for i in x)[0] | ||
list(i for i in x)[:1] | ||
list(i for i in x)[:1:1] | ||
list(i for i in x)[:1:2] | ||
[i for i in x][0] | ||
[i for i in x][:1] | ||
[i for i in x][:1:1] | ||
[i for i in x][:1:2] | ||
|
||
# OK (not indexing (solely) the first element) | ||
list(x) | ||
list(x)[1] | ||
list(x)[-1] | ||
list(x)[1:] | ||
list(x)[:3:2] | ||
list(x)[::2] | ||
list(x)[::] | ||
[i for i in x] | ||
[i for i in x][1] | ||
[i for i in x][-1] | ||
[i for i in x][1:] | ||
[i for i in x][:3:2] | ||
[i for i in x][::2] | ||
[i for i in x][::] | ||
|
||
# OK (doesn't mirror the underlying list) | ||
[i + 1 for i in x][0] | ||
[i for i in x if i > 5][0] | ||
[(i, i + 1) for i in x][0] | ||
|
||
# OK (multiple generators) | ||
y = range(10) | ||
[i + j for i in x for j in y][0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
236 changes: 236 additions & 0 deletions
236
crates/ruff/src/rules/ruff/rules/unnecessary_iterable_allocation_for_first_element.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
use num_bigint::BigInt; | ||
use num_traits::{One, Zero}; | ||
use rustpython_parser::ast::{self, Comprehension, Constant, Expr}; | ||
|
||
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; | ||
use ruff_macros::{derive_message_formats, violation}; | ||
use ruff_python_semantic::SemanticModel; | ||
|
||
use crate::checkers::ast::Checker; | ||
use crate::registry::AsRule; | ||
|
||
/// ## What it does
/// Checks for uses of `list(...)[0]` that can be replaced with
/// `next(iter(...))`.
///
/// The same check applies to `tuple(...)`, to simple list comprehensions and
/// generator expressions that pass the elements through unchanged (e.g.,
/// `[i for i in x][0]`), and to slices that select only the first element
/// (e.g., `list(...)[:1]`), which are replaced with `[next(iter(...))]`.
///
/// ## Why is this bad?
/// Calling `list(...)` will create a new list of the entire collection, which
/// can be very expensive for large collections. If you only need the first
/// element of the collection, you can use `next(iter(...))` to lazily fetch
/// the first element without creating a new list.
///
/// Note that migrating from `list(...)[0]` to `next(iter(...))` can change
/// the behavior of your program in two ways:
///
/// 1. First, `list(...)` will eagerly evaluate the entire collection, while
///    `next(iter(...))` will only evaluate the first element. As such, any
///    side effects that occur during iteration will be delayed.
/// 2. Second, `list(...)[0]` will raise `IndexError` if the collection is
///    empty, while `next(iter(...))` will raise `StopIteration`.
///
/// For the slice form, also note that `list(...)[:1]` evaluates to an empty
/// list when the collection is empty, while `[next(iter(...))]` will raise
/// `StopIteration`; and that `tuple(...)[:1]` evaluates to a tuple, while the
/// replacement is always a list.
///
/// ## Example
/// ```python
/// head = list(range(1000000000000))[0]
/// ```
///
/// Use instead:
/// ```python
/// head = next(iter(range(1000000000000)))
/// ```
///
/// ## References
/// - [Iterators and Iterables in Python: Run Efficient Iterations](https://realpython.com/python-iterators-iterables/#when-to-use-an-iterator-in-python)
#[violation]
pub(crate) struct UnnecessaryIterableAllocationForFirstElement {
    /// Name of the iterable whose first element is being fetched (e.g., `x` in `list(x)[0]`).
    iterable: String,
    /// Whether the first element was selected via an index (`[0]`) or a slice (`[:1]`).
    subscript_kind: HeadSubscriptKind,
}
|
||
impl AlwaysAutofixableViolation for UnnecessaryIterableAllocationForFirstElement { | ||
#[derive_message_formats] | ||
fn message(&self) -> String { | ||
let UnnecessaryIterableAllocationForFirstElement { | ||
iterable, | ||
subscript_kind, | ||
} = self; | ||
match subscript_kind { | ||
HeadSubscriptKind::Index => { | ||
format!("Prefer `next(iter({iterable}))` over `list({iterable})[0]`") | ||
} | ||
HeadSubscriptKind::Slice => { | ||
format!("Prefer `[next(iter({iterable}))]` over `list({iterable})[:1]`") | ||
} | ||
} | ||
} | ||
|
||
fn autofix_title(&self) -> String { | ||
let UnnecessaryIterableAllocationForFirstElement { | ||
iterable, | ||
subscript_kind, | ||
} = self; | ||
match subscript_kind { | ||
HeadSubscriptKind::Index => format!("Replace with `next(iter({iterable}))`"), | ||
HeadSubscriptKind::Slice => format!("Replace with `[next(iter({iterable}))]"), | ||
} | ||
} | ||
} | ||
|
||
/// RUF015 | ||
pub(crate) fn unnecessary_iterable_allocation_for_first_element( | ||
checker: &mut Checker, | ||
subscript: &Expr, | ||
) { | ||
let Expr::Subscript(ast::ExprSubscript { | ||
value, | ||
slice, | ||
range, | ||
.. | ||
}) = subscript | ||
else { | ||
return; | ||
}; | ||
|
||
let Some(subscript_kind) = classify_subscript(slice) else { | ||
return; | ||
}; | ||
|
||
let Some(iterable) = iterable_name(value, checker.semantic()) else { | ||
return; | ||
}; | ||
|
||
let mut diagnostic = Diagnostic::new( | ||
UnnecessaryIterableAllocationForFirstElement { | ||
iterable: iterable.to_string(), | ||
subscript_kind, | ||
}, | ||
*range, | ||
); | ||
|
||
if checker.patch(diagnostic.kind.rule()) { | ||
let replacement = match subscript_kind { | ||
HeadSubscriptKind::Index => format!("next(iter({iterable}))"), | ||
HeadSubscriptKind::Slice => format!("[next(iter({iterable}))]"), | ||
}; | ||
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(replacement, *range))); | ||
} | ||
|
||
checker.diagnostics.push(diagnostic); | ||
} | ||
|
||
/// The form of a subscript that selects only the first element of a sequence.
///
/// The kind determines the shape of the suggested replacement: an index is replaced with the
/// bare element, while a slice is replaced with a one-element list display.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HeadSubscriptKind {
    /// The subscript is an index (e.g., `[0]`).
    Index,
    /// The subscript is a slice (e.g., `[:1]`).
    Slice,
}
|
||
/// Check that the slice [`Expr`] is functionally equivalent to slicing into the first element. The | ||
/// first `bool` checks that the element is in fact first, the second checks if it's a slice or an | ||
/// index. | ||
fn classify_subscript(expr: &Expr) -> Option<HeadSubscriptKind> { | ||
match expr { | ||
Expr::Constant(ast::ExprConstant { | ||
value: Constant::Int(value), | ||
.. | ||
}) if value.is_zero() => Some(HeadSubscriptKind::Index), | ||
Expr::Slice(ast::ExprSlice { | ||
step, lower, upper, .. | ||
}) => { | ||
// Avoid, e.g., `list(...)[:2]` | ||
let upper = upper.as_ref()?; | ||
let upper = as_int(upper)?; | ||
if !upper.is_one() { | ||
return None; | ||
} | ||
|
||
// Avoid, e.g., `list(...)[2:]`. | ||
if let Some(lower) = lower.as_ref() { | ||
let lower = as_int(lower)?; | ||
if !lower.is_zero() { | ||
return None; | ||
} | ||
} | ||
|
||
// Avoid, e.g., `list(...)[::-1]` | ||
if let Some(step) = step.as_ref() { | ||
let step = as_int(step)?; | ||
if step < upper { | ||
return None; | ||
} | ||
} | ||
|
||
Some(HeadSubscriptKind::Slice) | ||
} | ||
_ => None, | ||
} | ||
} | ||
|
||
/// Fetch the name of the iterable from an expression if the expression returns an unmodified list | ||
/// which can be sliced into. | ||
fn iterable_name<'a>(expr: &'a Expr, model: &SemanticModel) -> Option<&'a str> { | ||
match expr { | ||
Expr::Call(ast::ExprCall { func, args, .. }) => { | ||
let ast::ExprName { id, .. } = func.as_name_expr()?; | ||
|
||
if !matches!(id.as_str(), "tuple" | "list") { | ||
return None; | ||
} | ||
|
||
if !model.is_builtin(id.as_str()) { | ||
return None; | ||
} | ||
|
||
match args.first() { | ||
Some(Expr::Name(ast::ExprName { id: arg_name, .. })) => Some(arg_name.as_str()), | ||
Some(Expr::GeneratorExp(ast::ExprGeneratorExp { | ||
elt, generators, .. | ||
})) => generator_iterable(elt, generators), | ||
_ => None, | ||
} | ||
} | ||
Expr::ListComp(ast::ExprListComp { | ||
elt, generators, .. | ||
}) => generator_iterable(elt, generators), | ||
_ => None, | ||
} | ||
} | ||
|
||
/// Given a comprehension, returns the name of the iterable over which it iterates, if it's | ||
/// a simple comprehension (e.g., `x` for `[i for i in x]`). | ||
fn generator_iterable<'a>(elt: &'a Expr, generators: &'a Vec<Comprehension>) -> Option<&'a str> { | ||
// If the `elt` field is anything other than a [`Expr::Name`], we can't be sure that it | ||
// doesn't modify the elements of the underlying iterator (e.g., `[i + 1 for i in x][0]`). | ||
if !elt.is_name_expr() { | ||
return None; | ||
} | ||
|
||
// If there's more than 1 generator, we can't safely say that it fits the diagnostic conditions | ||
// (e.g., `[(i, j) for i in x for j in y][0]`). | ||
let [generator] = generators.as_slice() else { | ||
return None; | ||
}; | ||
|
||
// Ignore if there's an `if` statement in the comprehension, since it filters the list. | ||
if !generator.ifs.is_empty() { | ||
return None; | ||
} | ||
|
||
let ast::ExprName { id, .. } = generator.iter.as_name_expr()?; | ||
Some(id.as_str()) | ||
} | ||
|
||
/// If an expression is a constant integer, returns the value of that integer; otherwise, | ||
/// returns `None`. | ||
fn as_int(expr: &Expr) -> Option<&BigInt> { | ||
if let Expr::Constant(ast::ExprConstant { | ||
value: Constant::Int(value), | ||
.. | ||
}) = expr | ||
{ | ||
Some(value) | ||
} else { | ||
None | ||
} | ||
} |
Oops, something went wrong.