Skip to content

Commit

Permalink
Implement UnnecessaryListAllocationForFirstElement (astral-sh#5549)
Browse files Browse the repository at this point in the history
## Summary

Fixes astral-sh#5503. Ready for final review as the `mkdocs` issue involving SSH
keys is fixed.

Note that this will only throw on a `Name` - it will be refactorable
once we have a type-checker. This means that this is the only sort of
input that will throw.
```python
x = range(10)
list(x)[0]
```

I thought it'd be confusing if we supported direct function results.
Consider this example, assuming we support direct results:
```python
# throws
list(range(10))[0]

def createRange(bound):
    return range(bound)

# "why doesn't this throw, but a direct `range(10)` call does?"
list(createRange(10))[0]
```
If it's necessary, I can go through the list of built-ins and find those
which produce iterables, then add them to the throwing list.

## Test Plan

Added a new fixture, then ran `cargo t`
  • Loading branch information
evanrittenhouse authored Jul 10, 2023
1 parent 3562d80 commit 28fe2d3
Show file tree
Hide file tree
Showing 8 changed files with 630 additions and 1 deletion.
44 changes: 44 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/RUF015.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
x = range(10)

# RUF015
list(x)[0]
list(x)[:1]
list(x)[:1:1]
list(x)[:1:2]
tuple(x)[0]
tuple(x)[:1]
tuple(x)[:1:1]
tuple(x)[:1:2]
list(i for i in x)[0]
list(i for i in x)[:1]
list(i for i in x)[:1:1]
list(i for i in x)[:1:2]
[i for i in x][0]
[i for i in x][:1]
[i for i in x][:1:1]
[i for i in x][:1:2]

# OK (not indexing (solely) the first element)
list(x)
list(x)[1]
list(x)[-1]
list(x)[1:]
list(x)[:3:2]
list(x)[::2]
list(x)[::]
[i for i in x]
[i for i in x][1]
[i for i in x][-1]
[i for i in x][1:]
[i for i in x][:3:2]
[i for i in x][::2]
[i for i in x][::]

# OK (doesn't mirror the underlying list)
[i + 1 for i in x][0]
[i for i in x if i > 5][0]
[(i, i + 1) for i in x][0]

# OK (multiple generators)
y = range(10)
[i + j for i in x for j in y][0]
5 changes: 4 additions & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2142,7 +2142,7 @@ where

// Pre-visit.
match expr {
Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
subscript @ Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
// Ex) Optional[...], Union[...]
if self.any_enabled(&[
Rule::FutureRewritableTypeAnnotation,
Expand Down Expand Up @@ -2225,6 +2225,9 @@ where
if self.enabled(Rule::UncapitalizedEnvironmentVariables) {
flake8_simplify::rules::use_capital_environment_variables(self, expr);
}
if self.enabled(Rule::UnnecessaryIterableAllocationForFirstElement) {
ruff::rules::unnecessary_iterable_allocation_for_first_element(self, subscript);
}

pandas_vet::rules::subscript(self, value, expr);
}
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "013") => (RuleGroup::Unspecified, rules::ruff::rules::ImplicitOptional),
#[cfg(feature = "unreachable-code")]
(Ruff, "014") => (RuleGroup::Nursery, rules::ruff::rules::UnreachableCode),
(Ruff, "015") => (RuleGroup::Unspecified, rules::ruff::rules::UnnecessaryIterableAllocationForFirstElement),
(Ruff, "100") => (RuleGroup::Unspecified, rules::ruff::rules::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidPyprojectToml),

Expand Down
4 changes: 4 additions & 0 deletions crates/ruff/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ mod tests {
#[test_case(Rule::MutableDataclassDefault, Path::new("RUF008.py"))]
#[test_case(Rule::PairwiseOverZipped, Path::new("RUF007.py"))]
#[test_case(Rule::StaticKeyDictComprehension, Path::new("RUF011.py"))]
#[test_case(
Rule::UnnecessaryIterableAllocationForFirstElement,
Path::new("RUF015.py")
)]
#[cfg_attr(
feature = "unreachable-code",
test_case(Rule::UnreachableCode, Path::new("RUF014.py"))
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub(crate) use mutable_class_default::*;
pub(crate) use mutable_dataclass_default::*;
pub(crate) use pairwise_over_zipped::*;
pub(crate) use static_key_dict_comprehension::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
#[cfg(feature = "unreachable-code")]
pub(crate) use unreachable::*;
pub(crate) use unused_noqa::*;
Expand All @@ -26,6 +27,7 @@ mod mutable_class_default;
mod mutable_dataclass_default;
mod pairwise_over_zipped;
mod static_key_dict_comprehension;
mod unnecessary_iterable_allocation_for_first_element;
#[cfg(feature = "unreachable-code")]
pub(crate) mod unreachable;
mod unused_noqa;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
use num_bigint::BigInt;
use num_traits::{One, Zero};
use rustpython_parser::ast::{self, Comprehension, Constant, Expr};

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_semantic::SemanticModel;

use crate::checkers::ast::Checker;
use crate::registry::AsRule;

/// ## What it does
/// Checks for uses of `list(...)[0]` that can be replaced with
/// `next(iter(...))`.
///
/// ## Why is this bad?
/// Calling `list(...)` will create a new list of the entire collection, which
/// can be very expensive for large collections. If you only need the first
/// element of the collection, you can use `next(iter(...))` to lazily fetch
/// the first element without creating a new list.
///
/// Note that migrating from `list(...)[0]` to `next(iter(...))` can change
/// the behavior of your program in two ways:
///
/// 1. First, `list(...)` will eagerly evaluate the entire collection, while
/// `next(iter(...))` will only evaluate the first element. As such, any
/// side effects that occur during iteration will be delayed.
/// 2. Second, `list(...)[0]` will raise `IndexError` if the collection is
/// empty, while `next(iter(...))` will raise `StopIteration`.
///
/// ## Example
/// ```python
/// head = list(range(1000000000000))[0]
/// ```
///
/// Use instead:
/// ```python
/// head = next(iter(range(1000000000000)))
/// ```
///
/// ## References
/// - [Iterators and Iterables in Python: Run Efficient Iterations](https://realpython.com/python-iterators-iterables/#when-to-use-an-iterator-in-python)
#[violation]
pub(crate) struct UnnecessaryIterableAllocationForFirstElement {
iterable: String,
subscript_kind: HeadSubscriptKind,
}

impl AlwaysAutofixableViolation for UnnecessaryIterableAllocationForFirstElement {
#[derive_message_formats]
fn message(&self) -> String {
let UnnecessaryIterableAllocationForFirstElement {
iterable,
subscript_kind,
} = self;
match subscript_kind {
HeadSubscriptKind::Index => {
format!("Prefer `next(iter({iterable}))` over `list({iterable})[0]`")
}
HeadSubscriptKind::Slice => {
format!("Prefer `[next(iter({iterable}))]` over `list({iterable})[:1]`")
}
}
}

fn autofix_title(&self) -> String {
let UnnecessaryIterableAllocationForFirstElement {
iterable,
subscript_kind,
} = self;
match subscript_kind {
HeadSubscriptKind::Index => format!("Replace with `next(iter({iterable}))`"),
HeadSubscriptKind::Slice => format!("Replace with `[next(iter({iterable}))]"),
}
}
}

/// RUF015
pub(crate) fn unnecessary_iterable_allocation_for_first_element(
checker: &mut Checker,
subscript: &Expr,
) {
let Expr::Subscript(ast::ExprSubscript {
value,
slice,
range,
..
}) = subscript
else {
return;
};

let Some(subscript_kind) = classify_subscript(slice) else {
return;
};

let Some(iterable) = iterable_name(value, checker.semantic()) else {
return;
};

let mut diagnostic = Diagnostic::new(
UnnecessaryIterableAllocationForFirstElement {
iterable: iterable.to_string(),
subscript_kind,
},
*range,
);

if checker.patch(diagnostic.kind.rule()) {
let replacement = match subscript_kind {
HeadSubscriptKind::Index => format!("next(iter({iterable}))"),
HeadSubscriptKind::Slice => format!("[next(iter({iterable}))]"),
};
diagnostic.set_fix(Fix::suggested(Edit::range_replacement(replacement, *range)));
}

checker.diagnostics.push(diagnostic);
}

/// A subscript slice that represents the first element of a list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HeadSubscriptKind {
/// The subscript is an index (e.g., `[0]`).
Index,
/// The subscript is a slice (e.g., `[:1]`).
Slice,
}

/// Check that the slice [`Expr`] is functionally equivalent to slicing into the first element. The
/// first `bool` checks that the element is in fact first, the second checks if it's a slice or an
/// index.
fn classify_subscript(expr: &Expr) -> Option<HeadSubscriptKind> {
match expr {
Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) if value.is_zero() => Some(HeadSubscriptKind::Index),
Expr::Slice(ast::ExprSlice {
step, lower, upper, ..
}) => {
// Avoid, e.g., `list(...)[:2]`
let upper = upper.as_ref()?;
let upper = as_int(upper)?;
if !upper.is_one() {
return None;
}

// Avoid, e.g., `list(...)[2:]`.
if let Some(lower) = lower.as_ref() {
let lower = as_int(lower)?;
if !lower.is_zero() {
return None;
}
}

// Avoid, e.g., `list(...)[::-1]`
if let Some(step) = step.as_ref() {
let step = as_int(step)?;
if step < upper {
return None;
}
}

Some(HeadSubscriptKind::Slice)
}
_ => None,
}
}

/// Fetch the name of the iterable from an expression if the expression returns an unmodified list
/// which can be sliced into.
fn iterable_name<'a>(expr: &'a Expr, model: &SemanticModel) -> Option<&'a str> {
match expr {
Expr::Call(ast::ExprCall { func, args, .. }) => {
let ast::ExprName { id, .. } = func.as_name_expr()?;

if !matches!(id.as_str(), "tuple" | "list") {
return None;
}

if !model.is_builtin(id.as_str()) {
return None;
}

match args.first() {
Some(Expr::Name(ast::ExprName { id: arg_name, .. })) => Some(arg_name.as_str()),
Some(Expr::GeneratorExp(ast::ExprGeneratorExp {
elt, generators, ..
})) => generator_iterable(elt, generators),
_ => None,
}
}
Expr::ListComp(ast::ExprListComp {
elt, generators, ..
}) => generator_iterable(elt, generators),
_ => None,
}
}

/// Given a comprehension, returns the name of the iterable over which it iterates, if it's
/// a simple comprehension (e.g., `x` for `[i for i in x]`).
fn generator_iterable<'a>(elt: &'a Expr, generators: &'a Vec<Comprehension>) -> Option<&'a str> {
// If the `elt` field is anything other than a [`Expr::Name`], we can't be sure that it
// doesn't modify the elements of the underlying iterator (e.g., `[i + 1 for i in x][0]`).
if !elt.is_name_expr() {
return None;
}

// If there's more than 1 generator, we can't safely say that it fits the diagnostic conditions
// (e.g., `[(i, j) for i in x for j in y][0]`).
let [generator] = generators.as_slice() else {
return None;
};

// Ignore if there's an `if` statement in the comprehension, since it filters the list.
if !generator.ifs.is_empty() {
return None;
}

let ast::ExprName { id, .. } = generator.iter.as_name_expr()?;
Some(id.as_str())
}

/// If an expression is a constant integer, returns the value of that integer; otherwise,
/// returns `None`.
fn as_int(expr: &Expr) -> Option<&BigInt> {
if let Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) = expr
{
Some(value)
} else {
None
}
}
Loading

0 comments on commit 28fe2d3

Please sign in to comment.