Skip to content

Commit

Permalink
Add type inference
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Feb 13, 2024
1 parent d9e6d77 commit 66faaab
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 84 deletions.
56 changes: 39 additions & 17 deletions crates/ruff_linter/src/rules/refurb/rules/readlines_in_for.rs
Original file line number Diff line number Diff line change
@@ -1,42 +1,43 @@
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{Comprehension, Expr, StmtFor};
use ruff_python_ast::{self as ast, Comprehension, Expr, StmtFor};
use ruff_python_semantic::analyze::typing;
use ruff_python_semantic::analyze::typing::{is_io_base_expr, IoBaseChecker, TypeChecker};
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for uses of `readlines()` when iterating a file object line-by-line.
/// Checks for uses of `readlines()` when iterating over a file line-by-line.
///
/// ## Why is this bad?
/// Instead of iterating through `list[str]` which is returned from `readlines()`, use the iteration
/// through a file object which is a more convenient and performant way.
/// Rather than iterating over all lines in a file by calling `readlines()`,
/// it's more convenient and performant to iterate over the file object
/// directly.
///
/// ## Example
/// ```python
/// with open("file.txt") as f:
/// for line in f.readlines():
/// with open("file.txt") as fp:
/// for line in fp.readlines():
/// ...
/// ```
///
/// Use instead:
/// ```python
/// with open("file.txt") as f:
/// for line in f:
/// with open("file.txt") as fp:
/// for line in fp:
/// ...
/// ```
///
/// ## References
/// - [Python documentation: `io.IOBase.readlines`](https://docs.python.org/3/library/io.html#io.IOBase.readlines)
/// - [Python documentation: methods of file objects](https://docs.python.org/3/tutorial/inputoutput.html#methods-of-file-objects)
///
#[violation]
pub(crate) struct ReadlinesInFor;

impl AlwaysFixableViolation for ReadlinesInFor {
#[derive_message_formats]
fn message(&self) -> String {
format!("Use of `readlines()` in loop")
format!("Instead of calling `readlines()`, iterate over file object directly")
}

fn fix_title(&self) -> String {
Expand All @@ -63,11 +64,32 @@ fn readlines_in_iter(checker: &mut Checker, iter_expr: &Expr) {
return;
};

if expr_attr.attr.as_str() == "readlines" && expr_call.arguments.is_empty() {
let mut diagnostic = Diagnostic::new(ReadlinesInFor, expr_call.range());
diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion(
expr_call.range().add_start(expr_attr.value.range().len()),
)));
checker.diagnostics.push(diagnostic);
if expr_attr.attr.as_str() != "readlines" || !expr_call.arguments.is_empty() {
return;
}

// Determine whether `fp` in `fp.readlines()` was bound to a file object.
if let Expr::Name(name) = expr_attr.value.as_ref() {
let Some(binding) = checker
.semantic()
.resolve_name(name)
.map(|id| checker.semantic().binding(id))
else {
return;
};

if !typing::is_io_base(binding, checker.semantic()) {
return;
}
} else {
if !is_io_base_expr(expr_attr.value.as_ref(), checker.semantic()) {
return;
}
}

let mut diagnostic = Diagnostic::new(ReadlinesInFor, expr_call.range());
diagnostic.set_fix(Fix::unsafe_edit(Edit::range_deletion(
expr_call.range().add_start(expr_attr.value.range().len()),
)));
checker.diagnostics.push(diagnostic);
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
source: crates/ruff_linter/src/rules/refurb/mod.rs
---
FURB129.py:7:18: FURB129 [*] Use of `readlines()` in loop
FURB129.py:7:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
5 | # Errors
6 | with open("FURB129.py") as f:
Expand All @@ -22,7 +22,7 @@ FURB129.py:7:18: FURB129 [*] Use of `readlines()` in loop
9 9 | a = [line.lower() for line in f.readlines()]
10 10 | b = {line.upper() for line in f.readlines()}

FURB129.py:9:35: FURB129 [*] Use of `readlines()` in loop
FURB129.py:9:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
7 | for _line in f.readlines():
8 | pass
Expand All @@ -43,7 +43,7 @@ FURB129.py:9:35: FURB129 [*] Use of `readlines()` in loop
11 11 | c = {line.lower(): line.upper() for line in f.readlines()}
12 12 |

FURB129.py:10:35: FURB129 [*] Use of `readlines()` in loop
FURB129.py:10:35: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
8 | pass
9 | a = [line.lower() for line in f.readlines()]
Expand All @@ -63,7 +63,7 @@ FURB129.py:10:35: FURB129 [*] Use of `readlines()` in loop
12 12 |
13 13 | with Path("FURB129.py").open() as f:

FURB129.py:11:49: FURB129 [*] Use of `readlines()` in loop
FURB129.py:11:49: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
9 | a = [line.lower() for line in f.readlines()]
10 | b = {line.upper() for line in f.readlines()}
Expand All @@ -84,7 +84,7 @@ FURB129.py:11:49: FURB129 [*] Use of `readlines()` in loop
13 13 | with Path("FURB129.py").open() as f:
14 14 | for _line in f.readlines():

FURB129.py:14:18: FURB129 [*] Use of `readlines()` in loop
FURB129.py:14:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
13 | with Path("FURB129.py").open() as f:
14 | for _line in f.readlines():
Expand All @@ -103,7 +103,7 @@ FURB129.py:14:18: FURB129 [*] Use of `readlines()` in loop
16 16 |
17 17 | for _line in open("FURB129.py").readlines():

FURB129.py:17:14: FURB129 [*] Use of `readlines()` in loop
FURB129.py:17:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
15 | pass
16 |
Expand All @@ -123,7 +123,7 @@ FURB129.py:17:14: FURB129 [*] Use of `readlines()` in loop
19 19 |
20 20 | for _line in Path("FURB129.py").open().readlines():

FURB129.py:20:14: FURB129 [*] Use of `readlines()` in loop
FURB129.py:20:14: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
18 | pass
19 |
Expand All @@ -143,7 +143,7 @@ FURB129.py:20:14: FURB129 [*] Use of `readlines()` in loop
22 22 |
23 23 |

FURB129.py:26:18: FURB129 [*] Use of `readlines()` in loop
FURB129.py:26:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
24 | def good1():
25 | f = Path("FURB129.py").open()
Expand All @@ -164,7 +164,7 @@ FURB129.py:26:18: FURB129 [*] Use of `readlines()` in loop
28 28 | f.close()
29 29 |

FURB129.py:32:18: FURB129 [*] Use of `readlines()` in loop
FURB129.py:32:18: FURB129 [*] Instead of calling `readlines()`, iterate over file object directly
|
31 | def good2(f: io.BytesIO):
32 | for _line in f.readlines():
Expand All @@ -183,61 +183,4 @@ FURB129.py:32:18: FURB129 [*] Use of `readlines()` in loop
34 34 |
35 35 |

FURB129.py:38:18: FURB129 [*] Use of `readlines()` in loop
|
36 | # False positives
37 | def bad(f):
38 | for _line in f.readlines():
| ^^^^^^^^^^^^^ FURB129
39 | pass
|
= help: Remove `readlines()`

Unsafe fix
35 35 |
36 36 | # False positives
37 37 | def bad(f):
38 |- for _line in f.readlines():
38 |+ for _line in f:
39 39 | pass
40 40 |
41 41 |

FURB129.py:43:18: FURB129 [*] Use of `readlines()` in loop
|
42 | def worse(f: codecs.StreamReader):
43 | for _line in f.readlines():
| ^^^^^^^^^^^^^ FURB129
44 | pass
|
= help: Remove `readlines()`

Unsafe fix
40 40 |
41 41 |
42 42 | def worse(f: codecs.StreamReader):
43 |- for _line in f.readlines():
43 |+ for _line in f:
44 44 | pass
45 45 |
46 46 |

FURB129.py:55:14: FURB129 [*] Use of `readlines()` in loop
|
55 | for _line in foo().readlines():
| ^^^^^^^^^^^^^^^^^ FURB129
56 | pass
|
= help: Remove `readlines()`

Unsafe fix
52 52 | return A()
53 53 |
54 54 |
55 |-for _line in foo().readlines():
55 |+for _line in foo():
56 56 | pass
57 57 |
58 58 | # OK


122 changes: 121 additions & 1 deletion crates/ruff_python_semantic/src/analyze/typing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ where
}

/// Abstraction for a type checker, conservatively checks for the intended type(s).
trait TypeChecker {
pub trait TypeChecker {
/// Check annotation expression to match the intended type(s).
fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool;
/// Check initializer expression to match the intended type(s).
Expand Down Expand Up @@ -441,6 +441,26 @@ fn check_type<T: TypeChecker>(binding: &Binding, semantic: &SemanticModel) -> bo
_ => false,
},

BindingKind::WithItemVar => match binding.statement(semantic) {
// ```python
// with open("file.txt") as x:
// ...
// ```
//
// We trust the type checker to infer the type based on the context manager.
Some(Stmt::With(ast::StmtWith { items, .. })) => {
let Some(item) = items.iter().find(|item| {
item.optional_vars
.as_ref()
.is_some_and(|vars| vars.range().contains_range(binding.range))
}) else {
return false;
};
T::match_initializer(&item.context_expr, semantic)
}
_ => false,
},

BindingKind::Argument => match binding.statement(semantic) {
// ```python
// def foo(x: annotation):
Expand Down Expand Up @@ -565,6 +585,80 @@ impl BuiltinTypeChecker for TupleChecker {
const EXPR_TYPE: PythonType = PythonType::Tuple;
}

pub struct IoBaseChecker;

impl TypeChecker for IoBaseChecker {
fn match_annotation(annotation: &Expr, semantic: &SemanticModel) -> bool {
semantic
.resolve_call_path(annotation)
.is_some_and(|call_path| {
if semantic.match_typing_call_path(&call_path, "IO") {
return true;
}
if semantic.match_typing_call_path(&call_path, "BinaryIO") {
return true;
}
if semantic.match_typing_call_path(&call_path, "TextIO") {
return true;
}
matches!(
call_path.as_slice(),
[
"io",
"IOBase"
| "RawIOBase"
| "BufferedIOBase"
| "TextIOBase"
| "BytesIO"
| "StringIO"
| "BufferedReader"
| "BufferedWriter"
| "BufferedRandom"
| "BufferedRWPair"
| "TextIOWrapper"
] | ["os", "Path" | "PathLike"]
| [
"pathlib",
"Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath"
]
)
})
}

fn match_initializer(initializer: &Expr, semantic: &SemanticModel) -> bool {
let Expr::Call(ast::ExprCall { func, .. }) = initializer else {
return false;
};

// Ex) `pathlib.Path("file.txt")`
if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() {
if attr.as_str() == "open" {
if let Expr::Call(ast::ExprCall { func, .. }) = value.as_ref() {
return semantic.resolve_call_path(func).is_some_and(|call_path| {
matches!(
call_path.as_slice(),
[
"pathlib",
"Path" | "PurePath" | "PurePosixPath" | "PureWindowsPath"
]
)
});
}
}
}

// Ex) `open("file.txt")`
semantic
.resolve_call_path(func.as_ref())
.is_some_and(|call_path| {
matches!(
call_path.as_slice(),
["io", "open" | "open_code"] | ["os", "open"] | ["", "open"]
)
})
}
}

/// Test whether the given binding (and the given name) can be considered a list.
/// For this, we check what value might be associated with it through it's initialization and
/// what annotation it has (we consider `list` and `typing.List`).
Expand Down Expand Up @@ -594,6 +688,20 @@ pub fn is_tuple(binding: &Binding, semantic: &SemanticModel) -> bool {
check_type::<TupleChecker>(binding, semantic)
}

/// Test whether the given binding (and the given name) can be considered a
/// file-like object. For this, we check what value might be associated with it
/// through it's initialization and what annotation it has.
pub fn is_io_base(binding: &Binding, semantic: &SemanticModel) -> bool {
check_type::<IoBaseChecker>(binding, semantic)
}

/// Test whether the given expression (and the given name) can be considered a
/// file-like object. For this, we check what value might be associated with it
/// through it's initialization and what annotation it has.
pub fn is_io_base_expr(expr: &Expr, semantic: &SemanticModel) -> bool {
IoBaseChecker::match_initializer(expr, semantic)
}

/// Find the [`ParameterWithDefault`] corresponding to the given [`Binding`].
#[inline]
fn find_parameter<'a>(
Expand Down Expand Up @@ -699,6 +807,18 @@ pub fn find_binding_value<'a>(binding: &Binding, semantic: &'a SemanticModel) ->
_ => {}
}
}
// Ex) `with open("file.txt") as f:`
BindingKind::WithItemVar => {
let parent_id = binding.source?;
let parent = semantic.statement(parent_id);
if let Stmt::With(ast::StmtWith { items, .. }) = parent {
return items.iter().find_map(|item| {
let target = item.optional_vars.as_ref()?;
let value = &item.context_expr;
match_value(binding, target, value)
});
}
}
_ => {}
}
None
Expand Down

0 comments on commit 66faaab

Please sign in to comment.