Skip to content

Commit

Permalink
Use UTF-8 as default encoding in unspecified-encoding fix (#12370)
Browse files Browse the repository at this point in the history
## Summary

This is the _intended_ default that PEP 597 _wants_, but it's not
backwards compatible. The fix is already unsafe, so it's better for us
to recommend the desired and expected behavior.

Closes #12069.
  • Loading branch information
charliermarsh authored Jul 17, 2024
1 parent 1de8ff3 commit e39298d
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 641 deletions.
11 changes: 0 additions & 11 deletions crates/ruff_linter/src/rules/pylint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,15 +396,4 @@ mod tests {
assert_messages!(diagnostics);
Ok(())
}

#[test]
fn unspecified_encoding_python39_or_lower() -> Result<()> {
let diagnostics = test_path(
Path::new("pylint/unspecified_encoding.py"),
&LinterSettings::for_rule(Rule::UnspecifiedEncoding)
.with_target_version(PythonVersion::Py39),
)?;
assert_messages!(diagnostics);
Ok(())
}
}
45 changes: 12 additions & 33 deletions crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use std::fmt::{Display, Formatter};

use anyhow::Result;

use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::name::QualifiedName;
Expand All @@ -11,21 +9,24 @@ use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;
use crate::fix::edits::add_argument;
use crate::importer::ImportRequest;
use crate::settings::types::PythonVersion;

/// ## What it does
/// Checks for uses of `open` and related calls without an explicit `encoding`
/// argument.
///
/// ## Why is this bad?
/// Using `open` in text mode without an explicit encoding can lead to
/// non-portable code, with differing behavior across platforms.
/// non-portable code, with differing behavior across platforms. While readers
/// may assume that UTF-8 is the default encoding, in reality, the default
/// is locale-specific.
///
/// Instead, consider using the `encoding` parameter to enforce a specific
/// encoding. [PEP 597] recommends using `locale.getpreferredencoding(False)`
/// as the default encoding on versions earlier than Python 3.10, and
/// `encoding="locale"` on Python 3.10 and later.
/// encoding. [PEP 597] recommends the use of `encoding="utf-8"` as a default,
/// and suggests that it may become the default in future versions of Python.
///
/// If a local-specific encoding is intended, use `encoding="local"` on
/// Python 3.10 and later, or `locale.getpreferredencoding()` on earlier versions,
/// to make the encoding explicit.
///
/// ## Example
/// ```python
Expand Down Expand Up @@ -86,13 +87,7 @@ pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall)
},
call.func.range(),
);

if checker.settings.target_version >= PythonVersion::Py310 {
diagnostic.set_fix(generate_keyword_fix(checker, call));
} else {
diagnostic.try_set_fix(|| generate_import_fix(checker, call));
}

diagnostic.set_fix(generate_keyword_fix(checker, call));
checker.diagnostics.push(diagnostic);
}

Expand Down Expand Up @@ -158,7 +153,7 @@ impl Display for Callee<'_> {
}
}

/// Generate an [`Edit`] for Python 3.10 and later.
/// Generate an [`Edit`] to set `encoding="utf-8"`.
fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix {
Fix::unsafe_edit(add_argument(
&format!(
Expand All @@ -167,7 +162,7 @@ fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix {
.generator()
.expr(&Expr::StringLiteral(ast::ExprStringLiteral {
value: ast::StringLiteralValue::single(ast::StringLiteral {
value: "locale".to_string().into_boxed_str(),
value: "utf-8".to_string().into_boxed_str(),
flags: StringLiteralFlags::default(),
range: TextRange::default(),
}),
Expand All @@ -180,22 +175,6 @@ fn generate_keyword_fix(checker: &Checker, call: &ast::ExprCall) -> Fix {
))
}

/// Generate an [`Edit`] for Python 3.9 and earlier.
fn generate_import_fix(checker: &Checker, call: &ast::ExprCall) -> Result<Fix> {
let (import_edit, binding) = checker.importer().get_or_import_symbol(
&ImportRequest::import("locale", "getpreferredencoding"),
call.start(),
checker.semantic(),
)?;
let argument_edit = add_argument(
&format!("encoding={binding}(False)"),
&call.arguments,
checker.comment_ranges(),
checker.locator().contents(),
);
Ok(Fix::unsafe_edits(import_edit, [argument_edit]))
}

/// Returns `true` if the given expression is a string literal containing a `b` character.
fn is_binary_mode(expr: &Expr) -> Option<bool> {
Some(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ unspecified_encoding.py:8:1: PLW1514 [*] `open` in text mode without explicit `e
6 6 |
7 7 | # Errors.
8 |-open("test.txt")
8 |+open("test.txt", encoding="locale")
8 |+open("test.txt", encoding="utf-8")
9 9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 11 | tempfile.NamedTemporaryFile("w")
Expand All @@ -37,7 +37,7 @@ unspecified_encoding.py:9:1: PLW1514 [*] `io.TextIOWrapper` without explicit `en
7 7 | # Errors.
8 8 | open("test.txt")
9 |-io.TextIOWrapper(io.FileIO("test.txt"))
9 |+io.TextIOWrapper(io.FileIO("test.txt"), encoding="locale")
9 |+io.TextIOWrapper(io.FileIO("test.txt"), encoding="utf-8")
10 10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 11 | tempfile.NamedTemporaryFile("w")
12 12 | tempfile.TemporaryFile("w")
Expand All @@ -58,7 +58,7 @@ unspecified_encoding.py:10:1: PLW1514 [*] `io.TextIOWrapper` without explicit `e
8 8 | open("test.txt")
9 9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 |-hugo.TextIOWrapper(hugo.FileIO("test.txt"))
10 |+hugo.TextIOWrapper(hugo.FileIO("test.txt"), encoding="locale")
10 |+hugo.TextIOWrapper(hugo.FileIO("test.txt"), encoding="utf-8")
11 11 | tempfile.NamedTemporaryFile("w")
12 12 | tempfile.TemporaryFile("w")
13 13 | codecs.open("test.txt")
Expand All @@ -79,7 +79,7 @@ unspecified_encoding.py:11:1: PLW1514 [*] `tempfile.NamedTemporaryFile` in text
9 9 | io.TextIOWrapper(io.FileIO("test.txt"))
10 10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 |-tempfile.NamedTemporaryFile("w")
11 |+tempfile.NamedTemporaryFile("w", encoding="locale")
11 |+tempfile.NamedTemporaryFile("w", encoding="utf-8")
12 12 | tempfile.TemporaryFile("w")
13 13 | codecs.open("test.txt")
14 14 | tempfile.SpooledTemporaryFile(0, "w")
Expand All @@ -100,7 +100,7 @@ unspecified_encoding.py:12:1: PLW1514 [*] `tempfile.TemporaryFile` in text mode
10 10 | hugo.TextIOWrapper(hugo.FileIO("test.txt"))
11 11 | tempfile.NamedTemporaryFile("w")
12 |-tempfile.TemporaryFile("w")
12 |+tempfile.TemporaryFile("w", encoding="locale")
12 |+tempfile.TemporaryFile("w", encoding="utf-8")
13 13 | codecs.open("test.txt")
14 14 | tempfile.SpooledTemporaryFile(0, "w")
15 15 |
Expand All @@ -120,7 +120,7 @@ unspecified_encoding.py:13:1: PLW1514 [*] `codecs.open` in text mode without exp
11 11 | tempfile.NamedTemporaryFile("w")
12 12 | tempfile.TemporaryFile("w")
13 |-codecs.open("test.txt")
13 |+codecs.open("test.txt", encoding="locale")
13 |+codecs.open("test.txt", encoding="utf-8")
14 14 | tempfile.SpooledTemporaryFile(0, "w")
15 15 |
16 16 | # Non-errors.
Expand All @@ -141,7 +141,7 @@ unspecified_encoding.py:14:1: PLW1514 [*] `tempfile.SpooledTemporaryFile` in tex
12 12 | tempfile.TemporaryFile("w")
13 13 | codecs.open("test.txt")
14 |-tempfile.SpooledTemporaryFile(0, "w")
14 |+tempfile.SpooledTemporaryFile(0, "w", encoding="locale")
14 |+tempfile.SpooledTemporaryFile(0, "w", encoding="utf-8")
15 15 |
16 16 | # Non-errors.
17 17 | open("test.txt", encoding="utf-8")
Expand All @@ -162,7 +162,7 @@ unspecified_encoding.py:46:1: PLW1514 [*] `open` in text mode without explicit `
44 44 | tempfile.SpooledTemporaryFile(0, )
45 45 |
46 |-open("test.txt",)
46 |+open("test.txt", encoding="locale",)
46 |+open("test.txt", encoding="utf-8",)
47 47 | open()
48 48 | open(
49 49 | "test.txt", # comment
Expand All @@ -182,7 +182,7 @@ unspecified_encoding.py:47:1: PLW1514 [*] `open` in text mode without explicit `
45 45 |
46 46 | open("test.txt",)
47 |-open()
47 |+open(encoding="locale")
47 |+open(encoding="utf-8")
48 48 | open(
49 49 | "test.txt", # comment
50 50 | )
Expand All @@ -203,7 +203,7 @@ unspecified_encoding.py:48:1: PLW1514 [*] `open` in text mode without explicit `
47 47 | open()
48 48 | open(
49 |- "test.txt", # comment
49 |+ "test.txt", encoding="locale", # comment
49 |+ "test.txt", encoding="utf-8", # comment
50 50 | )
51 51 | open(
52 52 | "test.txt",
Expand All @@ -224,7 +224,7 @@ unspecified_encoding.py:51:1: PLW1514 [*] `open` in text mode without explicit `
50 50 | )
51 51 | open(
52 |- "test.txt",
52 |+ "test.txt", encoding="locale",
52 |+ "test.txt", encoding="utf-8",
53 53 | # comment
54 54 | )
55 55 | open(("test.txt"),)
Expand All @@ -245,7 +245,7 @@ unspecified_encoding.py:55:1: PLW1514 [*] `open` in text mode without explicit `
53 53 | # comment
54 54 | )
55 |-open(("test.txt"),)
55 |+open(("test.txt"), encoding="locale",)
55 |+open(("test.txt"), encoding="utf-8",)
56 56 | open(
57 57 | ("test.txt"), # comment
58 58 | )
Expand All @@ -266,7 +266,7 @@ unspecified_encoding.py:56:1: PLW1514 [*] `open` in text mode without explicit `
55 55 | open(("test.txt"),)
56 56 | open(
57 |- ("test.txt"), # comment
57 |+ ("test.txt"), encoding="locale", # comment
57 |+ ("test.txt"), encoding="utf-8", # comment
58 58 | )
59 59 | open(
60 60 | ("test.txt"),
Expand All @@ -287,7 +287,7 @@ unspecified_encoding.py:59:1: PLW1514 [*] `open` in text mode without explicit `
58 58 | )
59 59 | open(
60 |- ("test.txt"),
60 |+ ("test.txt"), encoding="locale",
60 |+ ("test.txt"), encoding="utf-8",
61 61 | # comment
62 62 | )
63 63 |
Expand All @@ -308,7 +308,7 @@ unspecified_encoding.py:64:1: PLW1514 [*] `open` in text mode without explicit `
62 62 | )
63 63 |
64 |-open((("test.txt")),)
64 |+open((("test.txt")), encoding="locale",)
64 |+open((("test.txt")), encoding="utf-8",)
65 65 | open(
66 66 | (("test.txt")), # comment
67 67 | )
Expand All @@ -328,7 +328,7 @@ unspecified_encoding.py:65:1: PLW1514 [*] `open` in text mode without explicit `
64 64 | open((("test.txt")),)
65 65 | open(
66 |- (("test.txt")), # comment
66 |+ (("test.txt")), encoding="locale", # comment
66 |+ (("test.txt")), encoding="utf-8", # comment
67 67 | )
68 68 | open(
69 69 | (("test.txt")),
Expand All @@ -349,7 +349,7 @@ unspecified_encoding.py:68:1: PLW1514 [*] `open` in text mode without explicit `
67 67 | )
68 68 | open(
69 |- (("test.txt")),
69 |+ (("test.txt")), encoding="locale",
69 |+ (("test.txt")), encoding="utf-8",
70 70 | # comment
71 71 | )
72 72 |
Expand All @@ -369,7 +369,7 @@ unspecified_encoding.py:77:1: PLW1514 [*] `pathlib.Path(...).open` in text mode
75 75 |
76 76 | # Errors.
77 |-Path("foo.txt").open()
77 |+Path("foo.txt").open(encoding="locale")
77 |+Path("foo.txt").open(encoding="utf-8")
78 78 | Path("foo.txt").open("w")
79 79 | text = Path("foo.txt").read_text()
80 80 | Path("foo.txt").write_text(text)
Expand All @@ -390,7 +390,7 @@ unspecified_encoding.py:78:1: PLW1514 [*] `pathlib.Path(...).open` in text mode
76 76 | # Errors.
77 77 | Path("foo.txt").open()
78 |-Path("foo.txt").open("w")
78 |+Path("foo.txt").open("w", encoding="locale")
78 |+Path("foo.txt").open("w", encoding="utf-8")
79 79 | text = Path("foo.txt").read_text()
80 80 | Path("foo.txt").write_text(text)
81 81 |
Expand All @@ -410,7 +410,7 @@ unspecified_encoding.py:79:8: PLW1514 [*] `pathlib.Path(...).read_text` without
77 77 | Path("foo.txt").open()
78 78 | Path("foo.txt").open("w")
79 |-text = Path("foo.txt").read_text()
79 |+text = Path("foo.txt").read_text(encoding="locale")
79 |+text = Path("foo.txt").read_text(encoding="utf-8")
80 80 | Path("foo.txt").write_text(text)
81 81 |
82 82 | # Non-errors.
Expand All @@ -431,7 +431,7 @@ unspecified_encoding.py:80:1: PLW1514 [*] `pathlib.Path(...).write_text` without
78 78 | Path("foo.txt").open("w")
79 79 | text = Path("foo.txt").read_text()
80 |-Path("foo.txt").write_text(text)
80 |+Path("foo.txt").write_text(text, encoding="locale")
80 |+Path("foo.txt").write_text(text, encoding="utf-8")
81 81 |
82 82 | # Non-errors.
83 83 | Path("foo.txt").open(encoding="utf-8")
Loading

0 comments on commit e39298d

Please sign in to comment.