Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue/1487/improve handling of encoding errors #1521

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Find out more about isort's release policy [here](https://pycqa.github.io/isort/
- Implemented #1494: Default to sorting imports within `.pxd` files.
- Implemented #1502: Improved float-to-top behavior when there is an existing import section present at top-of-file.
- Implemented #1511: Support for easily seeing all files isort will be ran against using `isort . --show-files`.
- Implemented #1487: Improved handling of encoding errors.
- Improved handling of unsupported configuration option errors (see #1475).
- Fixed #1463: Better interactive documentation for future option.
- Fixed #1461: Quiet config option not respected by file API in some circumstances.
Expand Down
14 changes: 13 additions & 1 deletion isort/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""All isort specific exception classes should be defined here"""
from typing import Any, Dict
from pathlib import Path
from typing import Any, Dict, Union

from .profiles import profiles

Expand Down Expand Up @@ -157,3 +158,14 @@ def __init__(self, unsupported_settings: Dict[str, Dict[str, str]]):
"https://pycqa.github.io/isort/docs/configuration/options/.\n"
)
self.unsupported_settings = unsupported_settings


class UnsupportedEncoding(ISortError):
"""Raised when isort encounters an encoding error while trying to read a file"""

def __init__(
self,
filename: Union[str, Path],
):
super().__init__(f"Unknown or unsupported encoding in {filename}")
self.filename = filename
15 changes: 12 additions & 3 deletions isort/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from contextlib import contextmanager
from io import BytesIO, StringIO, TextIOWrapper
from pathlib import Path
from typing import Iterator, NamedTuple, TextIO, Union
from typing import Callable, Iterator, NamedTuple, TextIO, Union

from isort.exceptions import UnsupportedEncoding

_ENCODING_PATTERN = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")

Expand All @@ -14,9 +16,16 @@ class File(NamedTuple):
path: Path
encoding: str

@staticmethod
def detect_encoding(filename: str, readline: Callable[[], bytes]):
try:
return tokenize.detect_encoding(readline)[0]
except Exception:
raise UnsupportedEncoding(filename)

@staticmethod
def from_contents(contents: str, filename: str) -> "File":
encoding, _ = tokenize.detect_encoding(BytesIO(contents.encode("utf-8")).readline)
encoding = File.detect_encoding(filename, BytesIO(contents.encode("utf-8")).readline)
return File(StringIO(contents), path=Path(filename).resolve(), encoding=encoding)

@property
Expand All @@ -30,7 +39,7 @@ def _open(filename):
"""
buffer = open(filename, "rb")
try:
encoding, _ = tokenize.detect_encoding(buffer.readline)
encoding = File.detect_encoding(filename, buffer.readline)
buffer.seek(0)
text = TextIOWrapper(buffer, encoding, line_buffering=True, newline="")
text.mode = "r" # type: ignore
Expand Down
29 changes: 25 additions & 4 deletions isort/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from warnings import warn

from . import __version__, api, sections
from .exceptions import FileSkipped
from .exceptions import FileSkipped, UnsupportedEncoding
from .format import create_terminal_printer
from .logo import ASCII_ART
from .profiles import profiles
Expand Down Expand Up @@ -67,9 +67,10 @@


class SortAttempt:
def __init__(self, incorrectly_sorted: bool, skipped: bool) -> None:
def __init__(self, incorrectly_sorted: bool, skipped: bool, supported_encoding: bool) -> None:
self.incorrectly_sorted = incorrectly_sorted
self.skipped = skipped
self.supported_encoding = supported_encoding


def sort_imports(
Expand All @@ -88,7 +89,7 @@ def sort_imports(
incorrectly_sorted = not api.check_file(file_name, config=config, **kwargs)
except FileSkipped:
skipped = True
return SortAttempt(incorrectly_sorted, skipped)
return SortAttempt(incorrectly_sorted, skipped, True)
else:
try:
incorrectly_sorted = not api.sort_file(
Expand All @@ -100,10 +101,14 @@ def sort_imports(
)
except FileSkipped:
skipped = True
return SortAttempt(incorrectly_sorted, skipped)
return SortAttempt(incorrectly_sorted, skipped, True)
except (OSError, ValueError) as error:
warn(f"Unable to parse file {file_name} due to {error}")
return None
except UnsupportedEncoding:
if config.verbose:
warn(f"Encoding not supported for {file_name}")
return SortAttempt(incorrectly_sorted, skipped, False)
except Exception:
printer = create_terminal_printer(color=config.color_output)
printer.error(
Expand Down Expand Up @@ -860,6 +865,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
remapped_deprecated_args = config_dict.pop("remapped_deprecated_args", False)
wrong_sorted_files = False
all_attempt_broken = False
no_valid_encodings = False

if "src_paths" in config_dict:
config_dict["src_paths"] = {
Expand Down Expand Up @@ -909,6 +915,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
return
num_skipped = 0
num_broken = 0
num_invalid_encoding = 0
if config.verbose:
print(ASCII_ART)

Expand Down Expand Up @@ -942,6 +949,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =

# If any files passed in are missing considered as error, should be removed
is_no_attempt = True
any_encoding_valid = False
for sort_attempt in attempt_iterator:
if not sort_attempt:
continue # pragma: no cover - shouldn't happen, satisfies type constraint
Expand All @@ -952,6 +960,12 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
num_skipped += (
1 # pragma: no cover - shouldn't happen, due to skip in iter_source_code
)

if not sort_attempt.supported_encoding:
num_invalid_encoding += 1
else:
any_encoding_valid = True

is_no_attempt = False

num_skipped += len(skipped)
Expand All @@ -973,6 +987,8 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =

if num_broken > 0 and is_no_attempt:
all_attempt_broken = True
if num_invalid_encoding > 0 and not any_encoding_valid:
no_valid_encodings = True

if not config.quiet and (remapped_deprecated_args or deprecated_flags):
if remapped_deprecated_args:
Expand All @@ -996,6 +1012,11 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] =
if all_attempt_broken:
sys.exit(1)

if no_valid_encodings:
printer = create_terminal_printer(color=config.color_output)
printer.error("No valid encodings.")
sys.exit(1)


if __name__ == "__main__":
main()
8 changes: 8 additions & 0 deletions tests/unit/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,11 @@ def setup_class(self):

def test_variables(self):
assert self.instance.unsupported_settings == {"apply": {"value": "true", "source": "/"}}


class TestUnsupportedEncoding(TestISortError):
def setup_class(self):
self.instance = exceptions.UnsupportedEncoding("file.py")

def test_variables(self):
assert self.instance.filename == "file.py"
30 changes: 30 additions & 0 deletions tests/unit/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,36 @@ def test_isort_with_stdin(capsys):
"""
)


def test_unsupported_encodings(tmpdir, capsys):
tmp_file = tmpdir.join("file.py")
# fmt: off
tmp_file.write_text(
'''
# [syntax-error]\
# -*- coding: IBO-8859-1 -*-
""" check correct unknown encoding declaration
"""
__revision__ = 'יייי'
''',
encoding="utf8"
)
# fmt: on

# should throw an error if only unsupported encoding provided
with pytest.raises(SystemExit):
main.main([str(tmp_file)])
out, error = capsys.readouterr()

assert "No valid encodings." in error

# should not throw an error if at least one valid encoding found
normal_file = tmpdir.join("file1.py")
normal_file.write("import os\nimport sys")

main.main([str(tmp_file), str(normal_file), "--verbose"])
out, error = capsys.readouterr()

# ensures that only-modified flag works with stdin
input_content = TextIOWrapper(
BytesIO(
Expand Down