Merge pull request #883 from bcroq/file-encoding

work on file encoding detection
PyCQA · Mar 8, 2019 · 6655b28
2 parents f4c29ca + 9377ab2
commit 6655b28
Show file tree

Hide file tree

Showing 2 changed files with 38 additions and 15 deletions.
diff --git a/isort/isort.py b/isort/isort.py
@@ -26,6 +26,7 @@
 """
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+import codecs
 import copy
 import io
 import itertools
@@ -46,7 +47,7 @@ class SortImports(object):
     incorrectly_sorted = False
     skipped = False
 
-    def __init__(self, file_path=None, file_contents=None, write_to_stdout=False, check=False,
+    def __init__(self, file_path=None, file_contents=None, file_=None, write_to_stdout=False, check=False,
                  show_diff=False, settings_path=None, ask_to_apply=False, run_path='', check_skip=True,
                  **setting_overrides):
         if not settings_path and file_path:
@@ -108,7 +109,8 @@ def __init__(self, file_path=None, file_contents=None, write_to_stdout=False, ch
                             " or matches a glob in 'skip_glob' setting".format(file_path))
                     file_contents = None
             if not self.skipped and not file_contents:
-                file_encoding = coding_check(file_path)
+                with io.open(file_path, 'rb') as f:
+                    file_encoding = coding_check(f)
                 with io.open(file_path, encoding=file_encoding, newline='') as file_to_import_sort:
                     try:
                         file_contents = file_to_import_sort.read()
@@ -133,6 +135,24 @@ def __init__(self, file_path=None, file_contents=None, write_to_stdout=False, ch
                                       "{} encoding or {} fallback encoding".format(file_path,
                                                                                    self.file_encoding,
                                                                                    file_to_import_sort.encoding))
+        elif file_:
+            self.file_encoding = coding_check(file_)
+            file_.seek(0)
+            reader = codecs.getreader(self.file_encoding)
+            file_contents = reader(file_).read()
+
+        # try to decode file_contents
+        if file_contents:
+            try:
+                basestring
+                # python 2
+                need_decode = (str, bytes)
+            except NameError:
+                # python 3
+                need_decode = bytes
+
+            if isinstance(file_contents, need_decode):
+                file_contents = file_contents.decode(coding_check(file_contents.splitlines()))
 
         if file_contents is None or ("isort:" + "skip_file") in file_contents:
             self.skipped = True
@@ -1010,19 +1030,16 @@ def _parse(self):
                         self.imports[placed_module][import_type][module] = None
 
 
-def coding_check(fname, default='utf-8'):
+def coding_check(lines, default='utf-8'):
 
     # see https://www.python.org/dev/peps/pep-0263/
     pattern = re.compile(br'coding[:=]\s*([-\w.]+)')
 
-    coding = default
-    with io.open(fname, 'rb') as f:
-        for line_number, line in enumerate(f, 1):
-            groups = re.findall(pattern, line)
-            if groups:
-                coding = groups[0].decode('ascii')
-                break
-            if line_number > 2:
-                break
-
-    return coding
+    for line_number, line in enumerate(lines, 1):
+        groups = re.findall(pattern, line)
+        if groups:
+            return groups[0].decode('ascii')
+        if line_number > 2:
+            break
+
+    return default
diff --git a/isort/main.py b/isort/main.py
@@ -330,7 +330,13 @@ def main(argv=None):
 
     file_names = arguments.pop('files', [])
     if file_names == ['-']:
-        SortImports(file_contents=sys.stdin.read(), write_to_stdout=True, **arguments)
+        try:
+            # python 3
+            file_ = sys.stdin.buffer
+        except AttributeError:
+            # python 2
+            file_ = sys.stdin
+        SortImports(file_=file_, write_to_stdout=True, **arguments)
     else:
         if not file_names:
             file_names = ['.']