Skip to content

Commit

Permalink
fix an issue where UTF-8 with a BOM would not be detected as UTF-8-SIG ( fix #28 )
Browse files Browse the repository at this point in the history
  • Loading branch information
PyYoshi committed Apr 14, 2017
1 parent 1f1e8c1 commit f729e11
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/ext/uchardet
12 changes: 12 additions & 0 deletions src/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,15 @@ def test_decode(self):
except LookupError as e:
print("LookupError: { file=%s, encoding=%s }" % (testfile, detected_encoding["encoding"]))
raise e

def test_utf8_with_bom(self):
    """Check that a buffer holding only the UTF-8 BOM is detected as utf-8-sig.

    The sample is the three-byte sequence EF BB BF. The detected encoding
    name is compared case-insensitively against "utf-8-sig".
    """
    expected = "utf-8-sig"
    sample = b'\xEF\xBB\xBF'
    detected_encoding = cchardet.detect(sample)

    # Normalise once. If the detector reports no encoding (None), keep it as
    # None so the failure surfaces through the assertion message below rather
    # than as an AttributeError raised by calling .lower() on None.
    encoding = detected_encoding['encoding']
    actual = encoding.lower() if encoding is not None else None

    eq_(
        expected,
        actual,
        'Expected %s, but got %s' % (expected, actual)
    )

0 comments on commit f729e11

Please sign in to comment.