Skip to content

Commit

Permalink
Improvement patch on jp (#30)
Browse files Browse the repository at this point in the history
* More punctuation signs hardcoded.

* Do not flag a Hiragana/Katakana succession as a suspicious range

* Bump patch version (1.3.2 → 1.3.3)
  • Loading branch information
Ousret authored Dec 16, 2019
1 parent b0e4e94 commit 48c2e6b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 4 additions & 1 deletion charset_normalizer/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def is_punc(letter):
return r_name is not None and \
("Punctuation" in r_name or
'Forms' in r_name or
letter in 'º¯—–‒‐⁃«‹?!;.:^$*»£¹¿~ª؟©±¡{}[]|¼½¾⅕⅙⅛™℠‼⁇❝❞¶⁋√↑↓�¤`')
letter in set('º¯—–‒‐⁃«‹?!;.:^$¥*»£¹¿~ª؟©±¡{}[]|½⅓⅔¼¾⅕⅖⅗⅘⅙⅚⅐⅛⅜⅝⅞⅑⅒™℠¬‼⁇❝❞¶⁋√↑↓�¤`¨'))

@staticmethod
@lru_cache(maxsize=8192)
Expand Down Expand Up @@ -141,6 +141,9 @@ def is_suspiciously_successive_range(range_name_a, range_name_b):
if 'CJK' in range_name_a and range_name_b in ['Katakana', 'Hiragana']:
return False

if range_name_a in ['Katakana', 'Hiragana'] and range_name_b in ['Katakana', 'Hiragana']:
return False

return True

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion charset_normalizer/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
Expose version
"""

__version__ = "1.3.2"
__version__ = "1.3.3"
VERSION = __version__.split('.')

0 comments on commit 48c2e6b

Please sign in to comment.