# Patch summary (descriptive header only; the patch text below is unchanged).
#
# misspellings_lib.py
#   * _WORD_REGEX (the module's word-separator pattern) changes from the
#     explicit character set [\s_0-9<>/,\.]+ to [\s_0-9\W]+ and is now
#     compiled with flags=re.UNICODE, so any non-word character (not just
#     the previously listed punctuation) is part of a separator run.
#
# tests/test_class.py
#   * Adds a "# -*- coding: utf-8 -*-" source-encoding declaration.
#   * Adds testSplitWordsWithOtherCharacters, asserting that
#     split_words('the%big$cat') returns ['the', 'big', 'cat'].
#   * testNormalize is removed and re-added with text that looks identical
#     in this view -- presumably a whitespace-only change; confirm against
#     the original patch.
#
# NOTE(review): the new pattern literal is not a raw string (no r'' prefix),
#   so \s and \W rely on Python passing unknown escapes through unchanged;
#   consider r'[\s_0-9\W]+' in a follow-up.
# NOTE(review): \s looks redundant inside the class once \W is present
#   (whitespace is not a word character); '_' and 0-9 are still needed
#   because they are word characters.
# NOTE(review): the new test only exercises ASCII separators ('%', '$');
#   nothing here covers the non-ASCII input that re.UNICODE and the coding
#   declaration suggest was the motivation -- TODO confirm intent.
# NOTE(review): in this copy the patch's line breaks appear collapsed onto a
#   single line; it would need its original line structure restored before
#   it could be applied.
diff --git a/misspellings_lib.py b/misspellings_lib.py index 5fc2c79..5cf9311 100644 --- a/misspellings_lib.py +++ b/misspellings_lib.py @@ -15,7 +15,7 @@ import string _NORM_REGEX = re.compile('([a-z])([A-Z][a-z])') -_WORD_REGEX = re.compile('[\s_0-9<>/,\.]+') +_WORD_REGEX = re.compile('[\s_0-9\W]+', flags=re.UNICODE) def normalize(word): diff --git a/tests/test_class.py b/tests/test_class.py index 918a2a0..ee3aeb4 100755 --- a/tests/test_class.py +++ b/tests/test_class.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # For Python 2.5 from __future__ import with_statement @@ -135,8 +136,12 @@ def testSplitWordsWithCamelCase(self): self.assertEqual(['one', 'Two', 'Three', 'four', 'five'], misspellings.split_words('oneTwoThree_four five')) - def testNormalize(self): - self.assertEqual('alpha', misspellings.normalize('"alpha".')) + def testSplitWordsWithOtherCharacters(self): + self.assertEqual(['the', 'big', 'cat'], + misspellings.split_words('the%big$cat')) + + def testNormalize(self): + self.assertEqual('alpha', misspellings.normalize('"alpha".')) if __name__ == '__main__':