diff --git a/MANIFEST.in b/MANIFEST.in index 6e4218fb0..514159c0f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,7 +3,7 @@ include readme.md include requirements.txt recursive-include chatterbot/corpus/* *.json -recursive-include chatterbot/adapters/* *.json +recursive-include chatterbot/corpus/* *.data recursive-exclude * *.pyc recursive-exclude * *.py~ diff --git a/chatterbot/adapters/logic/mathematical_evaluation.py b/chatterbot/adapters/logic/mathematical_evaluation.py index c7103a26c..132075603 100644 --- a/chatterbot/adapters/logic/mathematical_evaluation.py +++ b/chatterbot/adapters/logic/mathematical_evaluation.py @@ -21,6 +21,35 @@ class MathematicalEvaluation(LogicAdapter): 5) Solve the equation & return result """ + def __init__(self, **kwargs): + super(MathematicalEvaluation, self).__init__(**kwargs) + + language = kwargs.get('math_words_language', 'english') + self.math_words = self.get_language_data(language) + + def get_language_data(self, language): + """ + Load language-specific data + """ + from chatterbot.corpus import Corpus + + corpus = Corpus() + + math_words_data_file_path = corpus.get_file_path( + 'chatterbot.corpus.{}.math_words'.format(language), + extension='data' + ) + + try: + with open(math_words_data_file_path) as data: + return json.load(data) + except IOError: + raise self.UnrecognizedLanguageException( + 'A math_words data file was not found for `{}` at `{}`.'.format( + language, math_words_data_file_path + ) + ) + def can_process(self, statement): """ Determines whether it is appropriate for this @@ -129,44 +158,30 @@ def normalize(self, string): # Returning normalized text return string - def load_data(self, language): - """ - Load language-specific data - """ - if language == "english": - data_file = os.path.join( - os.path.dirname(__file__), 'data', 'math_words_EN.json' - ) - with open(data_file) as data_file: - data = json.load(data_file) - self.data = data - def substitute_words(self, string): """ Substitutes numbers for words. """ - self.load_data("english") - condensed_string = '_'.join(string.split()) - for word in self.data["words"]: + for word in self.math_words["words"]: condensed_string = re.sub( '_'.join(word.split(' ')), - self.data["words"][word], + self.math_words["words"][word], condensed_string ) - for number in self.data["numbers"]: + for number in self.math_words["numbers"]: condensed_string = re.sub( number, - str(self.data["numbers"][number]), + str(self.math_words["numbers"][number]), condensed_string ) - for scale in self.data["scales"]: + for scale in self.math_words["scales"]: condensed_string = re.sub( "_" + scale, - " " + self.data["scales"][scale], + " " + self.math_words["scales"][scale], condensed_string ) @@ -197,3 +212,11 @@ def substitute_words(self, string): condensed_string[end_index] += " )" return ' '.join(condensed_string) + + class UnrecognizedLanguageException(Exception): + + def __init__(self, value='The specified language was not recognized'): + self.value = value + + def __str__(self): + return repr(self.value) diff --git a/chatterbot/corpus/corpus.py b/chatterbot/corpus/corpus.py index 38ee41c72..658e98bda 100644 --- a/chatterbot/corpus/corpus.py +++ b/chatterbot/corpus/corpus.py @@ -7,7 +7,7 @@ def __init__(self): current_directory = os.path.dirname(__file__) self.data_directory = os.path.join(current_directory, 'data') - def get_file_path(self, dotted_path): + def get_file_path(self, dotted_path, extension='json'): """ Reads a dotted file path and returns the file path. """ @@ -18,8 +18,8 @@ def get_file_path(self, dotted_path): corpus_path = os.path.join(*parts) - if os.path.exists(corpus_path + '.json'): - corpus_path += '.json' + if os.path.exists(corpus_path + '.{}'.format(extension)): + corpus_path += '.{}'.format(extension) return corpus_path diff --git a/chatterbot/adapters/logic/data/math_words_EN.json b/chatterbot/corpus/data/english/math_words.data similarity index 100% rename from chatterbot/adapters/logic/data/math_words_EN.json rename to chatterbot/corpus/data/english/math_words.data diff --git a/tests/logic_adapter_tests/test_evaluate_mathematically.py b/tests/logic_adapter_tests/test_mathematical_evaluation.py similarity index 53% rename from tests/logic_adapter_tests/test_evaluate_mathematically.py rename to tests/logic_adapter_tests/test_mathematical_evaluation.py index d397638f8..47a7f0c5f 100644 --- a/tests/logic_adapter_tests/test_evaluate_mathematically.py +++ b/tests/logic_adapter_tests/test_mathematical_evaluation.py @@ -9,11 +9,11 @@ def setUp(self): self.adapter = MathematicalEvaluation() def test_can_process(self): - statement = Statement("What is 10 + 10 + 10?") + statement = Statement('What is 10 + 10 + 10?') self.assertTrue(self.adapter.can_process(statement)) def test_can_not_process(self): - statement = Statement("What is your favorite song?") + statement = Statement('What is your favorite song?') self.assertFalse(self.adapter.can_process(statement)) def test_is_integer(self): @@ -32,19 +32,23 @@ def test_normalize_empty_string(self): """ If a string is empty, the string should be returned. """ - self.assertEqual(self.adapter.normalize(""), "") + self.assertEqual(self.adapter.normalize(''), '') def test_normalize_text_to_lowercase(self): - normalized = self.adapter.normalize("HELLO") + normalized = self.adapter.normalize('HELLO') self.assertTrue(normalized.islower()) def test_normalize_punctuation(self): - normalized = self.adapter.normalize("the end.") - self.assertEqual(normalized, "the end") + normalized = self.adapter.normalize('the end.') + self.assertEqual(normalized, 'the end') - def test_load_data(self): - self.adapter.load_data("english") - self.assertIn("numbers", self.adapter.data) + def test_load_english_data(self): + self.adapter.get_language_data('english') + self.assertIn('numbers', self.adapter.math_words) + + def test_load_nonexistent_data(self): + with self.assertRaises(MathematicalEvaluation.UnrecognizedLanguageException): + self.adapter.get_language_data('0101010') class MathematicalEvaluationOperationTests(TestCase): @@ -58,68 +62,68 @@ def setUp(self): self.python_version = sys.version_info[0] def test_addition_operator(self): - statement = Statement("What is 100 + 54?") + statement = Statement('What is 100 + 54?') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( 100 + 54 ) = 154") + self.assertEqual(response.text, '( 100 + 54 ) = 154') def test_subtraction_operator(self): - statement = Statement("What is 100 - 58?") + statement = Statement('What is 100 - 58?') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( 100 - 58 ) = 42") + self.assertEqual(response.text, '( 100 - 58 ) = 42') def test_multiplication_operator(self): - statement = Statement("What is 100 * 20") + statement = Statement('What is 100 * 20') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( 100 * 20 ) = 2000") + self.assertEqual(response.text, '( 100 * 20 ) = 2000') def test_division_operator(self): - statement = Statement("What is 100 / 20") + statement = Statement('What is 100 / 20') confidence, response = self.adapter.process(statement) if self.python_version <= 2: - self.assertEqual(response.text, "( 100 / 20 ) = 5") + self.assertEqual(response.text, '( 100 / 20 ) = 5') else: - self.assertEqual(response.text, "( 100 / 20 ) = 5.0") + self.assertEqual(response.text, '( 100 / 20 ) = 5.0') def test_parenthesized_multiplication_and_addition(self): - statement = Statement("What is 100 + ( 1000 * 2 )?") + statement = Statement('What is 100 + ( 1000 * 2 )?') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( 100 + ( ( 1000 * ( 2 ) ) ) ) = 2100") + self.assertEqual(response.text, '( 100 + ( ( 1000 * ( 2 ) ) ) ) = 2100') def test_parenthesized_with_words(self): - statement = Statement("What is four plus 100 + ( 100 * 2 )?") + statement = Statement('What is four plus 100 + ( 100 * 2 )?') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( 4 + ( 100 + ( ( 100 * ( 2 ) ) ) ) ) = 304") + self.assertEqual(response.text, '( 4 + ( 100 + ( ( 100 * ( 2 ) ) ) ) ) = 304') def test_word_numbers_addition(self): - statement = Statement("What is one hundred + four hundred?") + statement = Statement('What is one hundred + four hundred?') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( 100 + 400 ) = 500") + self.assertEqual(response.text, '( 100 + 400 ) = 500') def test_word_division_operator(self): - statement = Statement("What is 100 divided by 100?") + statement = Statement('What is 100 divided by 100?') confidence, response = self.adapter.process(statement) if self.python_version <= 2: - self.assertEqual(response.text, "( 100 / 100 ) = 1") + self.assertEqual(response.text, '( 100 / 100 ) = 1') else: - self.assertEqual(response.text, "( 100 / 100 ) = 1.0") + self.assertEqual(response.text, '( 100 / 100 ) = 1.0') def test_large_word_division_operator(self): - statement = Statement("What is one thousand two hundred four divided by one hundred?") + statement = Statement('What is one thousand two hundred four divided by one hundred?') confidence, response = self.adapter.process(statement) if self.python_version <= 2: - self.assertEqual(response.text, "( 1000 + 200 + 4 ) / ( 100 ) = 12") + self.assertEqual(response.text, '( 1000 + 200 + 4 ) / ( 100 ) = 12') else: - self.assertEqual(response.text, "( 1000 + 200 + 4 ) / ( 100 ) = 12.04") + self.assertEqual(response.text, '( 1000 + 200 + 4 ) / ( 100 ) = 12.04') def test_negative_multiplication(self): - statement = Statement("What is -105 * 5") + statement = Statement('What is -105 * 5') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( -105 * 5 ) = -525") + self.assertEqual(response.text, '( -105 * 5 ) = -525') def test_negative_decimal_multiplication(self): - statement = Statement("What is -100.5 * 20?") + statement = Statement('What is -100.5 * 20?') confidence, response = self.adapter.process(statement) - self.assertEqual(response.text, "( -100.5 * 20 ) = -2010.0") + self.assertEqual(response.text, '( -100.5 * 20 ) = -2010.0')