Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consolidate data files into corpus #273

Merged
merged 1 commit into from
Sep 3, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ include readme.md
include requirements.txt

recursive-include chatterbot/corpus/* *.json
recursive-include chatterbot/adapters/* *.json
recursive-include chatterbot/corpus/* *.data

recursive-exclude * *.pyc
recursive-exclude * *.py~
63 changes: 43 additions & 20 deletions chatterbot/adapters/logic/mathematical_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,35 @@ class MathematicalEvaluation(LogicAdapter):
5) Solve the equation & return result
"""

def __init__(self, **kwargs):
    """
    Set up the adapter and load its math vocabulary.

    The optional ``math_words_language`` keyword argument selects which
    language's data is loaded; it defaults to ``'english'``.
    """
    super(MathematicalEvaluation, self).__init__(**kwargs)

    self.math_words = self.get_language_data(
        kwargs.get('math_words_language', 'english')
    )

def get_language_data(self, language):
    """
    Load language-specific data.

    Asks the corpus for the path of
    ``chatterbot.corpus.<language>.math_words`` with a ``data``
    extension and parses that file as JSON.

    :param language: Name of the language whose math words should be
        loaded, for example ``'english'``.
    :returns: The parsed contents of the math_words data file.
    :raises UnrecognizedLanguageException: If the data file for the
        requested language cannot be opened.
    """
    import io

    from chatterbot.corpus import Corpus

    corpus = Corpus()

    math_words_data_file_path = corpus.get_file_path(
        'chatterbot.corpus.{}.math_words'.format(language),
        extension='data'
    )

    try:
        # io.open takes an explicit encoding on both Python 2 and 3, so
        # non-ASCII vocabulary is decoded the same way everywhere instead
        # of depending on the platform's default encoding.
        with io.open(math_words_data_file_path, encoding='utf-8') as data:
            return json.load(data)
    except IOError:
        raise self.UnrecognizedLanguageException(
            'A math_words data file was not found for `{}` at `{}`.'.format(
                language, math_words_data_file_path
            )
        )

def can_process(self, statement):
"""
Determines whether it is appropriate for this
Expand Down Expand Up @@ -129,44 +158,30 @@ def normalize(self, string):
# Returning normalized text
return string

def load_data(self, language):
    """
    Read the math words JSON for ``language`` into ``self.data``.

    Only ``"english"`` is handled; any other value leaves the adapter
    untouched.
    """
    if language != "english":
        return

    json_path = os.path.join(
        os.path.dirname(__file__), 'data', 'math_words_EN.json'
    )
    with open(json_path) as handle:
        self.data = json.load(handle)

def substitute_words(self, string):
"""
Substitutes numbers for words.
"""
self.load_data("english")

condensed_string = '_'.join(string.split())

for word in self.data["words"]:
for word in self.math_words["words"]:
condensed_string = re.sub(
'_'.join(word.split(' ')),
self.data["words"][word],
self.math_words["words"][word],
condensed_string
)

for number in self.data["numbers"]:
for number in self.math_words["numbers"]:
condensed_string = re.sub(
number,
str(self.data["numbers"][number]),
str(self.math_words["numbers"][number]),
condensed_string
)

for scale in self.data["scales"]:
for scale in self.math_words["scales"]:
condensed_string = re.sub(
"_" + scale,
" " + self.data["scales"][scale],
" " + self.math_words["scales"][scale],
condensed_string
)

Expand Down Expand Up @@ -197,3 +212,11 @@ def substitute_words(self, string):
condensed_string[end_index] += " )"

return ' '.join(condensed_string)

class UnrecognizedLanguageException(Exception):
    """
    Exception carrying a message about a language that was not recognized.

    The message is available as ``value`` and through ``args``.
    """

    def __init__(self, value='The specified language was not recognized'):
        # Hand the message to the base class so ``args`` is populated;
        # without this, code that inspects ``args`` sees an empty tuple.
        # The base class is named explicitly instead of using two-argument
        # ``super()``, which would require this class's own name to be
        # resolvable from module scope.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # Kept as the repr of the message, so the text is shown quoted.
        return repr(self.value)
6 changes: 3 additions & 3 deletions chatterbot/corpus/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def __init__(self):
current_directory = os.path.dirname(__file__)
self.data_directory = os.path.join(current_directory, 'data')

def get_file_path(self, dotted_path):
def get_file_path(self, dotted_path, extension='json'):
"""
Reads a dotted file path and returns the file path.
"""
Expand All @@ -18,8 +18,8 @@ def get_file_path(self, dotted_path):

corpus_path = os.path.join(*parts)

if os.path.exists(corpus_path + '.json'):
corpus_path += '.json'
if os.path.exists(corpus_path + '.{}'.format(extension)):
corpus_path += '.{}'.format(extension)

return corpus_path

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ def setUp(self):
self.adapter = MathematicalEvaluation()

def test_can_process(self):
statement = Statement("What is 10 + 10 + 10?")
statement = Statement('What is 10 + 10 + 10?')
self.assertTrue(self.adapter.can_process(statement))

def test_can_not_process(self):
statement = Statement("What is your favorite song?")
statement = Statement('What is your favorite song?')
self.assertFalse(self.adapter.can_process(statement))

def test_is_integer(self):
Expand All @@ -32,19 +32,23 @@ def test_normalize_empty_string(self):
"""
If a string is empty, the string should be returned.
"""
self.assertEqual(self.adapter.normalize(""), "")
self.assertEqual(self.adapter.normalize(''), '')

def test_normalize_text_to_lowercase(self):
normalized = self.adapter.normalize("HELLO")
normalized = self.adapter.normalize('HELLO')
self.assertTrue(normalized.islower())

def test_normalize_punctuation(self):
normalized = self.adapter.normalize("the end.")
self.assertEqual(normalized, "the end")
normalized = self.adapter.normalize('the end.')
self.assertEqual(normalized, 'the end')

def test_load_data(self):
self.adapter.load_data("english")
self.assertIn("numbers", self.adapter.data)
def test_load_english_data(self):
self.adapter.get_language_data('english')
self.assertIn('numbers', self.adapter.math_words)

def test_load_nonexistent_data(self):
    """
    Requesting data for an unknown language should raise an exception.
    """
    expected_error = MathematicalEvaluation.UnrecognizedLanguageException

    with self.assertRaises(expected_error):
        self.adapter.get_language_data('0101010')


class MathematicalEvaluationOperationTests(TestCase):
Expand All @@ -58,68 +62,68 @@ def setUp(self):
self.python_version = sys.version_info[0]

def test_addition_operator(self):
statement = Statement("What is 100 + 54?")
statement = Statement('What is 100 + 54?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 + 54 ) = 154")
self.assertEqual(response.text, '( 100 + 54 ) = 154')

def test_subtraction_operator(self):
statement = Statement("What is 100 - 58?")
statement = Statement('What is 100 - 58?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 - 58 ) = 42")
self.assertEqual(response.text, '( 100 - 58 ) = 42')

def test_multiplication_operator(self):
statement = Statement("What is 100 * 20")
statement = Statement('What is 100 * 20')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 * 20 ) = 2000")
self.assertEqual(response.text, '( 100 * 20 ) = 2000')

def test_division_operator(self):
statement = Statement("What is 100 / 20")
statement = Statement('What is 100 / 20')
confidence, response = self.adapter.process(statement)

if self.python_version <= 2:
self.assertEqual(response.text, "( 100 / 20 ) = 5")
self.assertEqual(response.text, '( 100 / 20 ) = 5')
else:
self.assertEqual(response.text, "( 100 / 20 ) = 5.0")
self.assertEqual(response.text, '( 100 / 20 ) = 5.0')

def test_parenthesized_multiplication_and_addition(self):
statement = Statement("What is 100 + ( 1000 * 2 )?")
statement = Statement('What is 100 + ( 1000 * 2 )?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 + ( ( 1000 * ( 2 ) ) ) ) = 2100")
self.assertEqual(response.text, '( 100 + ( ( 1000 * ( 2 ) ) ) ) = 2100')

def test_parenthesized_with_words(self):
statement = Statement("What is four plus 100 + ( 100 * 2 )?")
statement = Statement('What is four plus 100 + ( 100 * 2 )?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 4 + ( 100 + ( ( 100 * ( 2 ) ) ) ) ) = 304")
self.assertEqual(response.text, '( 4 + ( 100 + ( ( 100 * ( 2 ) ) ) ) ) = 304')

def test_word_numbers_addition(self):
statement = Statement("What is one hundred + four hundred?")
statement = Statement('What is one hundred + four hundred?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( 100 + 400 ) = 500")
self.assertEqual(response.text, '( 100 + 400 ) = 500')

def test_word_division_operator(self):
statement = Statement("What is 100 divided by 100?")
statement = Statement('What is 100 divided by 100?')
confidence, response = self.adapter.process(statement)

if self.python_version <= 2:
self.assertEqual(response.text, "( 100 / 100 ) = 1")
self.assertEqual(response.text, '( 100 / 100 ) = 1')
else:
self.assertEqual(response.text, "( 100 / 100 ) = 1.0")
self.assertEqual(response.text, '( 100 / 100 ) = 1.0')

def test_large_word_division_operator(self):
statement = Statement("What is one thousand two hundred four divided by one hundred?")
statement = Statement('What is one thousand two hundred four divided by one hundred?')
confidence, response = self.adapter.process(statement)

if self.python_version <= 2:
self.assertEqual(response.text, "( 1000 + 200 + 4 ) / ( 100 ) = 12")
self.assertEqual(response.text, '( 1000 + 200 + 4 ) / ( 100 ) = 12')
else:
self.assertEqual(response.text, "( 1000 + 200 + 4 ) / ( 100 ) = 12.04")
self.assertEqual(response.text, '( 1000 + 200 + 4 ) / ( 100 ) = 12.04')

def test_negative_multiplication(self):
statement = Statement("What is -105 * 5")
statement = Statement('What is -105 * 5')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( -105 * 5 ) = -525")
self.assertEqual(response.text, '( -105 * 5 ) = -525')

def test_negative_decimal_multiplication(self):
statement = Statement("What is -100.5 * 20?")
statement = Statement('What is -100.5 * 20?')
confidence, response = self.adapter.process(statement)
self.assertEqual(response.text, "( -100.5 * 20 ) = -2010.0")
self.assertEqual(response.text, '( -100.5 * 20 ) = -2010.0')