Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add evaluate_mathematically #80

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions chatterbot/adapters/plugins/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .plugin import PluginAdapter
from .evaluate_mathematically import EvaluateMathematically
from .plugin_chooser import PluginChooser
46 changes: 46 additions & 0 deletions chatterbot/adapters/plugins/data/math_words_EN.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"numbers" : {
"one" : 1,
"two" : 2,
"three" : 3,
"four" : 4,
"five" : 5,
"six" : 6,
"seven" : 7,
"eight" : 8,
"nine" : 9,
"ten" : 10,
"eleven" : 11,
"twelve" : 12,
"thirteen" : 13,
"fourteen" : 14,
"fifteen" : 15,
"sixteen" : 16,
"seventeen" : 17,
"eighteen" : 18,
"nineteen" : 19,
"twenty" : 20,
"thirty" : 30,
"forty" : 40,
"fifty" : 50,
"sixty" : 60,
"seventy" : 70,
"eighty" : 80,
"ninety" : 90
},
"words" : {
"plus" : "+",
"divided by" : "/",
"minus" : "-",
"times" : "*",
"squared" : "^ 2",
"to the power of" : "^"
},
"scales" : {
"hundred" : "* 100",
"thousand" : "* 1000",
"million" : "* 1000000",
"billion" : "* 1000000000",
"trillion" : "* 1000000000000"
}
}
183 changes: 183 additions & 0 deletions chatterbot/adapters/plugins/evaluate_mathematically.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from .plugin import PluginAdapter
import re
import os, json
import decimal

class EvaluateMathematically(PluginAdapter):

def should_answer(self, input_text):
    """
    Report whether this plugin is able to reply to the user input.

    The input is handed to ``process``; any result other than the
    ``False`` sentinel means the plugin has an answer.
    """

    # ``process`` signals "nothing to say" with the literal False, so an
    # identity check (not truthiness) is what distinguishes the two cases.
    return self.process(input_text) is not False


def process(self, input_text):
    """
    Takes a statement string.
    Returns the mathematical expression extracted from it followed by
    "= <result>", or False when the extracted text cannot be evaluated.
    """

    # Getting the mathematical terms within the input statement
    expression = self.simplify_chunks(self.normalize(input_text))

    # NOTE(review): eval() is executed on text derived from user input.
    # Builtins are emptied so names such as __import__ cannot be resolved,
    # but a dedicated arithmetic parser would be the safer long-term fix.
    # Also note that "^" is evaluated as Python's bitwise XOR here, while
    # the word data maps "squared" / "to the power of" onto it.
    try:
        result = eval(expression, {"__builtins__": {}}, {})
    except Exception:
        # Anything that is not a valid arithmetic expression (including
        # an empty string) means this plugin has no answer.
        return False

    # Returning important information
    return expression + '= ' + str(result)


def simplify_chunks(self, input_text):
"""
Separates the incoming text.
"""

string = ''

for chunk in input_text.split( ' ' ):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I use input_text.split() instead of input_text.split( ' ' )? That way I would be separating the words based on all whitespace characters not just spaces.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, input_text.split() is the way to go here 👍


is_chunk_integer = self.is_integer( chunk )

if is_chunk_integer is False:
is_chunk_float = self.is_float( chunk )

if is_chunk_float is False:
is_chunk_operator = self.is_operator( chunk )

if is_chunk_operator is False:
continue
else:
string += str( is_chunk_operator ) + ' '
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be simplified to:

if not is_chunk_operator is False:
     string += str( is_chunk_operator ) + ' '

else:
string += str( is_chunk_float ) + ' '
else:
string += str( is_chunk_integer ) + ' '

return string


def is_float(self, string):
    """
    Try to read the string as a decimal number.

    Returns the parsed ``decimal.Decimal`` on success and ``False``
    when the decimal module rejects the text.
    """

    try:
        parsed = decimal.Decimal(string)
    except decimal.DecimalException:
        return False

    return parsed


def is_integer(self, string):
"""
If the string is an integer, returns
the int of the string. Otherwise,
it returns False.
"""

if string.isdigit():
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this handle negative integers?

If not, it should be possible to detect them like this:

string.lstrip('-').isdigit()

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I have made sure that it will handle negative integers without any issues

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are lacking tests for negative numbers, which is why I'm also curious about how they are being handled.

isdigit will return False if there are any non-digits (including decimals and the negative sign) in the string.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added tests in my next commit. They were previously being treated as floats, which I have now changed by replacing the is_integer() code with:

try:
            return int( string )
except:
            return False

return int( string )
else:
return False


def is_operator(self, string):
"""
If the string is an operator, returns
said operator. Otherwise, it returns
false.
"""

if string in "+-/*^\(\)":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason why you appear to be escaping the parentheses () in this string? These shouldn't need to be escaped, and right now it's causing \ to be detected as a valid operator.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I am not sure why I did that. I have fixed it.

return string
else:
return False


def normalize(self, string):
"""
Normalizes input text, reducing errors
and improper calculations.
"""

# Setting all words to lowercase
string = string.lower()

# Removing punctuation
if string.endswith( ('.', '!', '?', ':', ';' ) ):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be a good idea to handle any non-alphanumeric character rather than just this specific set.
if not string[-1].isalnum(): would check if the last character in the string is not an alphanumeric character.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a good idea & I have updated the code in the latest commit.

string = string[ : len(string) - 1 ]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can get everything but the last character with string[:-1] (or string[0:-1] if you want to be slightly more explicit).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DarkmatterVale I agree with kevin on this.


# Removing words
string = self.substitute_words( string )

# Returning normalized text
return string

def load_data(self, language):
    """
    Load language-specific data.

    For "english" the bundled math_words_EN.json file (located in the
    ``data`` directory next to this module) is parsed and stored on
    ``self.data``. Any other language leaves the instance untouched.
    """

    if language != "english":
        return

    data_path = os.path.join(os.path.dirname(__file__), 'data', "math_words_EN.json")

    with open(data_path) as data_file:
        self.data = json.load(data_file)


def substitute_words(self, string):
"""
Substitutes numbers for words.
"""

self.load_data( "english" )

condensed_string = '_'.join( string.split( ' ' ) )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm reading on through this code, and I can't figure out the reason for handling all spaces as underscores.

My guess would be that it has something to do with the use of regular expressions.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I generate numbers out of words, I create spaces. This makes it really difficult to attempt to separate the code based on spaces because I have just created additional spaces...

I chose the underscore character because it is not commonly used in English. Occasionally it will be used, but not in most conversations.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note that might be useful here, calling string.split(' ') will split the string at every space character. It might be a good idea to use string.split() instead. If no character is passed in, the split method will split the string for any number of white space characters (including multiple consecutive spaces as well as newline and tabs which might just so happen to fall into the input string).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok; that is a much better way to remove whitespace. I have fixed it and am waiting to push the change to see if you guys have any other comments...I don't like making messy git histories

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I will run through again and do another sweep. I might not get a chance to do it until later this week. I'll let you know if I find anything.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!

I will also do a check to see if I can find any more errors.


for word in self.data[ "words" ]:
condensed_string = re.sub( '_'.join( word.split( ' ' ) ), self.data[ "words" ][ word ], condensed_string )

for number in self.data[ "numbers" ]:
condensed_string = re.sub( number, str( self.data[ "numbers" ][ number ] ), condensed_string )

for scale in self.data[ "scales" ]:
condensed_string = re.sub( "_" + scale, " " + self.data[ "scales" ][ scale ], condensed_string)

condensed_string = condensed_string.split( '_' )
for chunk_index in range( 0, len( condensed_string ) ):
value = ""

try:
value = str( eval( condensed_string[ chunk_index ] ) )

condensed_string[ chunk_index ] = value
except:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What exceptions might happen here that you are trying to catch?

It's generally recommended to avoid Pokemon exception handling, especially when you know what errors should be raised.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am catching all exceptions that arise from attempting to evaluate a non-numerical statement. Because I go through all chunks (including those that are not math expressions) I need to catch all exceptions that might be thrown.

pass

for chunk_index in range( 0, len( condensed_string ) ):
if self.is_integer( condensed_string[ chunk_index ] ) or self.is_float( condensed_string[ chunk_index ] ):
i = 1
start_index = chunk_index
end_index = -1
while( chunk_index + i < len( condensed_string ) and ( self.is_integer( condensed_string[ chunk_index + i ] ) or self.is_float( condensed_string[ chunk_index + i ] ) ) ):
end_index = chunk_index + i
i += 1

for sub_chunk in range( start_index, end_index ):
condensed_string[ sub_chunk ] += " +"

condensed_string[ start_index ] = "( " + condensed_string[ start_index ]
condensed_string[ end_index ] += " )"

return ' '.join( condensed_string )
17 changes: 17 additions & 0 deletions chatterbot/adapters/plugins/plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from chatterbot.adapters.exceptions import AdapterNotImplementedError


class PluginAdapter(object):
    """
    Abstract base class describing the interface shared by all plugins.

    Concrete plugins override ``should_answer`` and ``process``; the
    versions defined here only raise AdapterNotImplementedError.
    """

    def __init__(self, **kwargs):
        """Accept arbitrary keyword arguments; none of them are used here."""

    def should_answer(self, text):
        """Placeholder: always raises AdapterNotImplementedError."""
        raise AdapterNotImplementedError()

    def process(self, text):
        """Placeholder: always raises AdapterNotImplementedError."""
        raise AdapterNotImplementedError()
28 changes: 28 additions & 0 deletions chatterbot/adapters/plugins/plugin_chooser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from .evaluate_mathematically import EvaluateMathematically

class PluginChooser():
    """
    Holds the available plugins and picks the first one willing to
    reply to a given input statement.
    """

    def __init__(self, **kwargs):
        """
        Build the plugin list, forwarding all keyword arguments to
        each plugin's constructor.
        """

        math_plugin = EvaluateMathematically(**kwargs)
        self.plugins = [math_plugin]

    def choose(self, input_statement):
        """
        Return the reply of the first plugin that reports it should
        answer the statement's text, or False when none of them does.
        """

        for candidate in self.plugins:
            # Skip plugins that decline to answer this input
            if not candidate.should_answer(input_statement.text):
                continue

            return candidate.process(input_statement.text)

        # No plugin wanted to respond
        return False
10 changes: 9 additions & 1 deletion chatterbot/chatterbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ def __init__(self, name, **kwargs):
"chatterbot.adapters.io.TerminalAdapter"
)

PluginChooser = import_module("chatterbot.adapters.plugins.PluginChooser")
self.plugin_chooser = PluginChooser(**kwargs)

StorageAdapter = import_module(storage_adapter)
self.storage = StorageAdapter(**kwargs)

Expand Down Expand Up @@ -80,6 +83,12 @@ def get_response(self, input_text):
"""
input_statement = Statement(input_text)

# Applying plugin logic to see whether the chatbot should respond in this way
plugin_response = self.plugin_chooser.choose( input_statement )

if not plugin_response is False:
return plugin_response

# If no responses exist, return the input statement
if not self.storage.count():
self.storage.update(input_statement)
Expand Down Expand Up @@ -152,4 +161,3 @@ def train(self, conversation=None, *args, **kwargs):
self.trainer.train_from_corpora(corpora)
else:
self.trainer.train_from_list(conversation)

1 change: 0 additions & 1 deletion tests/base_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,3 @@ def setUp(self):
self.chatbot.train(data1)
self.chatbot.train(data2)
self.chatbot.train(data3)

1 change: 0 additions & 1 deletion tests/logic_adapter_tests/test_closest_meaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,3 @@ def test_get_closest_statement(self):
close = self.adapter.get(statement, possible_choices)

self.assertEqual("This is a lovely bog.", close)

18 changes: 17 additions & 1 deletion tests/test_chatbot_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,23 @@ def test_second_response_format(self):
self.assertEqual(len(statement_object.in_response_to), 1)
self.assertIn("Hi", statement_object.in_response_to)

def test_evaluate_mathematically(self):
    """
    Ask the chatbot a series of arithmetic questions and compare the
    first five replies with the expected "expression = result" strings.
    """
    self.chatbot.storage.update(self.test_statement)

    questions = [
        "What is 100 + 54?",
        "What is 100 * 20",
        "What is 100 + ( 1000 * 2 )?",
        "What is four plus 100 + ( 100 * 2 )?",
        "What is one hundred + four hundred?",
        "What is 100 divided by 100?",
        "What is one thousand two hundred four divided by one hundred?",
    ]

    # Every question is asked, in order, before any reply is checked.
    replies = [self.chatbot.get_response(question) for question in questions]

    expected = [
        "( 100 + 54 ) = 154",
        "( 100 * 20 ) = 2000",
        "( 100 + ( ( 1000 * ( 2 ) ) ) ) = 2100",
        "( 4 + ( 100 + ( ( 100 * ( 2 ) ) ) ) ) = 304",
        "( 100 + 400 ) = 500",
    ]

    # Only the first five replies have expectations; the two
    # "divided by" questions are exercised but not asserted on.
    for reply, wanted in zip(replies, expected):
        self.assertEqual(reply, wanted)


class ChatterBotStorageIntegrationTests(UntrainedChatBotTestCase):

Expand Down Expand Up @@ -207,4 +224,3 @@ def test_database_is_not_updated_when_read_only(self):

self.assertFalse(exists_before)
self.assertFalse(exists_after)