-
Notifications
You must be signed in to change notification settings - Fork 4.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add evaluate_mathematically #80
Changes from 13 commits
08ede07
0bd770b
05450e0
f0c511c
71a03d9
9c4f552
e469f4f
e1d9219
d77669a
fee0369
88e273f
e59570e
8e49176
366ebaf
f3ce77b
1fef0ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .plugin import PluginAdapter | ||
from .evaluate_mathematically import EvaluateMathematically | ||
from .plugin_chooser import PluginChooser |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
{ | ||
"numbers" : { | ||
"one" : 1, | ||
"two" : 2, | ||
"three" : 3, | ||
"four" : 4, | ||
"five" : 5, | ||
"six" : 6, | ||
"seven" : 7, | ||
"eight" : 8, | ||
"nine" : 9, | ||
"ten" : 10, | ||
"eleven" : 11, | ||
"twelve" : 12, | ||
"thirteen" : 13, | ||
"fourteen" : 14, | ||
"fifteen" : 15, | ||
"sixteen" : 16, | ||
"seventeen" : 17, | ||
"eighteen" : 18, | ||
"nineteen" : 19, | ||
"twenty" : 20, | ||
"thirty" : 30, | ||
"forty" : 40, | ||
"fifty" : 50, | ||
"sixty" : 60, | ||
"seventy" : 70, | ||
"eighty" : 80, | ||
"ninety" : 90 | ||
}, | ||
"words" : { | ||
"plus" : "+", | ||
"divided by" : "/", | ||
"minus" : "-", | ||
"times" : "*", | ||
"squared" : "^ 2", | ||
"to the power of" : "^" | ||
}, | ||
"scales" : { | ||
"hundred" : "* 100", | ||
"thousand" : "* 1000", | ||
"million" : "* 1000000", | ||
"billion" : "* 1000000000", | ||
"trillion" : "* 1000000000000" | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
from .plugin import PluginAdapter | ||
import re | ||
import os, json | ||
import decimal | ||
|
||
class EvaluateMathematically(PluginAdapter): | ||
|
||
def should_answer(self, input_text):
    """
    Report whether this plugin can respond to the user input.

    The input is run through ``process``; the plugin answers
    whenever ``process`` returns anything other than ``False``.
    """

    return self.process(input_text) is not False
|
||
|
||
def process(self, input_text):
    """
    Takes a statement string.
    Returns the simplified statement string
    with the mathematical terms "solved", in the
    form "<expression> = <result>".
    Returns False when the statement cannot be
    evaluated as a mathematical expression.
    """

    # Getting the mathematical terms within the input statement
    expression = self.simplify_chunks(self.normalize(input_text))

    try:
        # NOTE(review): eval() runs on text derived from user input.
        # simplify_chunks() only emits numbers and single-character
        # operators, which limits what can reach this call, but a
        # dedicated arithmetic parser would be safer.
        # NOTE(review): "^" is bitwise XOR in Python, so an input such
        # as "3 ^ 2" evaluates to 1 rather than 9 -- confirm the intent
        # of the "squared" / "to the power of" word mappings.
        result = str(eval(expression))
    except Exception:
        # Any evaluation failure (empty or malformed expression,
        # division by zero, unknown name, ...) means the statement is
        # not answerable.  Unlike a bare ``except:``, this no longer
        # swallows KeyboardInterrupt or SystemExit.
        return False

    return expression + '= ' + result
|
||
|
||
def simplify_chunks(self, input_text):
    """
    Separates the incoming text.

    Splits the text on whitespace and keeps only the chunks that are
    recognised as an integer, a decimal number or an operator, in that
    order of preference.  Every kept chunk is followed by a single
    space, so the result ends with a trailing space (or is empty when
    nothing was recognised).
    """

    recognisers = (self.is_integer, self.is_float, self.is_operator)
    kept = []

    # split() without an argument splits on any run of whitespace, so
    # repeated spaces, tabs and newlines never produce empty chunks.
    for chunk in input_text.split():
        for recognise in recognisers:
            value = recognise(chunk)

            # Compare against False explicitly: a recognised 0 is falsy
            # but is still a valid number.
            if value is not False:
                kept.append(str(value) + ' ')
                break

    return ''.join(kept)
|
||
|
||
def is_float(self, string):
    """
    If the string is a finite decimal number, returns
    it as a decimal.Decimal. Otherwise, it returns False.

    Callers must compare the result with ``is False``:
    a parsed zero is a valid result but is falsy.
    """

    try:
        value = decimal.Decimal(string)
    except decimal.DecimalException:
        return False

    # Decimal also parses "NaN", "sNaN" and "Infinity"; those cannot be
    # used in an arithmetic expression, so they are not numbers here.
    if not value.is_finite():
        return False

    return value
|
||
|
||
def is_integer(self, string):
    """
    If the string is an integer (optionally prefixed
    with a single "+" or "-"), returns the int of the
    string. Otherwise, it returns False.

    Callers must compare the result with ``is False``:
    a parsed 0 is a valid result but is falsy.
    """

    # Allow exactly one leading sign; "--5" or a lone "-" are rejected
    # because the remainder is not made of digits only.
    digits = string[1:] if string[:1] in ('+', '-') else string

    if not digits.isdigit():
        return False

    try:
        return int(string)
    except ValueError:
        # isdigit() accepts characters such as superscript digits that
        # int() cannot convert.
        return False
|
||
|
||
def is_operator(self, string):
    """
    If the string is exactly one supported operator
    character ( + - / * ^ or a parenthesis ), returns
    said operator. Otherwise, it returns False.
    """

    # Membership in a tuple is an exact match.  Testing against one
    # string would be a substring test, which also accepts "" and
    # multi-character pieces such as "+-".
    if string in ('+', '-', '/', '*', '^', '(', ')'):
        return string

    return False
|
||
|
||
def normalize(self, string):
    """
    Normalizes input text, reducing errors
    and improper calculations.

    The text is lowercased, every trailing sentence punctuation
    character ( . ! ? : ; ) is removed, and number/operator words
    are then replaced through ``substitute_words``.
    """

    # Setting all words to lowercase
    string = string.lower()

    # Removing trailing punctuation.  rstrip() removes the whole run
    # ("2 + 2?!" -> "2 + 2"), not just the final character.  Closing
    # parentheses are deliberately not stripped because they are part
    # of the expression.
    string = string.rstrip('.!?:;')

    # Replacing number and operator words, returning normalized text
    return self.substitute_words(string)
|
||
def load_data(self, language):
    """
    Load language-specific data.

    For "english" the JSON word tables stored next to this module
    under data/math_words_EN.json are read into ``self.data``.
    Any other language leaves ``self.data`` untouched.
    """

    if language != "english":
        return

    module_directory = os.path.dirname(__file__)
    words_path = os.path.join(module_directory, 'data', "math_words_EN.json")

    with open(words_path) as data_file:
        self.data = json.load(data_file)
|
||
|
||
def substitute_words(self, string):
    """
    Substitutes numbers for words.

    Steps:
      1. Operator words ("plus", "divided by", ...) and number words
         ("three", "forty", ...) are replaced by their symbols/digits.
      2. A scale word following another chunk ("three hundred") is
         turned into a multiplication inside that chunk ("3 * 100")
         and the chunk is evaluated ("300").
      3. Each run of consecutive numeric chunks becomes one
         parenthesised sum ("twenty one" -> "( 20 + 1 )").  A lone
         number is wrapped on its own ("( 3 )"), so precedence between
         separate numbers is left to the final evaluation.

    Returns the rewritten text with chunks separated by single spaces.
    """

    self.load_data("english")

    # Spaces become underscores so that the spaces introduced by the
    # replacements below ("^ 2", " * 100") stay inside a single chunk
    # when the text is split again.
    condensed_string = '_'.join(string.split(' '))

    for word, symbol in self.data["words"].items():
        condensed_string = self._replace_term(
            '_'.join(word.split(' ')), symbol, condensed_string)

    for number, value in self.data["numbers"].items():
        condensed_string = self._replace_term(
            number, str(value), condensed_string)

    # The underscore in front of a scale word is swallowed so that the
    # multiplication is attached to the preceding chunk.
    for scale, multiplier in self.data["scales"].items():
        condensed_string = self._replace_term(
            "_" + scale, " " + multiplier, condensed_string,
            leading_boundary=False)

    chunks = [
        self._evaluate_chunk(chunk)
        for chunk in condensed_string.split('_')
    ]

    grouped = []
    index = 0
    total = len(chunks)

    while index < total:
        if not self._is_number(chunks[index]):
            grouped.append(chunks[index])
            index += 1
            continue

        # Extend the run over every directly following numeric chunk.
        run_end = index + 1
        while run_end < total and self._is_number(chunks[run_end]):
            run_end += 1

        # Closing the parenthesis at the end of the run itself (rather
        # than on the last chunk of the whole text) keeps separate
        # numbers from being nested into each other, which would
        # change operator precedence ("3 * 4 + 5" must not become
        # "( 3 * ( 4 + 5 ) )").
        grouped.append("( " + " + ".join(chunks[index:run_end]) + " )")
        index = run_end

    return ' '.join(grouped)

def _replace_term(self, term, replacement, text, leading_boundary=True):
    """
    Replace occurrences of *term* in *text* that are not part of a
    longer alphabetic word, so "four" is not replaced inside
    "fourteen" and "plus" is not replaced inside "surplus".
    """

    pattern = re.escape(term) + r'(?![A-Za-z])'
    if leading_boundary:
        pattern = r'(?<![A-Za-z])' + pattern

    # A function replacement inserts *replacement* literally, without
    # interpreting backslashes or group references in it.
    return re.sub(pattern, lambda match: replacement, text)

def _evaluate_chunk(self, chunk):
    """
    Return the evaluated form of a purely arithmetic chunk
    ("3 * 100" -> "300"); any other chunk is returned unchanged.
    """

    # Only digits, whitespace, the decimal point, operators and
    # parentheses may reach eval(); ordinary words (which could
    # otherwise resolve to local or builtin names) are left untouched.
    if not re.match(r'^[0-9\s.+\-*/%()^]+$', chunk):
        return chunk

    try:
        # NOTE(review): eval() still runs on user-derived arithmetic;
        # builtins are removed, but very large exponent expressions
        # can remain expensive.  Consider a dedicated parser.
        return str(eval(chunk, {"__builtins__": {}}, {}))
    except Exception:
        # Best effort: a chunk that is not a complete expression (for
        # example a lone "+") simply stays as it is.
        return chunk

def _is_number(self, chunk):
    """
    Tell whether *chunk* parses as an integer or a decimal number.
    """

    # Compare with False explicitly: a parsed zero is falsy but is
    # still a number.
    if self.is_integer(chunk) is not False:
        return True

    return self.is_float(chunk) is not False
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from chatterbot.adapters.exceptions import AdapterNotImplementedError | ||
|
||
|
||
class PluginAdapter(object):
    """
    Abstract base class describing the interface every plugin
    is expected to provide.
    """

    def __init__(self, **kwargs):
        """Accept arbitrary keyword arguments; none are used here."""

    def process(self, text):
        """Produce a response for ``text``; subclasses must override."""
        raise AdapterNotImplementedError()

    def should_answer(self, text):
        """Decide whether to respond to ``text``; subclasses must override."""
        raise AdapterNotImplementedError()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from .evaluate_mathematically import EvaluateMathematically | ||
|
||
class PluginChooser():
    """
    Holds the available plugins and picks the first one that is
    willing to answer a given input statement.
    """

    def __init__(self, **kwargs):
        """
        Initializes all plugins & initial variables.
        """

        math_plugin = EvaluateMathematically(**kwargs)
        self.plugins = [math_plugin]

    def choose(self, input_statement):
        """
        Return the response of the first plugin whose ``should_answer``
        accepts the statement text, or False when no plugin does.
        """

        # The generator is lazy: plugins are asked in order and the
        # search stops at the first one that agrees to answer.
        willing = (
            plugin for plugin in self.plugins
            if plugin.should_answer(input_statement.text)
        )
        chosen = next(willing, None)

        if chosen is None:
            # No plugin was found that should respond
            return False

        return chosen.process(input_statement.text)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,4 +70,3 @@ def setUp(self): | |
self.chatbot.train(data1) | ||
self.chatbot.train(data2) | ||
self.chatbot.train(data3) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should I use
input_text.split()
instead of input_text.split( ' ' )
? That way I would be separating the words based on all whitespace characters, not just spaces. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yup,
input_text.split()
is the way to go here 👍