Skip to content

Commit

Permalink
Correct unicode equality issue.
Browse files Browse the repository at this point in the history
  • Loading branch information
gunthercox committed Aug 13, 2016
1 parent 2f7ea84 commit 95b332e
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 35 deletions.
1 change: 1 addition & 0 deletions chatterbot/adapters/logic/base_match.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from .logic_adapter import LogicAdapter
from .mixins import TieBreaking

Expand Down
32 changes: 17 additions & 15 deletions chatterbot/adapters/logic/closest_match.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from .base_match import BaseMatchAdapter
from fuzzywuzzy import process
from fuzzywuzzy import fuzz


class ClosestMatchAdapter(BaseMatchAdapter):
Expand All @@ -26,25 +27,26 @@ def get(self, input_statement):
else:
raise self.EmptyDatasetException()

# Get the text of each statement
text_of_all_statements = []
confidence = -1
closest_match = input_statement

# Find the closest matching known statement
for statement in statement_list:
text_of_all_statements.append(statement.text)
ratio = fuzz.ratio(input_statement.text, statement.text)

# Check if an exact match exists
if input_statement.text in text_of_all_statements:
return 1, input_statement
if ratio > confidence:
confidence = ratio
closest_match = statement

# Get the closest matching statement from the database
closest_match, confidence = process.extract(
'''
closest_match, confidence = process.extractOne(
input_statement.text,
text_of_all_statements,
limit=1
)[0]
text_of_all_statements
)
'''

# Convert the confidence integer to a percent
confidence /= 100.0

return confidence, next(
(s for s in statement_list if s.text == closest_match), None
)
return confidence, closest_match

6 changes: 3 additions & 3 deletions chatterbot/chatterbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,14 @@ def get_response(self, input_item):
"""
input_statement = self.input.process_input(input_item)

# Select a response to the input statement
confidence, response = self.logic.process(input_statement)

existing_statement = self.storage.find(input_statement.text)

if existing_statement:
input_statement = existing_statement

# Select a response to the input statement
confidence, response = self.logic.process(input_statement)

previous_statement = self.get_last_response_statement()

if previous_statement:
Expand Down
1 change: 1 addition & 0 deletions chatterbot/conversation/statement.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from .response import Response


Expand Down
3 changes: 1 addition & 2 deletions chatterbot/corpus/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,14 @@ def load_corpus(self, dotted_path):
"""
Return the data contained within a specified corpus.
"""

corpus_path = self.get_file_path(dotted_path)

corpora = []

if os.path.isdir(corpus_path):
for dirname, dirnames, filenames in os.walk(corpus_path):
for datafile in filenames:
if datafile.endswith(".json"):
if datafile.endswith('.json'):

corpus = self.read_corpus(
os.path.join(dirname, datafile)
Expand Down
13 changes: 12 additions & 1 deletion tests/conversation_tests/test_statements.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from unittest import TestCase
from chatterbot.conversation import Statement, Response

Expand All @@ -7,7 +8,7 @@ class StatementTests(TestCase):
def setUp(self):
self.statement = Statement("A test statement.")

def test_equality(self):
def test_list_equality(self):
"""
It should be possible to check if a statement
exists in the list of statements that another
Expand All @@ -17,6 +18,16 @@ def test_equality(self):
self.assertEqual(len(self.statement.in_response_to), 1)
self.assertIn(Response("Yo"), self.statement.in_response_to)

def test_list_equality_unicode(self):
"""
Test that it is possible to check if a statement
is in a list of other statements when the
statement's text is unicode.
"""
statements = [Statement("Hello"), Statement("我很好太感谢")]
statement = Statement("我很好太感谢")
self.assertIn(statement, statements)

def test_update_response_list_new(self):
self.statement.add_response(Response("Hello"))
self.assertTrue(len(self.statement.in_response_to), 1)
Expand Down
29 changes: 15 additions & 14 deletions tests/training_tests/test_list_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_database_has_correct_format(self):
# There should be a total of 9 statements in the database after training
self.assertEqual(self.chatbot.storage.count(), 9)

# The first statement should be in responce to another statement yet
# The first statement should not be in response to any other statement
self.assertEqual(
len(self.chatbot.storage.find(conversation[0]).in_response_to),
0
Expand All @@ -100,13 +100,13 @@ def test_training_with_unicode_characters(self):
to the database.
"""
conversation = [
u"¶ ∑ ∞ ∫ π ∈ ℝ² ∖ ⩆ ⩇ ⩈ ⩉ ⩊ ⩋ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⋒ ⋓",
u"⊂ ⊃ ⊆ ⊇ ⊈ ⊉ ⊊ ⊋ ⊄ ⊅ ⫅ ⫆ ⫋ ⫌ ⫃ ⫄ ⫇ ⫈ ⫉ ⫊ ⟃ ⟄",
u"∠ ∡ ⦛ ⦞ ⦟ ⦢ ⦣ ⦤ ⦥ ⦦ ⦧ ⦨ ⦩ ⦪ ⦫ ⦬ ⦭ ⦮ ⦯ ⦓ ⦔ ⦕ ⦖ ⟀",
u"∫ ∬ ∭ ∮ ∯ ∰ ∱ ∲ ∳ ⨋ ⨌ ⨍ ⨎ ⨏ ⨐ ⨑ ⨒ ⨓ ⨔ ⨕ ⨖ ⨗ ⨘ ⨙ ⨚ ⨛ ⨜",
u"≁ ≂ ≃ ≄ ⋍ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ⩯ ⩰ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖",
u"¬ ⫬ ⫭ ⊨ ⊭ ∀ ∁ ∃ ∄ ∴ ∵ ⊦ ⊬ ⊧ ⊩ ⊮ ⊫ ⊯ ⊪ ⊰ ⊱ ⫗ ⫘",
u"∧ ∨ ⊻ ⊼ ⊽ ⋎ ⋏ ⟑ ⟇ ⩑ ⩒ ⩓ ⩔ ⩕ ⩖ ⩗ ⩘ ⩙ ⩚ ⩛ ⩜ ⩝ ⩞ ⩟ ⩠ ⩢",
u'¶ ∑ ∞ ∫ π ∈ ℝ² ∖ ⩆ ⩇ ⩈ ⩉ ⩊ ⩋ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⋒ ⋓',
u'⊂ ⊃ ⊆ ⊇ ⊈ ⊉ ⊊ ⊋ ⊄ ⊅ ⫅ ⫆ ⫋ ⫌ ⫃ ⫄ ⫇ ⫈ ⫉ ⫊ ⟃ ⟄',
u'∠ ∡ ⦛ ⦞ ⦟ ⦢ ⦣ ⦤ ⦥ ⦦ ⦧ ⦨ ⦩ ⦪ ⦫ ⦬ ⦭ ⦮ ⦯ ⦓ ⦔ ⦕ ⦖ ⟀',
u'∫ ∬ ∭ ∮ ∯ ∰ ∱ ∲ ∳ ⨋ ⨌ ⨍ ⨎ ⨏ ⨐ ⨑ ⨒ ⨓ ⨔ ⨕ ⨖ ⨗ ⨘ ⨙ ⨚ ⨛ ⨜',
u'≁ ≂ ≃ ≄ ⋍ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ⩯ ⩰ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖',
u'¬ ⫬ ⫭ ⊨ ⊭ ∀ ∁ ∃ ∄ ∴ ∵ ⊦ ⊬ ⊧ ⊩ ⊮ ⊫ ⊯ ⊪ ⊰ ⊱ ⫗ ⫘',
u'∧ ∨ ⊻ ⊼ ⊽ ⋎ ⋏ ⟑ ⟇ ⩑ ⩒ ⩓ ⩔ ⩕ ⩖ ⩗ ⩘ ⩙ ⩚ ⩛ ⩜ ⩝ ⩞ ⩟ ⩠ ⩢',
]

self.chatbot.train(conversation)
Expand All @@ -117,8 +117,9 @@ def test_training_with_unicode_characters(self):

def test_similar_sentence_gets_same_response_multiple_times(self):
"""
Tests if the bot returns the same response for the same question (which
is similar to the one present in the training set) when asked repeatedly.
Tests if the bot returns the same response for the same
question (which is similar to the one present in the training set)
when asked repeatedly.
"""
training = [
'how do you login to gmail?',
Expand All @@ -130,11 +131,11 @@ def test_similar_sentence_gets_same_response_multiple_times(self):
self.chatbot.train(training)

response_to_trained_set = self.chatbot.get_response('how do you login to gmail?')
response_to_similar_question_1 = self.chatbot.get_response(similar_question)
response_to_similar_question_2 = self.chatbot.get_response(similar_question)
response1 = self.chatbot.get_response(similar_question)
response2 = self.chatbot.get_response(similar_question)

self.assertEqual(response_to_trained_set, response_to_similar_question_1)
self.assertEqual(response_to_similar_question_1, response_to_similar_question_2)
self.assertEqual(response_to_trained_set, response1)
self.assertEqual(response1, response2)


class ChatterBotResponseTests(ChatBotTestCase):
Expand Down

0 comments on commit 95b332e

Please sign in to comment.