From f5e30300c9114e1bc16d9f53b013eae7ba2afc21 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Tue, 2 Aug 2016 23:45:41 -0400 Subject: [PATCH] Correct unicode equality issue in python 2.7 --- chatterbot/adapters/logic/base_match.py | 1 + chatterbot/adapters/logic/closest_match.py | 12 ++++++++++-- chatterbot/conversation/statement.py | 3 +++ chatterbot/corpus/corpus.py | 1 - tests/conversation_tests/test_statements.py | 13 ++++++++++++- 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/chatterbot/adapters/logic/base_match.py b/chatterbot/adapters/logic/base_match.py index 763e71bfc..fe5edf159 100644 --- a/chatterbot/adapters/logic/base_match.py +++ b/chatterbot/adapters/logic/base_match.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from .logic_adapter import LogicAdapter from .mixins import TieBreaking diff --git a/chatterbot/adapters/logic/closest_match.py b/chatterbot/adapters/logic/closest_match.py index 07d87eaa9..79d2c1dda 100644 --- a/chatterbot/adapters/logic/closest_match.py +++ b/chatterbot/adapters/logic/closest_match.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- from .base_match import BaseMatchAdapter from fuzzywuzzy import process +import sys class ClosestMatchAdapter(BaseMatchAdapter): @@ -32,8 +34,14 @@ def get(self, input_statement): text_of_all_statements.append(statement.text) # Check if an exact match exists - if input_statement.text in text_of_all_statements: - return 1, input_statement + + # Decode unicode strings in python 2.x + if sys.version < '3': + if input_statement.text.decode('utf-8') in text_of_all_statements: + return 1, input_statement + else: + if input_statement.text in text_of_all_statements: + return 1, input_statement # Get the closest matching statement from the database closest_match, confidence = process.extract( diff --git a/chatterbot/conversation/statement.py b/chatterbot/conversation/statement.py index 0fb4d2f2a..249d31a44 100644 --- a/chatterbot/conversation/statement.py +++ b/chatterbot/conversation/statement.py @@ -1,3 +1,6 @@ +# -*- coding: utf-8 -*- + + class Statement(object): """ A statement represents a single spoken entity, sentence or diff --git a/chatterbot/corpus/corpus.py b/chatterbot/corpus/corpus.py index 7745ad427..5bebc0091 100644 --- a/chatterbot/corpus/corpus.py +++ b/chatterbot/corpus/corpus.py @@ -38,7 +38,6 @@ def load_corpus(self, dotted_path): """ Return the data contained within a specified corpus. """ - corpus_path = self.get_file_path(dotted_path) corpora = [] diff --git a/tests/conversation_tests/test_statements.py b/tests/conversation_tests/test_statements.py index 8ad22fb9a..9e8d0b515 100644 --- a/tests/conversation_tests/test_statements.py +++ b/tests/conversation_tests/test_statements.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from unittest import TestCase from chatterbot.conversation import Statement, Response @@ -7,7 +8,7 @@ class StatementTests(TestCase): def setUp(self): self.statement = Statement("A test statement.") - def test_equality(self): + def test_list_equality(self): """ It should be possible to check if a statement exists in the list of statements that another @@ -17,6 +18,16 @@ def test_equality(self): self.assertEqual(len(self.statement.in_response_to), 1) self.assertIn(Response("Yo"), self.statement.in_response_to) + def test_list_equality_unicode(self): + """ + Test that it is possible to check if a statement + is in a list of other statements when the + statements text is unicode. + """ + statements = [Statement("Hello"), Statement("我很好太感谢")] + statement = Statement("我很好太感谢") + self.assertIn(statement, statements) + def test_update_response_list_new(self): self.statement.add_response(Response("Hello")) self.assertTrue(len(self.statement.in_response_to), 1)