Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to export database as ChatterBot training corpus #239

Merged
merged 4 commits into from
Aug 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions chatterbot/chatterbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .adapters.input import InputAdapter
from .adapters.output import OutputAdapter
from .conversation import Statement, Response
from .trainers import Trainer
from .utils.queues import ResponseQueue
from .utils.module_loading import import_module

Expand Down Expand Up @@ -66,7 +67,7 @@ def __init__(self, name, **kwargs):
self.input.set_context(self)
self.output.set_context(self)

self.trainer = None
self.trainer = Trainer(self.storage)

def add_adapter(self, adapter, **kwargs):
self.validate_adapter_class(adapter, LogicAdapter)
Expand Down Expand Up @@ -165,8 +166,6 @@ def set_trainer(self, training_class, **kwargs):

@property
def train(self):
if not self.trainer:
raise self.TrainerInitializationException()
# Proxy method to the trainer
return self.trainer.train

Expand All @@ -177,11 +176,3 @@ def __init__(self, value='Recieved an unexpected adapter setting.'):

def __str__(self):
return repr(self.value)

class TrainerInitializationException(Exception):

def __init__(self, value='The `set_trainer` method must be called before calling `train`.'):
self.value = value

def __str__(self):
return repr(self.value)
31 changes: 29 additions & 2 deletions chatterbot/trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,35 @@ def __init__(self, storage, **kwargs):
self.storage = storage
self.corpus = Corpus()

def train(self):
pass
def train(self, *args, **kwargs):
raise self.TrainerInitializationException()

class TrainerInitializationException(Exception):
    """
    Raised when `train()` is called before a training class has
    been set.

    :param value: The message describing the problem. Defaults to a
        hint pointing at the `set_trainer` method.
    """

    def __init__(self, value='A training class must be set using the `set_trainer` method before calling `train()`.'):
        # Forward the message to Exception so that `args` is populated
        # even when the default message is used.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        # The message is rendered through repr(), so it appears quoted.
        return repr(self.value)

def _generate_export_data(self):
    """
    Build a list of ``[response_text, statement_text]`` pairs, one for
    each entry in the ``in_response_to`` of every statement returned
    by the storage adapter's ``filter()``.
    """
    return [
        [prompt.text, reply.text]
        for reply in self.storage.filter()
        for prompt in reply.in_response_to
    ]

def export_for_training(self, file_path='./export.json'):
    """
    Export the contents of the database to a file that can be
    used to train other chat bots.

    :param file_path: Location of the export file.
    """
    # Local import: jsondb is only loaded when an export is requested.
    from jsondb.db import Database

    target = Database(file_path)

    # All exported pairs are stored under a single 'export' key.
    payload = {'export': self._generate_export_data()}
    target.data(dictionary=payload)


class ListTrainer(Trainer):
Expand Down
16 changes: 16 additions & 0 deletions docs/training.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,22 @@ To explore what languages and sets of corpora are available, check out the `chat

If you are interested in contributing a new language corpus, or adding a module to an existing language, please create a pull request. Contributions are welcome!

Exporting your chat bot's database as a training corpus
=======================================================

Now that you have created your chat bot and sent it out into the world, perhaps
you are looking for a way to share what it has learned with other chat bots?
ChatterBot's training module provides methods that allow you to export the
content of your chat bot's database as a training corpus that can be used to
train other chat bots.

Here is an example:

.. code-block:: python

chatbot = ChatBot("Export Example Bot")
chatbot.trainer.export_for_training('./export.json')

.. glossary::

corpus
Expand Down
16 changes: 16 additions & 0 deletions examples/export_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
This is an example showing how to create an export file from
an existing chat bot that can then be used to train other bots.
"""
from chatterbot import ChatBot
from chatterbot.trainers import ChatterBotCorpusTrainer

chatbot = ChatBot("Export Example Bot")

# First, let's train our bot with some data
chatbot.set_trainer(ChatterBotCorpusTrainer)
chatbot.train("chatterbot.corpus.english")

# Now we can export the data to a file
chatbot.trainer.export_for_training('./myfile.json')
20 changes: 20 additions & 0 deletions tests/training_tests/test_database_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from tests.base_case import ChatBotTestCase
from chatterbot.trainers import ListTrainer


class DatabaseExportTests(ChatBotTestCase):
    """
    Tests for generating export data from a trained chat bot.
    """

    def setUp(self):
        super(DatabaseExportTests, self).setUp()
        # Every test in this case trains through the list trainer.
        self.chatbot.set_trainer(ListTrainer)

    def test_generate_export_data(self):
        # Train on a two-line conversation, then check that it is
        # exported as a single pair of texts.
        conversation = ['Hello, how are you?', 'I am good.']
        self.chatbot.trainer.train(conversation)

        exported = self.chatbot.trainer._generate_export_data()
        expected = [['Hello, how are you?', 'I am good.']]

        self.assertEqual(expected, exported)
3 changes: 1 addition & 2 deletions tests/training_tests/test_training.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from tests.base_case import ChatBotTestCase
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer


class TrainingTests(ChatBotTestCase):

def test_trainer_not_set(self):
with self.assertRaises(ChatBot.TrainerInitializationException):
with self.assertRaises(ListTrainer.TrainerInitializationException):
self.chatbot.train()