diff --git a/.gitignore b/.gitignore index 7fb4febf38f9..9ea6de6fb31f 100644 --- a/.gitignore +++ b/.gitignore @@ -80,7 +80,11 @@ failed_stories.md errors.json pip-wheel-metadata/* events.db +events.db-shm +events.db-wal rasa.db +rasa.db-shm +rasa.db-wal *.swp *.coverage* env diff --git a/changelog/4088.feature.rst b/changelog/4088.feature.rst new file mode 100644 index 000000000000..654e3b72c23e --- /dev/null +++ b/changelog/4088.feature.rst @@ -0,0 +1 @@ +Add story structure validation functionality (e.g. `rasa data validate stories --max-history 5`). diff --git a/changelog/5174.doc.rst b/changelog/5174.doc.rst new file mode 100644 index 000000000000..020c4deed905 --- /dev/null +++ b/changelog/5174.doc.rst @@ -0,0 +1 @@ +Updated the documentation to properly suggest not to explicitly add utterance actions to the domain. diff --git a/changelog/5189.doc.rst b/changelog/5189.doc.rst new file mode 100644 index 000000000000..4d7f55483574 --- /dev/null +++ b/changelog/5189.doc.rst @@ -0,0 +1 @@ +Added user guide for reminders and external events, including ``reminderbot`` demo. \ No newline at end of file diff --git a/changelog/5189.misc.rst b/changelog/5189.misc.rst new file mode 100644 index 000000000000..3f4edbdd1d5f --- /dev/null +++ b/changelog/5189.misc.rst @@ -0,0 +1 @@ +Added `db-shm` and `db-wal` files to `.gitignore` \ No newline at end of file diff --git a/changelog/5201.bugfix.rst b/changelog/5201.bugfix.rst new file mode 100644 index 000000000000..84a22ceb8d5d --- /dev/null +++ b/changelog/5201.bugfix.rst @@ -0,0 +1,6 @@ +Fixed incorrectly raised Error encountered in pipelines with a ``ResponseSelector`` and NLG. + +When NLU training data is split before NLU pipeline comparison, +NLG responses were not also persisted and therefore training for a pipeline including the ``ResponseSelector`` would fail. + +NLG responses are now persisted along with NLU data to a ``/train`` directory in the ``run_x/xx%_exclusion`` folder. 
\ No newline at end of file diff --git a/changelog/5292.enhancement.rst b/changelog/5292.enhancement.rst new file mode 100644 index 000000000000..943a78a93ae6 --- /dev/null +++ b/changelog/5292.enhancement.rst @@ -0,0 +1 @@ +Added ``followlinks=True`` to os.walk calls, to allow the use of symlinks in training, NLU and domain data. diff --git a/changelog/5317.improvement.rst b/changelog/5317.improvement.rst new file mode 100644 index 000000000000..e685c340cd4d --- /dev/null +++ b/changelog/5317.improvement.rst @@ -0,0 +1,10 @@ +Events exported using ``rasa export`` receive a message header if published through a +``PikaEventBroker``. The header is added to the message's ``BasicProperties.headers`` +under the ``rasa-export-process-id`` key +(``rasa.core.constants.RASA_EXPORT_PROCESS_ID_HEADER_NAME``). The value is a +UUID4 generated at each call of ``rasa export``. The resulting header is a key-value +pair that looks as follows: + +.. code-block:: text + + 'rasa-export-process-id': 'd3b3d3ffe2bd4f379ccf21214ccfb261' diff --git a/data/test_domains/default.yml b/data/test_domains/default.yml index 3f822882f74f..82da02d3b117 100644 --- a/data/test_domains/default.yml +++ b/data/test_domains/default.yml @@ -19,8 +19,3 @@ responses: - text: goodbye :( utter_default: - text: default message - -actions: - - utter_default - - utter_greet - - utter_goodbye diff --git a/data/test_domains/default_unfeaturized_entities.yml b/data/test_domains/default_unfeaturized_entities.yml index e71c411901e7..2d0a4672efbd 100644 --- a/data/test_domains/default_unfeaturized_entities.yml +++ b/data/test_domains/default_unfeaturized_entities.yml @@ -19,8 +19,3 @@ responses: - goodbye :( utter_default: - default message - -actions: - - utter_default - - utter_greet - - utter_goodbye diff --git a/data/test_domains/default_with_mapping.yml b/data/test_domains/default_with_mapping.yml index 8a8bfd89bfcd..df408b123a6c 100644 --- a/data/test_domains/default_with_mapping.yml +++ 
b/data/test_domains/default_with_mapping.yml @@ -21,8 +21,3 @@ responses: - text: goodbye :( utter_default: - text: default message - -actions: - - utter_default - - utter_greet - - utter_goodbye diff --git a/data/test_domains/default_with_slots.yml b/data/test_domains/default_with_slots.yml index c6b5bd9a8a48..3083ae158c06 100644 --- a/data/test_domains/default_with_slots.yml +++ b/data/test_domains/default_with_slots.yml @@ -30,8 +30,3 @@ responses: - text: "bye bye 😢" utter_default: # utterance sent by action_default_fallback - text: "sorry, I didn't get that, can you rephrase it?" - -actions: - - utter_default - - utter_greet - - utter_goodbye diff --git a/data/test_domains/duplicate_entities.yml b/data/test_domains/duplicate_entities.yml index 75bca1331a9f..2a5e482ba7cd 100644 --- a/data/test_domains/duplicate_entities.yml +++ b/data/test_domains/duplicate_entities.yml @@ -23,7 +23,3 @@ responses: utter_default: - text: default message -actions: - - utter_default - - utter_greet - - utter_goodbye diff --git a/data/test_domains/duplicate_intents.yml b/data/test_domains/duplicate_intents.yml index 4a53198ec3ad..4341f589a3b5 100644 --- a/data/test_domains/duplicate_intents.yml +++ b/data/test_domains/duplicate_intents.yml @@ -22,11 +22,6 @@ responses: utter_default: - text: default message -actions: - - utter_default - - utter_greet - - utter_goodbye - session_config: session_expiration_time: 60 carry_over_slots_to_new_session: true diff --git a/data/test_domains/duplicate_templates.yml b/data/test_domains/duplicate_templates.yml index 7c104888c004..3d0fbe10cb5b 100644 --- a/data/test_domains/duplicate_templates.yml +++ b/data/test_domains/duplicate_templates.yml @@ -21,8 +21,3 @@ responses: - text: default message utter_greet: - text: hey there! 
- -actions: - - utter_default - - utter_greet - - utter_goodbye diff --git a/data/test_domains/form.yml b/data/test_domains/form.yml index 30529c44e71f..d2ce5b2bc5ea 100644 --- a/data/test_domains/form.yml +++ b/data/test_domains/form.yml @@ -24,12 +24,8 @@ responses: - text: goodbye :( utter_default: - text: default message - -actions: - - utter_default - - utter_greet - - utter_goodbye - - utter_ask_continue + utter_ask_continue: + - text: should I continue? forms: - some_form \ No newline at end of file diff --git a/data/test_domains/missing_text_for_templates.yml b/data/test_domains/missing_text_for_templates.yml index 671b02f5658a..0d478179bdc7 100644 --- a/data/test_domains/missing_text_for_templates.yml +++ b/data/test_domains/missing_text_for_templates.yml @@ -10,8 +10,3 @@ responses: - goodbye :( utter_default: - default message - -actions: - - utter_default - - utter_greet - - utter_goodbye diff --git a/data/test_multi_domain/data/GreetBot/domain.yml b/data/test_multi_domain/data/GreetBot/domain.yml index dd69caa975d3..c683c9c83edb 100644 --- a/data/test_multi_domain/data/GreetBot/domain.yml +++ b/data/test_multi_domain/data/GreetBot/domain.yml @@ -2,10 +2,6 @@ intents: - greet - goodbye -actions: -- utter_greet -- utter_goodbye - responses: utter_greet: - text: "Hey! How are you?" 
diff --git a/data/test_multi_domain/data/MoodBot/domain.yml b/data/test_multi_domain/data/MoodBot/domain.yml index dde60b3316cc..ff849acf812d 100644 --- a/data/test_multi_domain/data/MoodBot/domain.yml +++ b/data/test_multi_domain/data/MoodBot/domain.yml @@ -4,11 +4,6 @@ intents: - mood_great - mood_unhappy -actions: -- utter_did_that_help -- utter_happy -- utter_cheer_up - responses: utter_cheer_up: - text: "Here is something to cheer you up:" diff --git a/data/test_multi_domain/domain.yml b/data/test_multi_domain/domain.yml index 9dc70b0098dc..93a3518feac3 100644 --- a/data/test_multi_domain/domain.yml +++ b/data/test_multi_domain/domain.yml @@ -1,9 +1,6 @@ intents: - goodbye -actions: -- utter_goodbye - responses: utter_goodbye: - text: "Bye" diff --git a/data/test_stories/stories_conflicting_1.md b/data/test_stories/stories_conflicting_1.md new file mode 100644 index 000000000000..d772f46ee33a --- /dev/null +++ b/data/test_stories/stories_conflicting_1.md @@ -0,0 +1,15 @@ +## story 1 +* greet + - utter_greet +* greet + - utter_greet +* greet + - utter_greet + +## story 2 +* default + - utter_greet +* greet + - utter_greet +* greet + - utter_default diff --git a/data/test_stories/stories_conflicting_2.md b/data/test_stories/stories_conflicting_2.md new file mode 100644 index 000000000000..001b7087c700 --- /dev/null +++ b/data/test_stories/stories_conflicting_2.md @@ -0,0 +1,14 @@ +## greetings +* greet + - utter_greet +> check_greet + +## happy path +> check_greet +* default + - utter_default + +## problem +> check_greet +* default + - utter_goodbye diff --git a/data/test_stories/stories_conflicting_3.md b/data/test_stories/stories_conflicting_3.md new file mode 100644 index 000000000000..2218f6cea164 --- /dev/null +++ b/data/test_stories/stories_conflicting_3.md @@ -0,0 +1,14 @@ +## greetings +* greet + - utter_greet +> check_greet + +## happy path +> check_greet +* default OR greet + - utter_default + +## problem +> check_greet +* greet + - utter_goodbye diff 
--git a/data/test_stories/stories_conflicting_4.md b/data/test_stories/stories_conflicting_4.md new file mode 100644 index 000000000000..372c38ff6d15 --- /dev/null +++ b/data/test_stories/stories_conflicting_4.md @@ -0,0 +1,17 @@ +## story 1 +* greet + - utter_greet +* greet + - slot{"cuisine": "German"} + - utter_greet +* greet + - utter_greet + +## story 2 +* greet + - utter_greet +* greet + - slot{"cuisine": "German"} + - utter_greet +* greet + - utter_default diff --git a/data/test_stories/stories_conflicting_5.md b/data/test_stories/stories_conflicting_5.md new file mode 100644 index 000000000000..6865c9db9b4f --- /dev/null +++ b/data/test_stories/stories_conflicting_5.md @@ -0,0 +1,16 @@ +## story 1 +* greet + - utter_greet +* greet + - utter_greet + - slot{"cuisine": "German"} +* greet + - utter_greet + +## story 2 +* greet + - utter_greet +* greet + - utter_greet +* greet + - utter_default diff --git a/data/test_stories/stories_conflicting_6.md b/data/test_stories/stories_conflicting_6.md new file mode 100644 index 000000000000..f58dc258078e --- /dev/null +++ b/data/test_stories/stories_conflicting_6.md @@ -0,0 +1,22 @@ +## story 1 +* greet + - utter_greet + +## story 2 +* greet + - utter_default + +## story 3 +* greet + - utter_default +* greet + +## story 4 +* greet + - utter_default +* default + +## story 5 +* greet + - utter_default +* goodbye diff --git a/docs/_static/spec/rasa.yml b/docs/_static/spec/rasa.yml index c119cee26ca1..90433653f9ba 100644 --- a/docs/_static/spec/rasa.yml +++ b/docs/_static/spec/rasa.yml @@ -1119,7 +1119,7 @@ components: type: array items: type: string - example: ['utter_greet', 'utter_goodbye', 'action_listen'] + example: ['action_greet', 'action_goodbye', 'action_listen'] BotMessage: type: object @@ -1420,13 +1420,6 @@ components: - mood_great - mood_unhappy - actions: - - utter_greet - - utter_cheer_up - - utter_did_that_help - - utter_happy - - utter_goodbye - responses: utter_greet: - text: "Hey! How are you?" 
diff --git a/docs/core/domains.rst b/docs/core/domains.rst index dc7124ea0cc5..b157ebaf2183 100644 --- a/docs/core/domains.rst +++ b/docs/core/domains.rst @@ -20,7 +20,7 @@ for the things your bot can say. An example of a Domain ---------------------- -As an example, the ``DefaultDomain`` has the following yaml definition: +As an example, the domain created by ``rasa init`` has the following yaml definition: .. literalinclude:: ../../rasa/cli/initial_project/domain.yml @@ -29,7 +29,20 @@ As an example, the ``DefaultDomain`` has the following yaml definition: **What does this mean?** Your NLU model will define the ``intents`` and ``entities`` that you -need to include in the domain. +need to include in the domain. The ``entities`` section lists all entities +extracted by any :ref:`entity extractor` in your +NLU pipeline. + +For example: + +.. code-block:: yaml + + entities: + - PERSON # entity extracted by SpacyEntityExtractor + - time # entity extracted by DucklingHTTPExtractor + - membership_type # custom entity extracted by CRFEntityExtractor + - priority # custom entity extracted by CRFEntityExtractor + :ref:`slots` hold information you want to keep track of during a conversation. A categorical slot called ``risk_level`` would be diff --git a/docs/core/reminders-and-external-events.rst b/docs/core/reminders-and-external-events.rst new file mode 100644 index 000000000000..7c736306d05c --- /dev/null +++ b/docs/core/reminders-and-external-events.rst @@ -0,0 +1,169 @@ +:desc: Learn how to use external events and schedule reminders. + +.. _reminders-and-external-events: + +Reminders and External Events +============================= + +.. edit-link:: + +The ``ReminderScheduled`` event and the +`trigger_intent endpoint <../../api/http-api/#operation/triggerConversationIntent>`_ let your assistant remind you +about things after a given period of time, or to respond to external events (other applications, sensors, etc.). 
+You can find a full example assistant that implements these features +`here `_. + +.. contents:: + :local: + +.. _reminders: + +Reminders +--------- + +Instead of an external sensor, you might just want to be reminded about something after a certain amount of time. +For this, Rasa provides the special event ``ReminderScheduled``, and another event, ``ReminderCancelled``, to unschedule a reminder. + +.. _scheduling-reminders-guide: + +Scheduling Reminders +^^^^^^^^^^^^^^^^^^^^ + +Let's say you want your assistant to remind you to call a friend in 5 seconds. +(You probably want some longer time span, but for the sake of testing, let it be 5 seconds.) +Thus, we define an intent ``ask_remind_call`` with some NLU data, + +.. code-block:: md + + ## intent:ask_remind_call + - remind me to call [Albert](name) + - remind me to call [Susan](name) + - later I have to call [Daksh](name) + - later I have to call [Anna](name) + ... + +and connect this intent with a new custom action ``action_set_reminder``. +We could make this connection by providing training stories (recommended for more complex assistants), or using the :ref:`mapping-policy`. + +The custom action ``action_set_reminder`` should schedule a reminder that, 5 seconds later, triggers an intent ``EXTERNAL_reminder`` with all the entities that the user provided in his/her last message (similar to an external event): + +.. literalinclude:: ../../examples/reminderbot/actions.py + :pyobject: ActionSetReminder + +Note that this requires the ``datetime`` and ``rasa_sdk.events`` packages. + +Finally, we define another custom action ``action_react_to_reminder`` and link it to the ``EXTERNAL_reminder`` intent: + +.. code-block:: md + + - EXTERNAL_reminder: + triggers: action_react_to_reminder + +where the ``action_react_to_reminder`` is + +.. literalinclude:: ../../examples/reminderbot/actions.py + :pyobject: ActionReactToReminder + +Instead of a custom action, we could also have used a simple response template. 
+But here we want to make use of the fact that the reminder can carry entities, and we can process the entities in this custom action. + +.. warning:: + + Reminders are cancelled whenever you shutdown your Rasa server. + +.. warning:: + + Reminders currently (Rasa 1.8) don't work in `rasa shell`. + You have to test them with a + `running Rasa X server `_ instead. + +.. note:: + + Proactively reaching out to the user is dependent on the abilities of a channel and + hence not supported by every channel. If your channel does not support it, consider + using the :ref:`callbackInput` channel to send messages to a `webhook `_. + +.. _cancelling-reminders-guide: + +Cancelling Reminders +^^^^^^^^^^^^^^^^^^^^ + +Sometimes the user may want to cancel a reminder that he has scheduled earlier. +A simple way of adding this functionality to your assistant is to create an intent ``ask_forget_reminders`` and let your assistant respond to it with a custom action such as + +.. literalinclude:: ../../examples/reminderbot/actions.py + :pyobject: ForgetReminders + +Here, ``ReminderCancelled()`` simply cancels all the reminders that are currently scheduled. +Alternatively, you may provide some parameters to narrow down the types of reminders that you want to cancel. +For example, + + - ``ReminderCancelled(intent="greet")`` cancels all reminders with intent ``greet`` + - ``ReminderCancelled(entities={...})`` cancels all reminders with the given entities + - ``ReminderCancelled("...")`` cancels the one unique reminder with the given name "``...``" that you supplied + during its creation + +.. _external-event-guide: + +External Events +--------------- + +Let's say you want to send a message from some other device to change the course of an ongoing conversation. +For example, some moisture-sensor attached to a Raspberry Pi should inform your personal assistant that your favourite +plant needs watering, and your assistant should then relay this message to you. 
+ +To do this, your Raspberry Pi needs to send a message to the `trigger_intent endpoint <../../api/http-api/#operation/triggerConversationIntent>`_ of your conversation. +As the name says, this injects a user intent (possibly with entities) into your conversation. +So for Rasa it is almost as if you had entered a message that got classified with this intent and these entities. +Rasa then needs to respond to this input with an action such as ``action_warn_dry``. +The easiest and most reliable way to connect this action with the intent is via the :ref:`mapping-policy`. + +.. _getting-conversation-id: + +Getting the Conversation ID +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first thing we need is the Session ID of the conversation that your sensor should send a notification to. +An easy way to get this is to define a custom action (see :ref:`custom-actions`) that displays the ID in the conversation. +For example: + +.. literalinclude:: ../../examples/reminderbot/actions.py + :pyobject: ActionTellID + +In addition, we also declare an intent ``ask_id``, define some NLU data for it, and add both ``action_tell_id`` and +``ask_id`` to the domain file, where we specify that one should trigger the other: + +.. code-block:: md + + intents: + - ask_id: + triggers: action_tell_id + +Now, when you ask "What is the ID of this conversation?", the assistant replies with something like "The ID of this +conversation is: 38cc25d7e23e4dde800353751b7c2d3e". + +If you want your assistant to link to the Raspberry Pi automatically, you will have to write a custom action that +informs the Pi about the conversation id when your conversation starts (see :ref:`custom_session_start`). + +.. _responding_to_external_events: + +Responding to External Events +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Now that we have our Session ID, we need to prepare the assistant so it responds to messages from the sensor. +To this end, we define a new intent ``EXTERNAL_dry_plant`` without any NLU data. 
+This intent will later be triggered by the external sensor. +Here, we start the intent name with ``EXTERNAL_`` to indicate that this is not something the user would say, but you can name the intent however you like. + +In the domain file, we now connect the intent ``EXTERNAL_dry_plant`` with another custom action ``action_warn_dry``, e.g. + +.. literalinclude:: ../../examples/reminderbot/actions.py + :pyobject: ActionWarnDry + +Now, when you are in a conversation with id ``38cc25d7e23e4dde800353751b7c2d3e``, then running + +.. code-block:: shell + + curl -H "Content-Type: application/json" -X POST -d '{"name": "EXTERNAL_dry_plant", "entities": {"plant": "Orchid"}}' http://localhost:5005/conversations/38cc25d7e23e4dde800353751b7c2d3e/trigger_intent + +in the terminal will cause your assistant to say "Your Orchid needs some water!". diff --git a/docs/core/responses.rst b/docs/core/responses.rst index 2342c503eac7..b51b0c2129eb 100644 --- a/docs/core/responses.rst +++ b/docs/core/responses.rst @@ -166,22 +166,5 @@ Proactively Reaching Out to the User with External Events You may want to proactively reach out to the user, for example to display the output of a long running background operation or notify the user of an external event. - -To do so, you can ``POST`` an intent to the -`trigger_intent endpoint <../../api/http-api/#operation/triggerConversationIntent>`_. -The intent, let's call it ``EXTERNAL_sensor``, will be treated as if the user had sent a message with this intent. -You can even provide a dictionary of entities as parameters, e.g. ``{"temperature": "high"}``. -For your bot to respond, we recommend you use the :ref:`mapping-policy` to connect the sent intent ``EXTERNAL_sensor`` -with the action you want your bot to execute, e.g. ``utter_warn_temperature``. -You can also use a custom action here, of course. 
- -Use the ``output_channel`` query parameter to specify which output -channel should be used to communicate the assistant's responses back to the user. -Any messages that are dispatched in the custom action will be forwarded to the specified output channel. -Set this parameter to ``"latest"`` if you want to use the latest input channel that the user has used. - -.. note:: - - Proactively reaching out to the user is dependent on the abilities of a channel and - hence not supported by every channel. If your channel does not support it, consider - using the :ref:`callbackInput` channel to send messages to a webhook. +To learn more, check out `reminderbot `_ in +the Rasa examples directory, or the `docs page on this topic <../../core/reminders-and-external-events>`_. diff --git a/docs/index.rst b/docs/index.rst index 90a42982aed4..28519e449532 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -57,6 +57,7 @@ Understand messages, hold conversations, and connect to messaging channels and A core/domains core/responses core/actions + core/reminders-and-external-events core/policies core/slots core/forms diff --git a/docs/user-guide/building-assistants.rst b/docs/user-guide/building-assistants.rst index fd63da6abc64..2deffe2ddc2e 100644 --- a/docs/user-guide/building-assistants.rst +++ b/docs/user-guide/building-assistants.rst @@ -93,11 +93,6 @@ We’ll also need to add the intents, actions and templates to our ``domain.yml` - bye - thank - actions: - - utter_greet - - utter_noworries - - utter_bye - templates: utter_noworries: - text: No worries! @@ -279,9 +274,6 @@ to the list of actions. These actions always have to start with the ``respond_`` .. code-block:: yaml actions: - - utter_greet - - utter_noworries - - utter_bye - respond_faq Next we’ll write a story so that Core knows which action to predict: @@ -656,7 +648,8 @@ yet we don’t want the intent to affect the dialogue history.
To do this, the r must be an action that returns the ``UserUtteranceReverted()`` event to remove the interaction from the dialogue history. -First, open the ``domain.yml`` and modify the greet intent and add ``action_greet`` as shown here: +First, open the ``domain.yml``, modify the greet intent and add a new ``actions`` block to +the file. Next, add ``action_greet`` to that block as shown here: .. code-block:: yaml @@ -668,11 +661,7 @@ First, open the ``domain.yml`` and modify the greet intent and add ``action_gree - contact_sales - inform - Actions: - - utter_greet - - utter_noworries - - utter_bye - - respond_faq + actions: - action_greet Remove any stories using the "greet" intent if you have them. @@ -785,10 +774,6 @@ We’ll need to add the intent and utterances we just added to our domain: - faq - explain - actions: - - utter_explain_why_budget - - utter_explain_why_email - templates: utter_explain_why_budget: - text: We need to know your budget to recommend a subscription @@ -896,9 +881,6 @@ And finally we’ll add a template to our domain file: .. code-block:: yaml - actions: - - utter_out_of_scope - templates: utter_out_of_scope: - text: Sorry, I can’t handle that request. diff --git a/docs/user-guide/validate-files.rst b/docs/user-guide/validate-files.rst index c4d8f590c362..2421eea49e05 100644 --- a/docs/user-guide/validate-files.rst +++ b/docs/user-guide/validate-files.rst @@ -18,7 +18,8 @@ You can run it with the following command: rasa data validate -The script above runs all the validations on your files. Here is the list of options to +The script above runs all the validations on your files, except for story structure validation, +which is omitted unless you provide the ``--max-history`` argument. Here is the list of options to the script: ..
program-output:: rasa data validate --help @@ -65,3 +66,53 @@ To use these functions it is necessary to create a `Validator` object and initia stories='data/stories.md') validator.verify_all() + +Test Story Files for Conflicts +------------------------------ + +In addition to the default tests described above, you can also do a more in-depth structural test of your stories. +In particular, you can test if your stories are inconsistent, i.e. if different bot actions follow from the same dialogue history. +If your stories are inconsistent in this way, then Rasa cannot learn the correct behaviour. + +Take, for example, the following two stories: + +.. code-block:: md + + ## Story 1 + * greet + - utter_greet + * inform_happy + - utter_happy + - utter_goodbye + + ## Story 2 + * greet + - utter_greet + * inform_happy + - utter_goodbye + +These two stories are inconsistent, because Rasa doesn't know if it should predict ``utter_happy`` or ``utter_goodbye`` +after ``inform_happy``, as there is nothing that would distinguish the dialogue states at ``inform_happy`` in the two +stories and the subsequent actions are different in Story 1 and Story 2. + +This conflict can be automatically identified with our story structure validation tool. +To do this, use ``rasa data validate`` in the command line, as follows: + +.. code-block:: bash + + rasa data validate stories --max-history 3 + > 2019-12-09 09:32:13 INFO rasa.core.validator - Story structure validation... + > 2019-12-09 09:32:13 INFO rasa.core.validator - Assuming max_history = 3 + > Processed Story Blocks: 100% 2/2 [00:00<00:00, 3237.59it/s, # trackers=1] + > 2019-12-09 09:32:13 WARNING rasa.core.validator - CONFLICT after intent 'inform_happy': + > utter_goodbye predicted in 'Story 2' + > utter_happy predicted in 'Story 1' + +Here we specify a ``max-history`` value of 3.
+This means that 3 events (user messages / bot actions) are taken into account for action predictions, but the particular setting does not matter for this example, because regardless of how long a history you take into account, the conflict always exists. + +.. warning:: + + The ``rasa data validate stories`` script assumes that all your **story names are unique**. + If your stories are in the Markdown format, you may find duplicate names with a command like + ``grep -h "##" data/*.md | sort | uniq -d``. diff --git a/examples/moodbot/domain.yml b/examples/moodbot/domain.yml index 919b977460d8..80e77ddeb6a5 100644 --- a/examples/moodbot/domain.yml +++ b/examples/moodbot/domain.yml @@ -7,14 +7,6 @@ intents: - mood_unhappy - bot_challenge -actions: -- utter_greet -- utter_cheer_up -- utter_did_that_help -- utter_happy -- utter_goodbye -- utter_iamabot - responses: utter_greet: - text: "Hey! How are you?" diff --git a/examples/reminderbot/README.md b/examples/reminderbot/README.md new file mode 100644 index 000000000000..3acba395cd04 --- /dev/null +++ b/examples/reminderbot/README.md @@ -0,0 +1,40 @@ +# Reminderbot + +The `reminderbot` example demonstrates how your bot can respond to external events or reminders. + +## What’s inside this example? + +This example contains some training data and the main files needed to build an +assistant on your local machine. The `reminderbot` consists of the following files: + +- **data/nlu.md** contains training examples for the NLU model +- **data/stories.md** contains training stories for the Core model +- **config.yml** contains the model configuration +- **domain.yml** contains the domain of the assistant +- **credentials.yml** contains credentials for the different channels +- **endpoints.yml** contains the different endpoints reminderbot can use +- **actions.py** contains the custom actions that deal with external events and reminders + +## How to use this example?
+ +To train and chat with `reminderbot`, execute the following steps: + +1. Train a Rasa Open Source model containing the Rasa NLU and Rasa Core models by running: + ``` + rasa train + ``` + The model will be stored in the `/models` directory as a zipped file. + +2. Run a Rasa action server with + ``` + rasa run actions + ``` + +3. Run Rasa X to talk to your bot. + If you don't have a Rasa X server running, you can test things with `rasa x` in a separate shell (the action server must keep running). + +For more information about the individual commands, please check out our +[documentation](http://rasa.com/docs/rasa/user-guide/command-line-interface/). + +## Encountered any issues? +Let us know about them by posting on the [Rasa Community Forum](https://forum.rasa.com)! diff --git a/examples/reminderbot/__init__.py b/examples/reminderbot/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/examples/reminderbot/actions.py b/examples/reminderbot/actions.py new file mode 100644 index 000000000000..97562853db55 --- /dev/null +++ b/examples/reminderbot/actions.py @@ -0,0 +1,127 @@ +# This file contains your custom actions which can be used to run +# custom Python code. +# +# See this guide on how to implement these actions: +# https://rasa.com/docs/rasa/core/actions/#custom-actions/ + + +# This is a simple example of an assistant that schedules reminders and +# reacts to external events.
+ +from typing import Any, Text, Dict, List +import datetime + +from rasa_sdk import Action, Tracker +from rasa_sdk.events import ReminderScheduled, ReminderCancelled +from rasa_sdk.executor import CollectingDispatcher + + +class ActionSetReminder(Action): + """Schedules a reminder, supplied with the last message's entities.""" + + def name(self) -> Text: + return "action_set_reminder" + + async def run( + self, + dispatcher: CollectingDispatcher, + tracker: Tracker, + domain: Dict[Text, Any], + ) -> List[Dict[Text, Any]]: + + dispatcher.utter_message("I will remind you in 5 seconds.") + + date = datetime.datetime.now() + datetime.timedelta(seconds=5) + entities = tracker.latest_message.get("entities") + + reminder = ReminderScheduled( + "EXTERNAL_reminder", + trigger_date_time=date, + entities=entities, + name="my_reminder", + kill_on_user_message=False, + ) + + return [reminder] + + +class ActionReactToReminder(Action): + """Reminds the user to call someone.""" + + def name(self) -> Text: + return "action_react_to_reminder" + + async def run( + self, + dispatcher: CollectingDispatcher, + tracker: Tracker, + domain: Dict[Text, Any], + ) -> List[Dict[Text, Any]]: + + name = next(tracker.get_latest_entity_values("name"), "someone") + dispatcher.utter_message(f"Remember to call {name}!") + + return [] + + +class ActionTellID(Action): + """Informs the user about the conversation ID.""" + + def name(self) -> Text: + return "action_tell_id" + + async def run( + self, dispatcher, tracker: Tracker, domain: Dict[Text, Any] + ) -> List[Dict[Text, Any]]: + + conversation_id = tracker.sender_id + + dispatcher.utter_message( + f"The ID of this conversation is: " f"{conversation_id}." 
+ ) + + dispatcher.utter_message( + f"Trigger an intent with " + f'curl -H "Content-Type: application/json" ' + f'-X POST -d \'{{"name": "EXTERNAL_dry_plant", ' + f'"entities": {{"plant": "Orchid"}}}}\' ' + f"http://localhost:5005/conversations/{conversation_id}/" + f"trigger_intent" + ) + + return [] + + +class ActionWarnDry(Action): + """Informs the user that a plant needs water.""" + + def name(self) -> Text: + return "action_warn_dry" + + async def run( + self, + dispatcher: CollectingDispatcher, + tracker: Tracker, + domain: Dict[Text, Any], + ) -> List[Dict[Text, Any]]: + + plant = next(tracker.get_latest_entity_values("plant"), "someone") + dispatcher.utter_message(f"Your {plant} needs some water!") + + return [] + + +class ForgetReminders(Action): + """Cancels all reminders.""" + + def name(self) -> Text: + return "action_forget_reminders" + + async def run( + self, dispatcher, tracker: Tracker, domain: Dict[Text, Any] + ) -> List[Dict[Text, Any]]: + + dispatcher.utter_message(f"Okay, I'll cancel all your reminders.") + + # Cancel all reminders + return [ReminderCancelled()] diff --git a/examples/reminderbot/config.yml b/examples/reminderbot/config.yml new file mode 100644 index 000000000000..963003b0df8e --- /dev/null +++ b/examples/reminderbot/config.yml @@ -0,0 +1,5 @@ +language: en +pipeline: supervised_embeddings +policies: +- name: MemoizationPolicy +- name: MappingPolicy diff --git a/examples/reminderbot/credentials.yml b/examples/reminderbot/credentials.yml new file mode 100644 index 000000000000..c61e3acddbac --- /dev/null +++ b/examples/reminderbot/credentials.yml @@ -0,0 +1,36 @@ +# This file contains the credentials for the voice & chat platforms +# which your bot is using. 
+# https://rasa.com/docs/rasa/user-guide/messaging-and-voice-channels/ + +rest: +# you don't need to provide anything here - this channel doesn't +# require any credentials + +callback: + # URL to which Rasa Open Source will send the bot responses + # See https://rasa.com/docs/rasa/user-guide/connectors/your-own-website/#callbackinput + url: "http://localhost:5034/bot" + +#facebook: +# verify: "" +# secret: "" +# page-access-token: "" + +#slack: +# slack_token: "" +# slack_channel: "" + +#socketio: +# user_message_evt: +# bot_message_evt: +# session_persistence: + +#mattermost: +# url: "https:///api/v4" +# team: "" +# user: "" +# pw: "" +# webhook_url: "" + +rasa: + url: "http://localhost:5002/api" diff --git a/examples/reminderbot/data/nlu.md b/examples/reminderbot/data/nlu.md new file mode 100644 index 000000000000..f4afb6ef63f2 --- /dev/null +++ b/examples/reminderbot/data/nlu.md @@ -0,0 +1,43 @@ +## intent:greet +- hey +- hello +- hi +- good morning +- good evening +- hey there + +## intent:bye +- bye +- good bye +- ciao +- see you +- see ya + +## intent:ask_remind_call +- remind me to call [John](name) +- remind me to call [Lis](name) +- remind me to call [Albert](name) +- remind me to call [Susan](name) +- later I have to call [Alan](name) +- later I have to call [Jessie](name) +- later I have to call [Alex](name) +- Please, remind me to call [vova](name) +- please remind me to call [tanja](name) +- I must not forget to call [santa](name) +- I must not forget to call [Daksh](name) +- I must not forget to call [Juste](name) + +## intent:ask_id +- what's the conversation id? +- id +- What is the ID of this conversation? +- How do I send a POST request to this conversation? + +## intent:ask_forget_reminders +- forget about it +- don't remind me! +- Forget about the reminder +- do not remind me +- do not remind me! +- Forget reminding me +- Forget reminding me! 
diff --git a/examples/reminderbot/data/stories.md b/examples/reminderbot/data/stories.md new file mode 100644 index 000000000000..77fc9ddfb305 --- /dev/null +++ b/examples/reminderbot/data/stories.md @@ -0,0 +1,5 @@ +## happy path +* greet + - utter_what_can_do +* bye + - utter_goodbye \ No newline at end of file diff --git a/examples/reminderbot/domain.yml b/examples/reminderbot/domain.yml new file mode 100644 index 000000000000..0ee3aed3d55d --- /dev/null +++ b/examples/reminderbot/domain.yml @@ -0,0 +1,35 @@ +session_config: + session_expiration_time: 0.0 + carry_over_slots_to_new_session: true +intents: +- greet: + triggers: action_set_reminder +- ask_remind_call: + triggers: action_set_reminder +- ask_forget_reminders: + triggers: action_forget_reminders +- bye: + triggers: utter_goodbye +- ask_id: + triggers: action_tell_id +- EXTERNAL_dry_plant: + triggers: action_warn_dry +- EXTERNAL_reminder: + triggers: action_react_to_reminder +- EXT_reminder +entities: +- name +- plant +responses: + utter_what_can_do: + - text: What can I do for you? + utter_goodbye: + - text: Bye +actions: +- action_set_reminder +- action_forget_reminders +- action_react_to_reminder +- action_tell_id +- action_warn_dry +- utter_what_can_do +- utter_goodbye diff --git a/examples/reminderbot/endpoints.yml b/examples/reminderbot/endpoints.yml new file mode 100644 index 000000000000..2ff5a0923d65 --- /dev/null +++ b/examples/reminderbot/endpoints.yml @@ -0,0 +1,42 @@ +# This file contains the different endpoints your bot can use. + +# Server where the models are pulled from. +# https://rasa.com/docs/rasa/user-guide/running-the-server/#fetching-models-from-a-server/ + +#models: +# url: http://my-server.com/models/default_core@latest +# wait_time_between_pulls: 10 # [optional](default: 100) + +# Server which runs your custom actions. 
+# https://rasa.com/docs/rasa/core/actions/#custom-actions/ + +action_endpoint: + url: "http://localhost:5055/webhook" + +# Tracker store which is used to store the conversations. +# By default the conversations are stored in memory. +# https://rasa.com/docs/rasa/api/tracker-stores/ + +#tracker_store: +# type: redis +# url: +# port: +# db: +# password: +# use_ssl: + +#tracker_store: +# type: mongod +# url: +# db: +# username: +# password: + +# Event broker which all conversation events should be streamed to. +# https://rasa.com/docs/rasa/api/event-brokers/ + +#event_broker: +# url: localhost +# username: username +# password: password +# queue: queue diff --git a/rasa/cli/data.py b/rasa/cli/data.py index edb7bb28b6b2..f5ed5d0f1da4 100644 --- a/rasa/cli/data.py +++ b/rasa/cli/data.py @@ -1,21 +1,22 @@ +import logging import argparse import asyncio -import sys from typing import List from rasa import data from rasa.cli.arguments import data as arguments -from rasa.cli.utils import get_validated_path +import rasa.cli.utils from rasa.constants import DEFAULT_DATA_PATH -from typing import NoReturn +from rasa.validator import Validator +from rasa.importers.rasa import RasaFileImporter + +logger = logging.getLogger(__name__) # noinspection PyProtectedMember def add_subparser( subparsers: argparse._SubParsersAction, parents: List[argparse.ArgumentParser] ): - import rasa.nlu.convert as convert - data_parser = subparsers.add_parser( "data", conflict_handler="resolve", @@ -26,6 +27,17 @@ def add_subparser( data_parser.set_defaults(func=lambda _: data_parser.print_help(None)) data_subparsers = data_parser.add_subparsers() + + _add_data_convert_parsers(data_subparsers, parents) + _add_data_split_parsers(data_subparsers, parents) + _add_data_validate_parsers(data_subparsers, parents) + + +def _add_data_convert_parsers( + data_subparsers, parents: List[argparse.ArgumentParser] +) -> None: + from rasa.nlu import convert + convert_parser = data_subparsers.add_parser( "convert", 
formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -45,6 +57,10 @@ def add_subparser( arguments.set_convert_arguments(convert_nlu_parser) + +def _add_data_split_parsers( + data_subparsers, parents: List[argparse.ArgumentParser] +) -> None: split_parser = data_subparsers.add_parser( "split", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -65,21 +81,46 @@ def add_subparser( arguments.set_split_arguments(nlu_split_parser) + +def _add_data_validate_parsers( + data_subparsers, parents: List[argparse.ArgumentParser] +) -> None: validate_parser = data_subparsers.add_parser( "validate", formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=parents, help="Validates domain and data files to check for possible mistakes.", ) + _append_story_structure_arguments(validate_parser) validate_parser.set_defaults(func=validate_files) arguments.set_validator_arguments(validate_parser) + validate_subparsers = validate_parser.add_subparsers() + story_structure_parser = validate_subparsers.add_parser( + "stories", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + parents=parents, + help="Checks for inconsistencies in the story files.", + ) + _append_story_structure_arguments(story_structure_parser) + story_structure_parser.set_defaults(func=validate_stories) + arguments.set_validator_arguments(story_structure_parser) + + +def _append_story_structure_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--max-history", + type=int, + default=None, + help="Number of turns taken into account for story structure validation.", + ) -def split_nlu_data(args) -> None: + +def split_nlu_data(args: argparse.Namespace) -> None: from rasa.nlu.training_data.loading import load_data from rasa.nlu.training_data.util import get_file_format - data_path = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH) + data_path = rasa.cli.utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH) data_path = data.get_nlu_directory(data_path) nlu_data 
= load_data(data_path) @@ -91,22 +132,53 @@ def split_nlu_data(args) -> None: test.persist(args.out, filename=f"test_data.{fformat}") -def validate_files(args) -> NoReturn: - """Validate all files needed for training a model. - - Fails with a non-zero exit code if there are any errors in the data.""" - from rasa.core.validator import Validator - from rasa.importers.rasa import RasaFileImporter +def validate_files(args: argparse.Namespace, stories_only: bool = False) -> None: + """ + Validates either the story structure or the entire project. + Args: + args: Commandline arguments + stories_only: If `True`, only the story structure is validated. + """ loop = asyncio.get_event_loop() file_importer = RasaFileImporter( domain_path=args.domain, training_data_paths=args.data ) validator = loop.run_until_complete(Validator.from_importer(file_importer)) - domain_is_valid = validator.verify_domain_validity() - if not domain_is_valid: - sys.exit(1) - everything_is_alright = validator.verify_all(not args.fail_on_warnings) - sys.exit(0) if everything_is_alright else sys.exit(1) + if stories_only: + all_good = _validate_story_structure(validator, args) + else: + all_good = ( + _validate_domain(validator) + and _validate_nlu(validator, args) + and _validate_story_structure(validator, args) + ) + + if not all_good: + rasa.cli.utils.print_error_and_exit("Project validation completed with errors.") + + +def validate_stories(args: argparse.Namespace) -> None: + validate_files(args, stories_only=True) + + +def _validate_domain(validator: Validator) -> bool: + return validator.verify_domain_validity() + + +def _validate_nlu(validator: Validator, args: argparse.Namespace) -> bool: + return validator.verify_nlu(not args.fail_on_warnings) + + +def _validate_story_structure(validator: Validator, args: argparse.Namespace) -> bool: + # Check if a valid setting for `max_history` was given + if isinstance(args.max_history, int) and args.max_history < 1: + raise argparse.ArgumentTypeError( + 
f"The value of `--max-history {args.max_history}` is not a positive integer.", + ) + + return validator.verify_story_structure( + not args.fail_on_warnings, max_history=args.max_history + ) diff --git a/rasa/cli/export.py b/rasa/cli/export.py index 03f3c7500685..c03c5c0f5d4e 100644 --- a/rasa/cli/export.py +++ b/rasa/cli/export.py @@ -14,7 +14,7 @@ if typing.TYPE_CHECKING: from rasa.core.brokers.broker import EventBroker - from rasa.core.brokers.pika import PikaEventBroker + from rasa.core.brokers.pika import PikaEventBroker, PikaProducer from rasa.core.tracker_store import TrackerStore from rasa.core.exporter import Exporter from rasa.core.utils import AvailableEndpoints @@ -145,9 +145,9 @@ def _prepare_event_broker(event_broker: "EventBroker") -> None: In addition, wait until the event broker reports a `ready` state. """ - from rasa.core.brokers.pika import PikaEventBroker + from rasa.core.brokers.pika import PikaEventBroker, PikaProducer - if isinstance(event_broker, PikaEventBroker): + if isinstance(event_broker, (PikaEventBroker, PikaProducer)): event_broker.should_keep_unpublished_messages = False event_broker.raise_on_failure = True diff --git a/rasa/cli/initial_project/domain.yml b/rasa/cli/initial_project/domain.yml index e2a3d27bcbc9..deb199d32dc7 100644 --- a/rasa/cli/initial_project/domain.yml +++ b/rasa/cli/initial_project/domain.yml @@ -7,14 +7,6 @@ intents: - mood_unhappy - bot_challenge -actions: -- utter_greet -- utter_cheer_up -- utter_did_that_help -- utter_happy -- utter_goodbye -- utter_iamabot - responses: utter_greet: - text: "Hey! How are you?" 
diff --git a/rasa/constants.py b/rasa/constants.py index 41cf5382aa72..75ba1b9547e2 100644 --- a/rasa/constants.py +++ b/rasa/constants.py @@ -14,6 +14,7 @@ TEST_DATA_FILE = "test.md" TRAIN_DATA_FILE = "train.md" +NLG_DATA_FILE = "responses.md" RESULTS_FILE = "results.json" NUMBER_OF_TRAINING_STORIES_FILE = "num_stories.json" PERCENTAGE_KEY = "__percentage__" diff --git a/rasa/core/brokers/pika.py b/rasa/core/brokers/pika.py index 45949ff4fd6d..0c8ed8682ee7 100644 --- a/rasa/core/brokers/pika.py +++ b/rasa/core/brokers/pika.py @@ -5,7 +5,7 @@ import typing from collections import deque from threading import Thread -from typing import Callable, Deque, Dict, Optional, Text, Union +from typing import Callable, Deque, Dict, Optional, Text, Union, Any from rasa.constants import ( DEFAULT_LOG_LEVEL_LIBRARIES, @@ -326,6 +326,7 @@ def _run_pika_io_loop_in_thread(self) -> None: thread.start() def _run_pika_io_loop(self) -> None: + # noinspection PyUnresolvedReferences self._pika_connection.ioloop.start() def is_ready( @@ -353,18 +354,28 @@ def is_ready( return False def publish( - self, event: Dict, retries: int = 60, retry_delay_in_seconds: int = 5 + self, + event: Dict[Text, Any], + retries: int = 60, + retry_delay_in_seconds: int = 5, + headers: Optional[Dict[Text, Text]] = None, ) -> None: """Publish `event` into Pika queue. - Perform `retries` publish attempts with `retry_delay_in_seconds` between them. - """ + Args: + event: Serialised event to be published. + retries: Number of retries if publishing fails + retry_delay_in_seconds: Delay in seconds between retries. + headers: Message headers to append to the published message (key-value + dictionary). The headers can be retrieved in the consumer from the + `headers` attribute of the message's `BasicProperties`. 
+ """ body = json.dumps(event) while retries: try: - self._publish(body) + self._publish(body, headers) return except Exception as e: logger.error( @@ -383,28 +394,48 @@ def publish( "'{}':\n{}".format(self.queue, self.host, body) ) - @property - def _message_properties(self) -> "BasicProperties": - """Create RabbitMQ message properties. + def _get_message_properties( + self, headers: Optional[Dict[Text, Text]] = None + ) -> "BasicProperties": + """Create RabbitMQ message `BasicProperties`. + + The `app_id` property is set to the value of `self.rasa_environment` if + present, and the message delivery mode is set to 2 (persistent). In + addition, the `headers` property is set if supplied. + + Args: + headers: Message headers to add to the message properties of the + published message (key-value dictionary). The headers can be retrieved in + the consumer from the `headers` attribute of the message's + `BasicProperties`. Returns: - pika.spec.BasicProperties with the `RASA_ENVIRONMENT` environment - variable as the properties' `app_id` value. If this variable is unset, empty - pika.spec.BasicProperties. + `pika.spec.BasicProperties` with the `RASA_ENVIRONMENT` environment variable + as the properties' `app_id` value, `delivery_mode`=2 and `headers` as the + properties' headers. 
""" from pika.spec import BasicProperties - kwargs = {"app_id": self.rasa_environment} if self.rasa_environment else {} + # make message persistent + kwargs = {"delivery_mode": 2} + + if self.rasa_environment: + kwargs["app_id"] = self.rasa_environment - return BasicProperties(delivery_mode=2, **kwargs) # make message persistent + if headers: + kwargs["headers"] = headers - def _basic_publish(self, body: Text) -> None: + return BasicProperties(**kwargs) + + def _basic_publish( + self, body: Text, headers: Optional[Dict[Text, Text]] = None + ) -> None: self.channel.basic_publish( "", self.queue, body.encode(DEFAULT_ENCODING), - properties=self._message_properties, + properties=self._get_message_properties(headers), ) logger.debug( @@ -412,11 +443,11 @@ def _basic_publish(self, body: Text) -> None: f"'{self.host}':\n{body}" ) - def _publish(self, body: Text) -> None: + def _publish(self, body: Text, headers: Optional[Dict[Text, Text]] = None) -> None: if self._pika_connection.is_closed: # Try to reset connection self._run_pika() - self._basic_publish(body) + self._basic_publish(body, headers) elif not self.channel and self.should_keep_unpublished_messages: logger.warning( f"RabbitMQ channel has not been assigned. 
Adding message to " @@ -426,7 +457,7 @@ def _publish(self, body: Text) -> None: ) self._unpublished_messages.append(body) else: - self._basic_publish(body) + self._basic_publish(body, headers) def create_rabbitmq_ssl_options( diff --git a/rasa/core/constants.py b/rasa/core/constants.py index cbbf2b76e63a..a4b2799b523c 100644 --- a/rasa/core/constants.py +++ b/rasa/core/constants.py @@ -58,3 +58,6 @@ RESPOND_PREFIX = "respond_" DEFAULT_CATEGORICAL_SLOT_VALUE = "__other__" + +# RabbitMQ message property header added to events published using `rasa export` +RASA_EXPORT_PROCESS_ID_HEADER_NAME = "rasa-export-process-id" diff --git a/rasa/core/domain.py b/rasa/core/domain.py index f6bfd80a2070..f497bc5f286c 100644 --- a/rasa/core/domain.py +++ b/rasa/core/domain.py @@ -190,7 +190,7 @@ def from_directory(cls, path: Text) -> "Domain": from rasa import data domain = Domain.empty() - for root, _, files in os.walk(path): + for root, _, files in os.walk(path, followlinks=True): for file in files: full_path = os.path.join(root, file) if data.is_domain_file(full_path): diff --git a/rasa/core/exporter.py b/rasa/core/exporter.py index 527fda24d70a..d529895990aa 100644 --- a/rasa/core/exporter.py +++ b/rasa/core/exporter.py @@ -1,11 +1,14 @@ import itertools import logging +import uuid from typing import Text, Optional, List, Set, Dict, Any from tqdm import tqdm import rasa.cli.utils as cli_utils from rasa.core.brokers.broker import EventBroker +from rasa.core.brokers.pika import PikaProducer, PikaEventBroker +from rasa.core.constants import RASA_EXPORT_PROCESS_ID_HEADER_NAME from rasa.core.tracker_store import TrackerStore from rasa.core.trackers import EventVerbosity from rasa.exceptions import ( @@ -57,6 +60,9 @@ def publish_events(self) -> int: Exits if the publishing of events is interrupted due to an error. In that case, the CLI command to continue the export where it was interrupted is printed. + Returns: + The number of successfully published events. 
+ """ events = self._fetch_events_within_time_range() @@ -67,10 +73,12 @@ def publish_events(self) -> int: published_events = 0 current_timestamp = None + headers = self._get_message_headers() + for event in tqdm(events, "events"): # noinspection PyBroadException try: - self.event_broker.publish(event) + self._publish_with_message_headers(event, headers) published_events += 1 current_timestamp = event["timestamp"] except Exception as e: @@ -81,6 +89,35 @@ def publish_events(self) -> int: return published_events + def _get_message_headers(self) -> Optional[Dict[Text, Text]]: + """Generate a message header for publishing events to a `PikaEventBroker`. + + Returns: + Message headers with a randomly generated uuid under the + `RASA_EXPORT_PROCESS_ID_HEADER_NAME` key if `self.event_broker` is a + `PikaEventBroker`, else `None`. + + """ + if isinstance(self.event_broker, (PikaEventBroker, PikaProducer)): + return {RASA_EXPORT_PROCESS_ID_HEADER_NAME: uuid.uuid4().hex} + + return None + + def _publish_with_message_headers( + self, event: Dict[Text, Any], headers: Optional[Dict[Text, Text]] + ) -> None: + """Publish `event` to a message broker with `headers`. + + Args: + event: Serialized event to be published. + headers: Message headers to be published if `self.event_broker` is a + `PikaEventBroker`. + """ + if isinstance(self.event_broker, (PikaEventBroker, PikaProducer)): + self.event_broker.publish(event=event, headers=headers) + else: + self.event_broker.publish(event) + def _get_conversation_ids_in_tracker(self) -> Set[Text]: """Fetch conversation IDs in `self.tracker_store`. 
diff --git a/rasa/core/training/story_conflict.py b/rasa/core/training/story_conflict.py new file mode 100644 index 000000000000..2510608a68b8 --- /dev/null +++ b/rasa/core/training/story_conflict.py @@ -0,0 +1,323 @@ +import logging +from collections import defaultdict, namedtuple +from typing import List, Optional, Dict, Text, Tuple, Generator, NamedTuple + +from rasa.core.actions.action import ACTION_LISTEN_NAME +from rasa.core.domain import PREV_PREFIX, Domain +from rasa.core.events import ActionExecuted, Event +from rasa.core.featurizers import MaxHistoryTrackerFeaturizer +from rasa.nlu.constants import INTENT_ATTRIBUTE +from rasa.core.training.generator import TrackerWithCachedStates + +logger = logging.getLogger(__name__) + + +class StoryConflict: + """ + Represents a conflict between two or more stories. + + Here, a conflict means that different actions are supposed to follow from + the same dialogue state, which most policies cannot learn. + + Attributes: + conflicting_actions: A list of actions that all follow from the same state. + conflict_has_prior_events: If `False`, then the conflict occurs without any + prior events (i.e. at the beginning of a dialogue). + """ + + def __init__(self, sliced_states: List[Optional[Dict[Text, float]]],) -> None: + """ + Creates a `StoryConflict` from a given state. + + Args: + sliced_states: The (sliced) dialogue state at which the conflict occurs. + """ + self._sliced_states = sliced_states + self._conflicting_actions = defaultdict( + list + ) # {"action": ["story_1", ...], ...} + + def __hash__(self) -> int: + return hash(str(list(self._sliced_states))) + + def add_conflicting_action(self, action: Text, story_name: Text) -> None: + """Adds another action that follows from the same state. + + Args: + action: Name of the action. + story_name: Name of the story where this action is chosen. 
+ """ + self._conflicting_actions[action] += [story_name] + + @property + def conflicting_actions(self) -> List[Text]: + """List of conflicting actions. + + Returns: + List of conflicting actions. + + """ + return list(self._conflicting_actions.keys()) + + @property + def conflict_has_prior_events(self) -> bool: + """Checks if prior events exist. + + Returns: + `True` if anything has happened before this conflict, otherwise `False`. + """ + return _get_previous_event(self._sliced_states[-1])[0] is not None + + def __str__(self) -> Text: + # Describe where the conflict occurs in the stories + last_event_type, last_event_name = _get_previous_event(self._sliced_states[-1]) + if last_event_type: + conflict_message = f"Story structure conflict after {last_event_type} '{last_event_name}':\n" + else: + conflict_message = ( + f"Story structure conflict at the beginning of stories:\n" + ) + + # List which stories are in conflict with one another + for action, stories in self._conflicting_actions.items(): + conflict_message += ( + f" {self._summarize_conflicting_actions(action, stories)}" + ) + + return conflict_message + + @staticmethod + def _summarize_conflicting_actions(action: Text, stories: List[Text]) -> Text: + """Gives a summarized textual description of where one action occurs. + + Args: + action: The name of the action. + stories: The stories in which the action occurs. + + Returns: + A textural summary. + """ + if len(stories) > 3: + # Four or more stories are present + conflict_description = ( + f"'{stories[0]}', '{stories[1]}', and {len(stories) - 2} other trackers" + ) + elif len(stories) == 3: + conflict_description = f"'{stories[0]}', '{stories[1]}', and '{stories[2]}'" + elif len(stories) == 2: + conflict_description = f"'{stories[0]}' and '{stories[1]}'" + elif len(stories) == 1: + conflict_description = f"'{stories[0]}'" + else: + raise ValueError( + "An internal error occurred while trying to summarise a conflict without stories. 
" + "Please file a bug report at https://github.com/RasaHQ/rasa." + ) + + return f"{action} predicted in {conflict_description}\n" + + +class TrackerEventStateTuple(NamedTuple): + """Holds a tracker, an event, and sliced states associated with those.""" + + tracker: TrackerWithCachedStates + event: Event + sliced_states: List[Dict[Text, float]] + + @property + def sliced_states_hash(self) -> int: + return hash(str(list(self.sliced_states))) + + +def _get_length_of_longest_story( + trackers: List[TrackerWithCachedStates], domain: Domain +) -> int: + """Returns the longest story in the given trackers. + + Args: + trackers: Trackers to get stories from. + domain: The domain. + + Returns: + The maximal length of any story + """ + return max([len(tracker.past_states(domain)) for tracker in trackers]) + + +def find_story_conflicts( + trackers: List[TrackerWithCachedStates], + domain: Domain, + max_history: Optional[int] = None, +) -> List[StoryConflict]: + """Generates `StoryConflict` objects, describing conflicts in the given trackers. + + Args: + trackers: Trackers in which to search for conflicts. + domain: The domain. + max_history: The maximum history length to be taken into account. + + Returns: + StoryConflict objects. 
+ """ + if not max_history: + max_history = _get_length_of_longest_story(trackers, domain) + + logger.info(f"Considering the preceding {max_history} turns for conflict analysis.") + + # We do this in two steps, to reduce memory consumption: + + # Create a 'state -> list of actions' dict, where the state is + # represented by its hash + conflicting_state_action_mapping = _find_conflicting_states( + trackers, domain, max_history + ) + + # Iterate once more over all states and note the (unhashed) state, + # for which a conflict occurs + conflicts = _build_conflicts_from_states( + trackers, domain, max_history, conflicting_state_action_mapping + ) + + return conflicts + + +def _find_conflicting_states( + trackers: List[TrackerWithCachedStates], domain: Domain, max_history: int +) -> Dict[int, Optional[List[Text]]]: + """Identifies all states from which different actions follow. + + Args: + trackers: Trackers that contain the states. + domain: The domain object. + max_history: Number of turns to take into account for the state descriptions. + + Returns: + A dictionary mapping state-hashes to a list of actions that follow from each state. 
+ """ + # Create a 'state -> list of actions' dict, where the state is + # represented by its hash + state_action_mapping = defaultdict(list) + for element in _sliced_states_iterator(trackers, domain, max_history): + hashed_state = element.sliced_states_hash + if element.event.as_story_string() not in state_action_mapping[hashed_state]: + state_action_mapping[hashed_state] += [element.event.as_story_string()] + + # Keep only conflicting `state_action_mapping`s + return { + state_hash: actions + for (state_hash, actions) in state_action_mapping.items() + if len(actions) > 1 + } + + +def _build_conflicts_from_states( + trackers: List[TrackerWithCachedStates], + domain: Domain, + max_history: int, + conflicting_state_action_mapping: Dict[int, Optional[List[Text]]], +) -> List["StoryConflict"]: + """Builds a list of `StoryConflict` objects for each given conflict. + + Args: + trackers: Trackers that contain the states. + domain: The domain object. + max_history: Number of turns to take into account for the state descriptions. + conflicting_state_action_mapping: A dictionary mapping state-hashes to a list of actions + that follow from each state. + + Returns: + A list of `StoryConflict` objects that describe inconsistencies in the story + structure. These objects also contain the history that leads up to the conflict. 
+ """ + # Iterate once more over all states and note the (unhashed) state, + # for which a conflict occurs + conflicts = {} + for element in _sliced_states_iterator(trackers, domain, max_history): + hashed_state = element.sliced_states_hash + + if hashed_state in conflicting_state_action_mapping: + if hashed_state not in conflicts: + conflicts[hashed_state] = StoryConflict(element.sliced_states) + + conflicts[hashed_state].add_conflicting_action( + action=element.event.as_story_string(), + story_name=element.tracker.sender_id, + ) + + # Return list of conflicts that arise from unpredictable actions + # (actions that start the conversation) + return [ + conflict + for (hashed_state, conflict) in conflicts.items() + if conflict.conflict_has_prior_events + ] + + +def _sliced_states_iterator( + trackers: List[TrackerWithCachedStates], domain: Domain, max_history: int +) -> Generator[TrackerEventStateTuple, None, None]: + """Creates an iterator over sliced states. + + Iterate over all given trackers and all sliced states within each tracker, + where the slicing is based on `max_history`. + + Args: + trackers: List of trackers. + domain: Domain (used for tracker.past_states). + max_history: Assumed `max_history` value for slicing. + + Yields: + A (tracker, event, sliced_states) triplet. + """ + for tracker in trackers: + states = tracker.past_states(domain) + states = [dict(state) for state in states] + + idx = 0 + for event in tracker.events: + if isinstance(event, ActionExecuted): + sliced_states = MaxHistoryTrackerFeaturizer.slice_state_history( + states[: idx + 1], max_history + ) + yield TrackerEventStateTuple(tracker, event, sliced_states) + idx += 1 + + +def _get_previous_event( + state: Optional[Dict[Text, float]] +) -> Tuple[Optional[Text], Optional[Text]]: + """Returns previous event type and name. + + Returns the type and name of the event (action or intent) previous to the + given state. + + Args: + state: Element of sliced states. 
+ + Returns: + Tuple of (type, name) strings of the prior event. + """ + + previous_event_type = None + previous_event_name = None + + if not state: + return previous_event_type, previous_event_name + + # A typical state is, for example, + # `{'prev_action_listen': 1.0, 'intent_greet': 1.0, 'slot_cuisine_0': 1.0}`. + # We need to look out for `prev_` and `intent_` prefixes in the labels. + for turn_label in state: + if ( + turn_label.startswith(PREV_PREFIX) + and turn_label.replace(PREV_PREFIX, "") != ACTION_LISTEN_NAME + ): + # The `prev_...` was an action that was NOT `action_listen` + return "action", turn_label.replace(PREV_PREFIX, "") + elif turn_label.startswith(INTENT_ATTRIBUTE + "_"): + # We found an intent, but it is only the previous event if + # the `prev_...` was `prev_action_listen`, so we don't return. + previous_event_type = "intent" + previous_event_name = turn_label.replace(INTENT_ATTRIBUTE + "_", "") + + return previous_event_type, previous_event_name diff --git a/rasa/data.py b/rasa/data.py index 709233344ba2..5f2b378b905e 100644 --- a/rasa/data.py +++ b/rasa/data.py @@ -102,7 +102,7 @@ def _find_core_nlu_files_in_directory(directory: Text,) -> Tuple[Set[Text], Set[ story_files = set() nlu_data_files = set() - for root, _, files in os.walk(directory): + for root, _, files in os.walk(directory, followlinks=True): # we sort the files here to ensure consistent order for repeatable training results for f in sorted(files): full_path = os.path.join(root, f) diff --git a/rasa/importers/multi_project.py b/rasa/importers/multi_project.py index ee6e9f56c7fc..1c813c8adcad 100644 --- a/rasa/importers/multi_project.py +++ b/rasa/importers/multi_project.py @@ -88,7 +88,7 @@ def _is_explicitly_imported(self, path: Text) -> bool: return not self.no_skills_selected() and self.is_imported(path) def _init_from_directory(self, path: Text): - for parent, _, files in os.walk(path): + for parent, _, files in os.walk(path, followlinks=True): for file in files: full_path 
= os.path.join(parent, file) if not self.is_imported(full_path): diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index 6510237ab065..cc36edd08d0f 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -20,7 +20,7 @@ import rasa.utils.io as io_utils -from rasa.constants import TEST_DATA_FILE, TRAIN_DATA_FILE +from rasa.constants import TEST_DATA_FILE, TRAIN_DATA_FILE, NLG_DATA_FILE from rasa.nlu.constants import ( DEFAULT_OPEN_UTTERANCE_TYPE, RESPONSE_SELECTOR_PROPERTY_NAME, @@ -1442,9 +1442,12 @@ def compare_nlu( training_examples_per_run.append(len(train.training_examples)) model_output_path = os.path.join(run_path, percent_string) - train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE) - io_utils.create_path(train_split_path) - write_to_file(train_split_path, train.nlu_as_markdown()) + train_split_path = os.path.join(model_output_path, "train") + train_nlu_split_path = os.path.join(train_split_path, TRAIN_DATA_FILE) + train_nlg_split_path = os.path.join(train_split_path, NLG_DATA_FILE) + io_utils.create_path(train_nlu_split_path) + write_to_file(train_nlu_split_path, train.nlu_as_markdown()) + write_to_file(train_nlg_split_path, train.nlg_as_markdown()) for nlu_config, model_name in zip(configs, model_names): logger.info( diff --git a/rasa/utils/io.py b/rasa/utils/io.py index 406a1e47a60c..bed851312433 100644 --- a/rasa/utils/io.py +++ b/rasa/utils/io.py @@ -355,7 +355,7 @@ def list_directory(path: Text) -> List[Text]: return [path] elif os.path.isdir(path): results = [] - for base, dirs, files in os.walk(path): + for base, dirs, files in os.walk(path, followlinks=True): # sort files for same order across runs files = sorted(files, key=_filename_without_prefix) # add not hidden files diff --git a/rasa/core/validator.py b/rasa/validator.py similarity index 80% rename from rasa/core/validator.py rename to rasa/validator.py index bb3674935d80..492e4366c21f 100644 --- a/rasa/core/validator.py +++ b/rasa/validator.py @@ -1,13 +1,16 @@ import logging 
from collections import defaultdict -from typing import List, Set, Text - -from rasa.constants import DOCS_URL_DOMAINS, DOCS_URL_ACTIONS -from rasa.core.constants import UTTER_PREFIX +from typing import Set, Text, Optional from rasa.core.domain import Domain -from rasa.core.training.dsl import ActionExecuted, StoryStep, UserUttered +from rasa.core.training.generator import TrainingDataGenerator from rasa.importers.importer import TrainingDataImporter from rasa.nlu.training_data import TrainingData +from rasa.core.training.structures import StoryGraph +from rasa.core.training.dsl import UserUttered +from rasa.core.training.dsl import ActionExecuted +from rasa.core.constants import UTTER_PREFIX +import rasa.core.training.story_conflict +from rasa.constants import DOCS_URL_DOMAINS, DOCS_URL_ACTIONS from rasa.utils.common import raise_warning logger = logging.getLogger(__name__) @@ -16,22 +19,24 @@ class Validator: """A class used to verify usage of intents and utterances.""" - def __init__(self, domain: Domain, intents: TrainingData, stories: List[StoryStep]): + def __init__( + self, domain: Domain, intents: TrainingData, story_graph: StoryGraph + ) -> None: """Initializes the Validator object. 
""" self.domain = domain self.intents = intents - self.stories = stories + self.story_graph = story_graph @classmethod async def from_importer(cls, importer: TrainingDataImporter) -> "Validator": """Create an instance from the domain, nlu and story files.""" domain = await importer.get_domain() - stories = await importer.get_stories() + story_graph = await importer.get_stories() intents = await importer.get_nlu_data() - return cls(domain, intents, stories.story_steps) + return cls(domain, intents, story_graph) def verify_intents(self, ignore_warnings: bool = True) -> bool: """Compares list of intents in domain with intents in NLU training data.""" @@ -95,7 +100,7 @@ def verify_intents_in_stories(self, ignore_warnings: bool = True) -> bool: stories_intents = { event.intent["name"] - for story in self.stories + for story in self.story_graph.story_steps for event in story.events if type(event) == UserUttered } @@ -165,7 +170,7 @@ def verify_utterances_in_stories(self, ignore_warnings: bool = True) -> bool: utterance_actions = self._gather_utterance_actions() stories_utterances = set() - for story in self.stories: + for story in self.story_graph.story_steps: for event in story.events: if not isinstance(event, ActionExecuted): continue @@ -195,13 +200,49 @@ def verify_utterances_in_stories(self, ignore_warnings: bool = True) -> bool: return everything_is_alright - def verify_all(self, ignore_warnings: bool = True) -> bool: + def verify_story_structure( + self, ignore_warnings: bool = True, max_history: Optional[int] = None + ) -> bool: + """Verifies that the bot behaviour in stories is deterministic. + + Args: + ignore_warnings: When `True`, return `True` even if conflicts were found. + max_history: Maximal number of events to take into account for conflict identification. + + Returns: + `False` if a conflict was found and `ignore_warnings` is `False`. + `True` otherwise. 
+ """ + + logger.info("Story structure validation...") + + trackers = TrainingDataGenerator( + self.story_graph, + domain=self.domain, + remove_duplicates=False, + augmentation_factor=0, + ).generate() + + # Create a list of `StoryConflict` objects + conflicts = rasa.core.training.story_conflict.find_story_conflicts( + trackers, self.domain, max_history + ) + + if not conflicts: + logger.info("No story structure conflicts found.") + else: + for conflict in conflicts: + logger.warning(conflict) + + return ignore_warnings or not conflicts + + def verify_nlu(self, ignore_warnings: bool = True) -> bool: """Runs all the validations on intents and utterances.""" logger.info("Validating intents...") intents_are_valid = self.verify_intents_in_stories(ignore_warnings) - logger.info("Validating there is no duplications...") + logger.info("Validating uniqueness of intents and stories...") there_is_no_duplication = self.verify_example_repetition_in_intents( ignore_warnings ) diff --git a/tests/cli/conftest.py b/tests/cli/conftest.py index 99c3acd4631b..2ac6e97f47e3 100644 --- a/tests/cli/conftest.py +++ b/tests/cli/conftest.py @@ -52,6 +52,8 @@ def run_in_default_project( def do_run(*args): args = ["rasa"] + list(args) - return testdir.run(*args) + result = testdir.run(*args) + os.environ["LOG_LEVEL"] = "INFO" + return result return do_run diff --git a/tests/cli/test_rasa_data.py b/tests/cli/test_rasa_data.py index 3021e9ab12e7..07e54e8c47ed 100644 --- a/tests/cli/test_rasa_data.py +++ b/tests/cli/test_rasa_data.py @@ -1,9 +1,15 @@ +import argparse import os +from unittest.mock import Mock import pytest from collections import namedtuple -from typing import Callable +from typing import Callable, Text + +from _pytest.monkeypatch import MonkeyPatch from _pytest.pytester import RunResult from rasa.cli import data +from rasa.importers.importer import TrainingDataImporter +from rasa.validator import Validator def test_data_split_nlu(run_in_default_project: Callable[..., 
RunResult]): @@ -60,8 +66,8 @@ def test_data_convert_help(run: Callable[..., RunResult]): def test_data_validate_help(run: Callable[..., RunResult]): output = run("data", "validate", "--help") - help_text = """usage: rasa data validate [-h] [-v] [-vv] [--quiet] [--fail-on-warnings] - [-d DOMAIN] [--data DATA]""" + help_text = """usage: rasa data validate [-h] [-v] [-vv] [--quiet] + [--max-history MAX_HISTORY] [--fail-on-warnings]""" lines = help_text.split("\n") @@ -69,9 +75,37 @@ def test_data_validate_help(run: Callable[..., RunResult]): assert output.outlines[i] == line +def _text_is_part_of_output_error(text: Text, output: RunResult) -> bool: + found_info_string = False + for line in output.errlines: + if text in line: + found_info_string = True + return found_info_string + + +def test_data_validate_stories_with_max_history_zero(monkeypatch: MonkeyPatch): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(help="Rasa commands") + data.add_subparser(subparsers, parents=[]) + + args = parser.parse_args(["data", "validate", "stories", "--max-history", 0]) + + async def mock_from_importer(importer: TrainingDataImporter) -> Validator: + return Mock() + + monkeypatch.setattr("rasa.validator.Validator.from_importer", mock_from_importer) + + with pytest.raises(argparse.ArgumentTypeError): + data.validate_files(args) + + def test_validate_files_exit_early(): with pytest.raises(SystemExit) as pytest_e: - args = {"domain": "data/test_domains/duplicate_intents.yml", "data": None} + args = { + "domain": "data/test_domains/duplicate_intents.yml", + "data": None, + "max_history": None, + } data.validate_files(namedtuple("Args", args.keys())(*args.values())) assert pytest_e.type == SystemExit diff --git a/tests/core/test_broker.py b/tests/core/test_broker.py index 4e3453013f0f..5c33d62665d3 100644 --- a/tests/core/test_broker.py +++ b/tests/core/test_broker.py @@ -39,12 +39,12 @@ def test_pika_message_property_app_id(monkeypatch: MonkeyPatch): # unset 
RASA_ENVIRONMENT env var results in empty App ID monkeypatch.delenv("RASA_ENVIRONMENT", raising=False) - assert not pika_producer._message_properties.app_id + assert not pika_producer._get_message_properties().app_id # setting it to some value results in that value as the App ID rasa_environment = "some-test-environment" monkeypatch.setenv("RASA_ENVIRONMENT", rasa_environment) - assert pika_producer._message_properties.app_id == rasa_environment + assert pika_producer._get_message_properties().app_id == rasa_environment def test_no_broker_in_config(): diff --git a/tests/core/test_domain.py b/tests/core/test_domain.py index 8f114be45e9e..b8351d978072 100644 --- a/tests/core/test_domain.py +++ b/tests/core/test_domain.py @@ -201,10 +201,7 @@ def test_custom_slot_type(tmpdir: Path): responses: utter_greet: - - text: hey there! - - actions: - - utter_greet """, + - text: hey there! """, domain_path, ) Domain.load(domain_path) @@ -220,10 +217,7 @@ def test_custom_slot_type(tmpdir: Path): responses: utter_greet: - - text: hey there! - - actions: - - utter_greet""", + - text: hey there!""", """ slots: custom: @@ -231,10 +225,7 @@ def test_custom_slot_type(tmpdir: Path): responses: utter_greet: - - text: hey there! 
- - actions: - - utter_greet""", + - text: hey there!""", ], ) def test_domain_fails_on_unknown_custom_slot_type(tmpdir, domain_unkown_slot_type): @@ -245,9 +236,7 @@ def test_domain_fails_on_unknown_custom_slot_type(tmpdir, domain_unkown_slot_typ def test_domain_to_yaml(): - test_yaml = """actions: -- utter_greet -config: + test_yaml = """config: store_entities_as_slots: true entities: [] forms: [] @@ -306,9 +295,7 @@ def test_domain_to_yaml_deprecated_templates(): def test_merge_yaml_domains(): - test_yaml_1 = """actions: -- utter_greet -config: + test_yaml_1 = """config: store_entities_as_slots: true entities: [] intents: [] @@ -317,10 +304,7 @@ def test_merge_yaml_domains(): utter_greet: - text: hey there!""" - test_yaml_2 = """actions: -- utter_greet -- utter_goodbye -config: + test_yaml_2 = """config: store_entities_as_slots: false session_config: session_expiration_time: 20 @@ -333,6 +317,8 @@ def test_merge_yaml_domains(): cuisine: type: text responses: + utter_goodbye: + - text: bye! 
utter_greet: - text: hey you!""" @@ -342,7 +328,10 @@ def test_merge_yaml_domains(): # single attribute should be taken from domain_1 assert domain.store_entities_as_slots # conflicts should be taken from domain_1 - assert domain.templates == {"utter_greet": [{"text": "hey there!"}]} + assert domain.templates == { + "utter_greet": [{"text": "hey there!"}], + "utter_goodbye": [{"text": "bye!"}], + } # lists should be deduplicated and merged assert domain.intents == ["greet"] assert domain.entities == ["cuisine"] @@ -355,7 +344,10 @@ def test_merge_yaml_domains(): # single attribute should be taken from domain_2 assert not domain.store_entities_as_slots # conflicts should take value from domain_2 - assert domain.templates == {"utter_greet": [{"text": "hey you!"}]} + assert domain.templates == { + "utter_greet": [{"text": "hey you!"}], + "utter_goodbye": [{"text": "bye!"}], + } assert domain.session_config == SessionConfig(20, True) @@ -771,3 +763,42 @@ def test_domain_as_dict_with_session_config(): ) def test_are_sessions_enabled(session_config: SessionConfig, enabled: bool): assert session_config.are_sessions_enabled() == enabled + + +def test_domain_utterance_actions_deprecated_templates(): + new_yaml = """actions: +- utter_greet +- utter_goodbye +config: + store_entities_as_slots: true +entities: [] +forms: [] +intents: [] +templates: + utter_greet: + - text: hey there! + utter_goodbye: + - text: bye! +session_config: + carry_over_slots_to_new_session: true + session_expiration_time: 60 +slots: {}""" + + old_yaml = """config: + store_entities_as_slots: true +entities: [] +forms: [] +intents: [] +responses: + utter_greet: + - text: hey there! + utter_goodbye: + - text: bye! 
+session_config: + carry_over_slots_to_new_session: true + session_expiration_time: 60 +slots: {}""" + + old_domain = Domain.from_yaml(old_yaml) + new_domain = Domain.from_yaml(new_yaml) + assert hash(old_domain) == hash(new_domain) diff --git a/tests/core/test_exporter.py b/tests/core/test_exporter.py index a91be89f159b..6c858e9f3855 100644 --- a/tests/core/test_exporter.py +++ b/tests/core/test_exporter.py @@ -4,9 +4,11 @@ from unittest.mock import Mock import pytest -from _pytest.logging import LogCaptureFixture import rasa.utils.io as io_utils +from rasa.core.brokers.pika import PikaEventBroker +from rasa.core.brokers.sql import SQLEventBroker +from rasa.core.constants import RASA_EXPORT_PROCESS_ID_HEADER_NAME from rasa.core.trackers import DialogueStateTracker from rasa.exceptions import ( NoConversationsInTrackerStoreError, @@ -197,8 +199,52 @@ def test_sort_and_select_events_by_timestamp_error(): exporter._sort_and_select_events_by_timestamp(events) -def _add_conversation_id_to_event(event: Dict, conversation_id: Text): - event["sender_id"] = conversation_id +def test_get_message_headers_pika_event_broker(): + event_broker = Mock(spec=PikaEventBroker) + exporter = MockExporter(event_broker=event_broker) + + # noinspection PyProtectedMember + headers = exporter._get_message_headers() + + assert headers.get(RASA_EXPORT_PROCESS_ID_HEADER_NAME) + + +def test_get_message_headers_non_pika_broker(): + event_broker = Mock() + exporter = MockExporter(event_broker=event_broker) + + # noinspection PyProtectedMember + assert exporter._get_message_headers() is None + + +def test_publish_with_headers_pika_event_broker(): + event_broker = Mock(spec=PikaEventBroker) + exporter = MockExporter(event_broker=event_broker) + + headers = {"some": "header"} + event = {"some": "event"} + + # noinspection PyProtectedMember + exporter._publish_with_message_headers(event, headers) + + # the `PikaEventBroker`'s `publish()` method was called with both + # the `event` and `headers` 
arguments + event_broker.publish.assert_called_with(event=event, headers=headers) + + +def test_publish_with_headers_non_pika_event_broker(): + event_broker = Mock(SQLEventBroker) + exporter = MockExporter(event_broker=event_broker) + + headers = {"some": "header"} + event = {"some": "event"} + + # noinspection PyProtectedMember + exporter._publish_with_message_headers(event, headers) + + # the `SQLEventBroker`'s `publish()` method was called with only the `event` + # argument + event_broker.publish.assert_called_with(event) def test_publishing_error(): diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py index 876fa687efd9..7dd73f476229 100644 --- a/tests/core/test_policies.py +++ b/tests/core/test_policies.py @@ -806,9 +806,6 @@ def create_policy(self, featurizer, priority): @pytest.fixture(scope="class") def default_domain(self): content = """ - actions: - - utter_hello - intents: - greet - bye diff --git a/tests/core/test_story_conflict.py b/tests/core/test_story_conflict.py new file mode 100644 index 000000000000..1a426850a6b9 --- /dev/null +++ b/tests/core/test_story_conflict.py @@ -0,0 +1,160 @@ +from typing import Text, List, Tuple + +from rasa.core.domain import Domain +from rasa.core.training.story_conflict import ( + StoryConflict, + find_story_conflicts, + _get_previous_event, +) +from rasa.core.training.generator import TrainingDataGenerator, TrackerWithCachedStates +from rasa.validator import Validator +from rasa.importers.rasa import RasaFileImporter +from tests.core.conftest import DEFAULT_STORIES_FILE, DEFAULT_DOMAIN_PATH_WITH_SLOTS + + +async def _setup_trackers_for_testing( + domain_path: Text, training_data_file: Text +) -> Tuple[List[TrackerWithCachedStates], Domain]: + importer = RasaFileImporter( + domain_path=domain_path, training_data_paths=[training_data_file], + ) + validator = await Validator.from_importer(importer) + + trackers = TrainingDataGenerator( + validator.story_graph, + domain=validator.domain, + 
remove_duplicates=False, + augmentation_factor=0, + ).generate() + + return trackers, validator.domain + + +async def test_find_no_conflicts(): + trackers, domain = await _setup_trackers_for_testing( + DEFAULT_DOMAIN_PATH_WITH_SLOTS, DEFAULT_STORIES_FILE + ) + + # Create a list of `StoryConflict` objects + conflicts = find_story_conflicts(trackers, domain, 5) + + assert conflicts == [] + + +async def test_find_conflicts_in_short_history(): + trackers, domain = await _setup_trackers_for_testing( + "data/test_domains/default.yml", "data/test_stories/stories_conflicting_1.md" + ) + + # `max_history = 3` is too small, so a conflict must arise + conflicts = find_story_conflicts(trackers, domain, 3) + assert len(conflicts) == 1 + + # With `max_history = 4` the conflict should disappear + conflicts = find_story_conflicts(trackers, domain, 4) + assert len(conflicts) == 0 + + +async def test_find_conflicts_checkpoints(): + trackers, domain = await _setup_trackers_for_testing( + "data/test_domains/default.yml", "data/test_stories/stories_conflicting_2.md" + ) + + # Create a list of `StoryConflict` objects + conflicts = find_story_conflicts(trackers, domain, 5) + + assert len(conflicts) == 1 + assert conflicts[0].conflicting_actions == ["utter_goodbye", "utter_default"] + + +async def test_find_conflicts_or(): + trackers, domain = await _setup_trackers_for_testing( + "data/test_domains/default.yml", "data/test_stories/stories_conflicting_3.md" + ) + + # Create a list of `StoryConflict` objects + conflicts = find_story_conflicts(trackers, domain, 5) + + assert len(conflicts) == 1 + assert conflicts[0].conflicting_actions == ["utter_default", "utter_goodbye"] + + +async def test_find_conflicts_slots_that_break(): + trackers, domain = await _setup_trackers_for_testing( + "data/test_domains/default.yml", "data/test_stories/stories_conflicting_4.md" + ) + + # Create a list of `StoryConflict` objects + conflicts = find_story_conflicts(trackers, domain, 5) + + assert len(conflicts) 
== 1 + assert conflicts[0].conflicting_actions == ["utter_default", "utter_greet"] + + +async def test_find_conflicts_slots_that_dont_break(): + trackers, domain = await _setup_trackers_for_testing( + "data/test_domains/default.yml", "data/test_stories/stories_conflicting_5.md" + ) + + # Create a list of `StoryConflict` objects + conflicts = find_story_conflicts(trackers, domain, 5) + + assert len(conflicts) == 0 + + +async def test_find_conflicts_multiple_stories(): + trackers, domain = await _setup_trackers_for_testing( + "data/test_domains/default.yml", "data/test_stories/stories_conflicting_6.md" + ) + + # Create a list of `StoryConflict` objects + conflicts = find_story_conflicts(trackers, domain, 5) + + assert len(conflicts) == 1 + assert "and 2 other trackers" in str(conflicts[0]) + + +async def test_add_conflicting_action(): + sliced_states = [ + None, + {}, + {"intent_greet": 1.0, "prev_action_listen": 1.0}, + {"prev_utter_greet": 1.0, "intent_greet": 1.0}, + ] + conflict = StoryConflict(sliced_states) + + conflict.add_conflicting_action("utter_greet", "xyz") + conflict.add_conflicting_action("utter_default", "uvw") + assert conflict.conflicting_actions == ["utter_greet", "utter_default"] + + +async def test_has_prior_events(): + sliced_states = [ + None, + {}, + {"intent_greet": 1.0, "prev_action_listen": 1.0}, + {"prev_utter_greet": 1.0, "intent_greet": 1.0}, + ] + conflict = StoryConflict(sliced_states) + assert conflict.conflict_has_prior_events + + +async def test_get_previous_event(): + assert _get_previous_event({"prev_utter_greet": 1.0, "intent_greet": 1.0}) == ( + "action", + "utter_greet", + ) + assert _get_previous_event({"intent_greet": 1.0, "prev_utter_greet": 1.0}) == ( + "action", + "utter_greet", + ) + assert _get_previous_event({"intent_greet": 1.0, "prev_action_listen": 1.0}) == ( + "intent", + "greet", + ) + + +async def test_has_no_prior_events(): + sliced_states = [None] + conflict = StoryConflict(sliced_states) + assert not 
conflict.conflict_has_prior_events diff --git a/tests/core/test_validator.py b/tests/core/test_validator.py index 3395732547ee..c9a9665f18a8 100644 --- a/tests/core/test_validator.py +++ b/tests/core/test_validator.py @@ -1,14 +1,10 @@ import pytest -import logging -from rasa.core.validator import Validator +from rasa.validator import Validator from rasa.importers.rasa import RasaFileImporter from tests.core.conftest import ( - DEFAULT_DOMAIN_PATH_WITH_SLOTS, DEFAULT_STORIES_FILE, DEFAULT_NLU_DATA, ) -from rasa.core.domain import Domain -from rasa.nlu.training_data import TrainingData import rasa.utils.io as io_utils @@ -40,6 +36,33 @@ async def test_verify_valid_utterances(): assert validator.verify_utterances() +async def test_verify_story_structure(): + importer = RasaFileImporter( + domain_path="data/test_domains/default.yml", + training_data_paths=[DEFAULT_STORIES_FILE], + ) + validator = await Validator.from_importer(importer) + assert validator.verify_story_structure(ignore_warnings=False) + + +async def test_verify_bad_story_structure(): + importer = RasaFileImporter( + domain_path="data/test_domains/default.yml", + training_data_paths=["data/test_stories/stories_conflicting_2.md"], + ) + validator = await Validator.from_importer(importer) + assert not validator.verify_story_structure(ignore_warnings=False) + + +async def test_verify_bad_story_structure_ignore_warnings(): + importer = RasaFileImporter( + domain_path="data/test_domains/default.yml", + training_data_paths=["data/test_stories/stories_conflicting_2.md"], + ) + validator = await Validator.from_importer(importer) + assert validator.verify_story_structure(ignore_warnings=True) + + async def test_fail_on_invalid_utterances(tmpdir): # domain and stories are from different domain and should produce warnings invalid_domain = str(tmpdir / "invalid_domain.yml") diff --git a/tests/nlu/base/test_evaluation.py b/tests/nlu/base/test_evaluation.py index 64426d806c5f..95436865d55c 100644 --- 
a/tests/nlu/base/test_evaluation.py +++ b/tests/nlu/base/test_evaluation.py @@ -717,12 +717,11 @@ def test_get_evaluation_metrics( def test_nlu_comparison(tmpdir): configs = [ NLU_DEFAULT_CONFIG_PATH, - "sample_configs/config_supervised_embeddings.yml", + "sample_configs/config_embedding_intent_response_selector.yml", ] output = tmpdir.strpath - compare_nlu_models( - configs, DEFAULT_DATA_PATH, output, runs=2, exclusion_percentages=[50, 80] + configs, DEFAULT_DATA_PATH, output, runs=2, exclusion_percentages=[50, 80], ) assert set(os.listdir(output)) == { @@ -735,6 +734,16 @@ def test_nlu_comparison(tmpdir): run_1_path = os.path.join(output, "run_1") assert set(os.listdir(run_1_path)) == {"50%_exclusion", "80%_exclusion", "test.md"} + exclude_50_path = os.path.join(run_1_path, "50%_exclusion") + modelnames = [os.path.splitext(os.path.basename(config))[0] for config in configs] + + modeloutputs = set( + ["train"] + + [f"{m}_report" for m in modelnames] + + [f"{m}.tar.gz" for m in modelnames] + ) + assert set(os.listdir(exclude_50_path)) == modeloutputs + @pytest.mark.parametrize( "entity_results,targets,predictions,successes,errors",