Merge branch 'master' into 5984_domain_split

RasaHQ · Jul 8, 2020 · aba94b6 · aba94b6
2 parents aa1d35a + 3918d9a
commit aba94b6
Show file tree

Hide file tree

Showing 16 changed files with 98 additions and 71 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -62,7 +62,7 @@ Bugfixes
 Bugfixes
 --------
 - `#5521 <https://github.com/rasahq/rasa/issues/5521>`_: Responses used in ResponseSelector now support new lines by explicitly adding ``\n`` between them.
-- `#5758 <https://github.com/rasahq/rasa/issues/5758>`_: Fixed a bug in `rasa export <https://rasa.com/docs/rasa-x/installation-and-setup/existing-deployment/#migrate-conversations>`_ (:ref:`section_export`) which caused Rasa Open Source to only migrate conversation events from the last :ref:`session_config`.
+- `#5758 <https://github.com/rasahq/rasa/issues/5758>`_: Fixed a bug in `rasa export <https://rasa.com/docs/rasa-x/installation-and-setup/deploy#connect-rasa-deployment>`_ (:ref:`section_export`) which caused Rasa Open Source to only migrate conversation events from the last :ref:`session_config`.
 
 
 [1.10.1] - 2020-05-15

diff --git a/changelog/5759.improvement.rst b/changelog/5759.improvement.rst
@@ -0,0 +1,3 @@
+Move option ``case_sensitive`` from the tokenizers to the featurizers.
+- Remove the option from the ``WhitespaceTokenizer`` and ``ConveRTTokenizer``.
+- Add option ``case_sensitive`` to the ``RegexFeaturizer``.
diff --git a/docs/api/event-brokers.rst b/docs/api/event-brokers.rst
@@ -10,7 +10,7 @@ Event Brokers
 
 An event broker allows you to connect your running assistant to other services that process the data coming 
 in from conversations. For example, you could `connect your live assistant to 
-Rasa X <https://rasa.com/docs/rasa-x/installation-and-setup/existing-deployment/>`_
+Rasa X <https://rasa.com/docs/rasa-x/installation-and-setup/deploy#connect-rasa-deployment>`_
 to review and annotate conversations or forward messages to an external analytics
 service. The event broker publishes messages to a message streaming service, 
 also known as a message broker, to forward Rasa :ref:`events` from the Rasa server to other services.

diff --git a/docs/core/interactive-learning.rst b/docs/core/interactive-learning.rst
@@ -23,7 +23,7 @@ Some people call this `Software 2.0 <https://medium.com/@karpathy/software-2-0-a
 
     Rasa X provides a UI for interactive learning, and you can use any user conversation
     as a starting point. See
-    `Talking to Your Assistant <https://rasa.com/docs/rasa-x/user-guide/enable-workflows#talking-to-your-assistant/>`_
+    `Talk to Your Bot <https://rasa.com/docs/rasa-x/user-guide/share-assistant/#talk-to-your-bot>`_
     in the Rasa X docs.
 
 .. contents::

diff --git a/docs/core/reminders-and-external-events.rst b/docs/core/reminders-and-external-events.rst
@@ -75,7 +75,7 @@ But here we want to make use of the fact that the reminder can carry entities, a
 
   Reminders currently (Rasa 1.8) don't work in `rasa shell`.
   You have to test them with a
-  `running Rasa X server <https://rasa.com/docs/rasa-x/installation-and-setup/docker-compose-script/>`_ instead.
+  `running Rasa X server <https://rasa.com/docs/rasa-x/installation-and-setup/installation-guide/>`_ instead.
 
 .. note::
 

diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst
@@ -169,8 +169,6 @@ WhitespaceTokenizer
 :Description:
     Creates a token for every whitespace separated character sequence.
 :Configuration:
-    Make the tokenizer case insensitive by adding the ``case_sensitive: False`` option, the
-    default being ``case_sensitive: True``.
 
     .. code-block:: yaml
 
@@ -180,8 +178,6 @@ WhitespaceTokenizer
           "intent_tokenization_flag": False
           # Symbol on which intent should be split
           "intent_split_symbol": "_"
-          # Text will be tokenized with case sensitive as default
-          "case_sensitive": True
           # Regular expression to detect tokens
           "token_pattern": None
 
@@ -277,8 +273,6 @@ ConveRTTokenizer
 
 
 :Configuration:
-    Make the tokenizer case insensitive by adding the ``case_sensitive: False`` option, the
-    default being ``case_sensitive: True``.
 
     .. code-block:: yaml
 
@@ -288,8 +282,6 @@ ConveRTTokenizer
           "intent_tokenization_flag": False
           # Symbol on which intent should be split
           "intent_split_symbol": "_"
-          # Text will be tokenized with case sensitive as default
-          "case_sensitive": True
           # Regular expression to detect tokens
           "token_pattern": None
 
@@ -478,11 +470,15 @@ RegexFeaturizer
     :ref:`diet-classifier` components!
 
 :Configuration:
+    Make the featurizer case insensitive by adding the ``case_sensitive: False`` option, the default being
+    ``case_sensitive: True``.
 
     .. code-block:: yaml
 
         pipeline:
         - name: "RegexFeaturizer"
+          # Text is processed case-sensitively by default
+          "case_sensitive": True
 
 .. _CountVectorsFeaturizer:
 

diff --git a/docs/user-guide/building-assistants.rst b/docs/user-guide/building-assistants.rst
@@ -686,7 +686,8 @@ The decision to handle these types of user input should always come from reviewi
 real conversations. You should first build part of your assistant, test it with real users
 (whether that's your end user, or your colleague) and then add what's missing. You shouldn't
 try to implement every possible edge case that you think might happen, because in the end
-your users may never actually behave in that way. `Rasa X <https://rasa.com/docs/rasa-x/installation-and-setup/docker-compose-script/>`__
+your users may never actually behave in that way.
+`Rasa X <https://rasa.com/docs/rasa-x/>`__
 is a tool that can help you review conversations and make these types of decisions.
 
 Generic interjections

diff --git a/docs/user-guide/connectors/your-own-website.rst b/docs/user-guide/connectors/your-own-website.rst
@@ -9,7 +9,7 @@ Your Own Website
 
 If you just want an easy way for users to test your bot, the best option
 is usually the chat interface that ships with Rasa X, where you can `invite users
-to test your bot <https://rasa.com/docs/rasa-x/user-guide/enable-workflows/#conversations-with-test-users>`_.
+to test your bot <https://rasa.com/docs/rasa-x/user-guide/share-assistant/#share-your-bot>`_.
 
 If you already have an existing website and want to add a Rasa assistant to it,
 you can use `Chatroom <https://github.com/scalableminds/chatroom>`_, a widget which you can incorporate into your existing webpage by adding a HTML snippet.

diff --git a/docs/user-guide/how-to-deploy.rst b/docs/user-guide/how-to-deploy.rst
@@ -23,7 +23,7 @@ important happy paths or is what we call a `minimum viable assistant <https://ra
 
 The recommended deployment methods described below make it easy to share your assistant
 with test users via the `share your assistant feature in
-Rasa X <https://rasa.com/docs/rasa-x/user-guide/enable-workflows#conversations-with-test-users>`_.
+Rasa X <https://rasa.com/docs/rasa-x/user-guide/share-assistant/#share-your-bot>`_.
 Then, when you’re ready to make your assistant available via one or more :ref:`messaging-and-voice-channels`,
 you can easily add them to your existing deployment set up.
 
@@ -32,35 +32,36 @@ you can easily add them to your existing deployment set up.
 Recommended Deployment Methods
 ------------------------------
 
-The recommended way to deploy an assistant is using either the One-Line Deployment or Kubernetes/Openshift
+The recommended way to deploy an assistant is using either the Server Quick-Install or Helm Chart
 options we support. Both deploy Rasa X and your assistant. They are the easiest ways to deploy your assistant,
 allow you to use Rasa X to view conversations and turn them into training data, and are production-ready.
+For more details on deployment methods see the `Rasa X Installation Guide <https://rasa.com/docs/rasa-x/installation-and-setup/installation-guide/>`_.
 
-One-Line Deploy Script
-~~~~~~~~~~~~~~~~~~~~~~
+Server Quick-Install
+~~~~~~~~~~~~~~~~~~~~
 
-The one-line deployment script is the easiest way to deploy Rasa X and your assistant. It installs a Kubernetes
+The Server Quick-Install script is the easiest way to deploy Rasa X and your assistant. It installs a Kubernetes
 cluster on your machine with sensible defaults, getting you up and running in one command.
 
-    - Default: Make sure you meet the `OS Requirements <https://rasa.com/docs/rasa-x/installation-and-setup/one-line-deploy-script/#hardware-os-requirements>`_,
+    - Default: Make sure you meet the `OS Requirements <https://rasa.com/docs/rasa-x/installation-and-setup/install/quick-install-script/#hardware-os-requirements>`_,
       then run:
 
       .. copyable::
 
          curl -s get-rasa-x.rasa.com | sudo bash
 
-    - Custom: See `Customizing the Script <https://rasa.com/docs/rasa-x/installation-and-setup/one-line-deploy-script/#customizing-the-script>`_
-      in the `One-Line Deploy Script <https://rasa.com/docs/rasa-x/installation-and-setup/one-line-deploy-script/#customizing-the-script>`_ docs.
+    - Custom: See `Customizing the Script <https://rasa.com/docs/rasa-x/installation-and-setup/customize/#server-quick-install>`_
+      and the `Server Quick-Install <https://rasa.com/docs/rasa-x/installation-and-setup/install/quick-install-script>`_ docs.
 
-Kubernetes/Openshift
-~~~~~~~~~~~~~~~~~~~~
+Helm Chart
+~~~~~~~~~~
 
 For assistants that will receive a lot of user traffic, setting up a Kubernetes or Openshift deployment via
-our helm charts is the best option. This provides a scalable architecture that is also straightforward to deploy.
+our Helm charts is the best option. This provides a scalable architecture that is also straightforward to deploy.
 However, you can also customize the Helm charts if you have specific requirements.
 
-    - Default: Read the `Deploying in Openshift or Kubernetes <https://rasa.com/docs/rasa-x/installation-and-setup/openshift-kubernetes/>`_ docs.
-    - Custom: Read the above, as well as the `Advanced Configuration <https://rasa.com/docs/rasa-x/installation-and-setup/openshift-kubernetes/#advanced-configuration>`_
+    - Default: Read the `Helm Chart Installation <https://rasa.com/docs/rasa-x/installation-and-setup/install/helm-chart/>`_ docs.
+    - Custom: Read the above, as well as the `Advanced Configuration <https://rasa.com/docs/rasa-x/installation-and-setup/customize/#helm-chart>`_
       documentation, and customize the `open source Helm charts <https://github.com/RasaHQ/rasa-x-helm>`_ to your needs.
 
 .. _rasa-only-deployment:
@@ -71,11 +72,11 @@ Alternative Deployment Methods
 Docker Compose
 ~~~~~~~~~~~~~~
 
-You can also run Rasa X in a Docker Compose setup, without the cluster environment. We have a quick install script
+You can also run Rasa X in a Docker Compose setup, without the cluster environment. We have an install script
 for doing so, as well as manual instructions for any custom setups.
 
-    - Default: Read the `Docker Compose Quick Install <https://rasa.com/docs/rasa-x/installation-and-setup/docker-compose-script/>`_ docs or watch the `Masterclass Video <https://www.youtube.com/watch?v=IUYdwy8HPVc>`_ on deploying Rasa X.
-    - Custom: Read the docs `Docker Compose Manual Install <https://rasa.com/docs/rasa-x/installation-and-setup/docker-compose-manual/>`_ documentation for full customization options.
+    - Default: Read the `Docker Compose Install Script <https://rasa.com/docs/rasa-x/installation-and-setup/install/docker-compose/#docker-compose-install-script>`_ docs or watch the `Masterclass Video <https://www.youtube.com/watch?v=IUYdwy8HPVc>`_ on deploying Rasa X.
+    - Custom: Read the `Docker Compose Manual Install <https://rasa.com/docs/rasa-x/installation-and-setup/install/docker-compose/#docker-compose-manual-install>`_ documentation for full customization options.
 
 Rasa Open Source Only Deployment
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -186,7 +187,7 @@ your chosen container registry.
 How you reference the custom action image will depend on your deployment. Pick the relevant documentation for
 your deployment:
 
-    - `One-Line Deployment <https://rasa.com/docs/rasa-x/installation-and-setup/one-line-deploy-script/#customizing-the-script>`_
-    - `Kubernetes or Openshift <https://rasa.com/docs/rasa-x/installation-and-setup/openshift-kubernetes/#adding-a-custom-action-server>`_
-    - `Docker Compose <https://rasa.com/docs/rasa-x/installation-and-setup/docker-compose-script/#connect-a-custom-action-server>`_
+    - `Server Quick-Install <https://rasa.com/docs/rasa-x/installation-and-setup/customize/#quick-install-script-customizing>`_
+    - `Helm Chart <https://rasa.com/docs/rasa-x/installation-and-setup/customize/#adding-a-custom-action-server>`_
+    - `Docker Compose <https://rasa.com/docs/rasa-x/installation-and-setup/customize/#connecting-a-custom-action-server>`_
     - :ref:`Rasa Open Source Only <running-multiple-services>`
diff --git a/docs/user-guide/setting-up-ci-cd.rst b/docs/user-guide/setting-up-ci-cd.rst
@@ -37,7 +37,7 @@ Continuous Integration (CI)
 ---------------------------
 
 The best way to improve an assistant is with frequent `incremental updates
-<https://rasa.com/docs/rasa-x/user-guide/improve-assistant/>`_.
+<https://rasa.com/docs/rasa-x/user-guide/fix-problems>`_.
 No matter how small a change is, you want to be sure that it doesn't introduce
 new problems or negatively impact the performance of your assistant.
 
@@ -110,7 +110,7 @@ End-to-end testing is only as thorough and accurate as the test
 cases you include, so you should continue to grow your set of test conversations
 as you make improvements to your assistant. A good rule of thumb to follow is that you should aim for your test conversations
 to be representative of the true distribution of real conversations.
-Rasa X makes it easy to `add test conversations based on real conversations <https://rasa.com/docs/rasa-x/user-guide/improve-assistant.html#add-test-conversation>`_.
+Rasa X makes it easy to `add test conversations based on real conversations <https://rasa.com/docs/rasa-x/user-guide/test-assistant/#how-to-create-tests>`_.
 
 Note: End-to-end testing does **not** execute your action code. You will need to
 :ref:`test your action code <testing-action-code>` in a separate step.

diff --git a/rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py b/rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py
@@ -32,6 +32,11 @@ class RegexFeaturizer(SparseFeaturizer):
     def required_components(cls) -> List[Type[Component]]:
         return [Tokenizer]
 
+    defaults = {
+        # Text will be processed with case sensitive as default
+        "case_sensitive": True
+    }
+
     def __init__(
         self,
         component_config: Optional[Dict[Text, Any]] = None,
@@ -41,6 +46,7 @@ def __init__(
         super().__init__(component_config)
 
         self.known_patterns = known_patterns if known_patterns else []
+        self.case_sensitive = self.component_config["case_sensitive"]
 
     def add_lookup_tables(self, lookup_tables: List[Dict[Text, Union[Text, List]]]):
         self.known_patterns.extend(self._lookup_table_regexes(lookup_tables))
@@ -118,13 +124,17 @@ def _features_for_patterns(
             # nothing to featurize
             return None, None
 
+        flags = 0  # default flag
+        if not self.case_sensitive:
+            flags = re.IGNORECASE
+
         sequence_length = len(tokens)
 
         sequence_features = np.zeros([sequence_length, len(self.known_patterns)])
         sentence_features = np.zeros([1, len(self.known_patterns)])
 
         for pattern_index, pattern in enumerate(self.known_patterns):
-            matches = re.finditer(pattern["pattern"], message.text)
+            matches = re.finditer(pattern["pattern"], message.text, flags=flags)
             matches = list(matches)
 
             for token_index, t in enumerate(tokens):

diff --git a/rasa/nlu/tokenizers/convert_tokenizer.py b/rasa/nlu/tokenizers/convert_tokenizer.py
@@ -28,8 +28,6 @@ class ConveRTTokenizer(WhitespaceTokenizer):
         "intent_split_symbol": "_",
         # Regular expression to detect tokens
         "token_pattern": None,
-        # Text will be tokenized with case sensitive as default
-        "case_sensitive": True,
     }
 
     def __init__(self, component_config: Dict[Text, Any] = None) -> None:

diff --git a/rasa/nlu/tokenizers/mitie_tokenizer.py b/rasa/nlu/tokenizers/mitie_tokenizer.py
@@ -3,7 +3,6 @@
 from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
 from rasa.nlu.training_data import Message
 
-from rasa.nlu.constants import TOKENS_NAMES, MESSAGE_ATTRIBUTES
 from rasa.utils.io import DEFAULT_ENCODING
 
 

diff --git a/rasa/nlu/tokenizers/whitespace_tokenizer.py b/rasa/nlu/tokenizers/whitespace_tokenizer.py
@@ -3,8 +3,10 @@
 import regex
 import re
 
+from rasa.constants import DOCS_URL_COMPONENTS
 from rasa.nlu.tokenizers.tokenizer import Token, Tokenizer
 from rasa.nlu.training_data import Message
+import rasa.utils.common as common_utils
 
 
 class WhitespaceTokenizer(Tokenizer):
@@ -16,19 +18,22 @@ class WhitespaceTokenizer(Tokenizer):
         "intent_split_symbol": "_",
         # Regular expression to detect tokens
         "token_pattern": None,
-        # Text will be tokenized with case sensitive as default
-        "case_sensitive": True,
     }
 
     def __init__(self, component_config: Dict[Text, Any] = None) -> None:
         """Construct a new tokenizer using the WhitespaceTokenizer framework."""
 
         super().__init__(component_config)
 
-        self.case_sensitive = self.component_config["case_sensitive"]
-
         self.emoji_pattern = self.get_emoji_regex()
 
+        if "case_sensitive" in self.component_config:
+            common_utils.raise_warning(
+                "The option 'case_sensitive' was moved from the tokenizers to the "
+                "featurizers.",
+                docs=DOCS_URL_COMPONENTS,
+            )
+
     @staticmethod
     def get_emoji_regex():
         emoji_pattern = re.compile(
@@ -51,9 +56,6 @@ def remove_emoji(self, text: Text) -> Text:
     def tokenize(self, message: Message, attribute: Text) -> List[Token]:
         text = message.get(attribute)
 
-        if not self.case_sensitive:
-            text = text.lower()
-
         # we need to use regex instead of re, because of
         # https://stackoverflow.com/questions/12746458/python-unicode-regular-expression-matching-failing-with-some-unicode-characters
 

diff --git a/tests/nlu/featurizers/test_regex_featurizer.py b/tests/nlu/featurizers/test_regex_featurizer.py
@@ -1,3 +1,5 @@
+from typing import Text, List, Any
+
 import numpy as np
 import pytest
 
@@ -222,3 +224,38 @@ def test_regex_featurizer_train():
 
     assert seq_vecs is None
     assert sen_vec is None
+
+
+@pytest.mark.parametrize(
+    "sentence, sequence_vector, sentence_vector, case_sensitive",
+    [
+        ("Hey How are you today", [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], True),
+        ("Hey How are you today", [0.0, 1.0, 0.0], [0.0, 1.0, 0.0], False),
+        ("Hey 456 How are you", [0.0, 0.0, 0.0], [1.0, 0.0, 0.0], True),
+        ("Hey 456 How are you", [0.0, 1.0, 0.0], [1.0, 1.0, 0.0], False),
+    ],
+)
+def test_regex_featurizer_case_sensitive(
+    sentence: Text,
+    sequence_vector: List[float],
+    sentence_vector: List[float],
+    case_sensitive: bool,
+    spacy_nlp: Any,
+):
+
+    patterns = [
+        {"pattern": "[0-9]+", "name": "number", "usage": "intent"},
+        {"pattern": "\\bhey*", "name": "hello", "usage": "intent"},
+        {"pattern": "[0-1]+", "name": "binary", "usage": "intent"},
+    ]
+    ftr = RegexFeaturizer({"case_sensitive": case_sensitive}, known_patterns=patterns)
+
+    # adds tokens to the message
+    tokenizer = SpacyTokenizer()
+    message = Message(sentence)
+    message.set(SPACY_DOCS[TEXT], spacy_nlp(sentence))
+    tokenizer.process(message)
+
+    sequence_featrures, sentence_features = ftr._features_for_patterns(message, TEXT)
+    assert np.allclose(sequence_featrures.toarray()[0], sequence_vector, atol=1e-10)
+    assert np.allclose(sentence_features.toarray()[-1], sentence_vector, atol=1e-10)