RasaHQ · tabergma · Dec 8, 2020 · Dec 3, 2020 · Dec 3, 2020 · Dec 4, 2020
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
@@ -11,6 +11,9 @@
 from typing import Any, List, Optional, Text, Dict, Tuple, Union, TYPE_CHECKING
 
 import rasa.utils.io as io_utils
+import rasa.core.actions.action
+from rasa.nlu.constants import TOKENS_NAMES
+from rasa.nlu.extractors.extractor import EntityExtractor
 from rasa.nlu.classifiers.diet_classifier import EntityTagSpec
 from rasa.shared.core.domain import Domain
 from rasa.core.featurizers.tracker_featurizers import (
@@ -29,6 +32,7 @@
     FEATURE_TYPE_SENTENCE,
     ENTITY_ATTRIBUTE_TYPE,
     ENTITY_TAGS,
+    EXTRACTOR,
 )
 from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
 from rasa.core.policies.policy import Policy, PolicyPrediction
@@ -97,7 +101,8 @@
     FEATURIZERS,
     ENTITY_RECOGNITION,
 )
-
+from rasa.shared.core.events import UserUttered, DefinePrevUserUtteredEntities, Event
+from rasa.shared.nlu.training_data.message import Message
 
 if TYPE_CHECKING:
     from rasa.shared.nlu.training_data.features import Features
@@ -280,7 +285,6 @@ def __init__(
         **kwargs: Any,
     ) -> None:
         """Declare instance variables with default values."""
-
         if not featurizer:
             featurizer = self._standard_featurizer(max_history)
 
@@ -315,7 +319,6 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None:
 
     def _create_entity_tag_specs(self) -> List[EntityTagSpec]:
         """Create entity tag specifications with their respective tag id mappings."""
-
         _tag_specs = []
 
         tag_id_index_mapping = self.featurizer.state_featurizer.get_entity_tag_ids()
@@ -570,6 +573,7 @@ def predict_action_probabilities(
         model_data = self._create_model_data(tracker_state_features)
 
         output = self.model.predict(model_data)
+
         # take the last prediction in the sequence
         similarities = output["similarities"].numpy()[:, -1, :]
         confidences = output["action_scores"].numpy()[:, -1, :]
@@ -579,10 +583,58 @@ def predict_action_probabilities(
         if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0:
             confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])
 
+        optional_events = self._create_optional_event_for_entities(
+            output, interpreter, tracker
+        )
+
         return self._prediction(
-            confidence.tolist(), is_end_to_end_prediction=is_e2e_prediction
+            confidence.tolist(),
+            is_end_to_end_prediction=is_e2e_prediction,
+            optional_events=optional_events,
+        )
+
+    def _create_optional_event_for_entities(
+        self,
+        prediction_output: Dict[Text, tf.Tensor],
+        interpreter: NaturalLanguageInterpreter,
+        tracker: DialogueStateTracker,
+    ) -> Optional[List[Event]]:
+        """Creates an event that adds the predicted entities to the user message.
+
+        Args:
+            prediction_output: the output of the model prediction
+            interpreter: the interpreter used to featurize the text of the latest
+                user message (its tokens are needed to build the entities)
+            tracker: the tracker containing the conversation so far
+
+        Returns:
+            `None` if entity recognition is turned off, the latest action is not
+            `ACTION_LISTEN_NAME`, no entity tags were predicted or the latest
+            user message has no text or tokens. Otherwise a list containing a
+            single `DefinePrevUserUtteredEntities` event with the entities.
+        """
+        if not self.config[ENTITY_RECOGNITION]:
+            # entity recognition is not turned on, no entities can be predicted
+            return None
+
+        if tracker.latest_action_name != ACTION_LISTEN_NAME:
+            # the predicted entities belong to the last user uttered event, so
+            # they are only used right after listening; this check is cheap and
+            # done before the tag conversion below
+            return None
+
+        predicted_tags, confidence_values = train_utils.entity_label_to_tags(
+            prediction_output, self._entity_tag_specs
         )
 
+        if ENTITY_ATTRIBUTE_TYPE not in predicted_tags:
+            # no entities detected
+            return None
+
+        text = tracker.latest_message.text
+        if text is None:
+            # the latest user message has no text the tags could refer to
+            return None
+
+        # the tokens of the text are needed to convert the tags into entities
+        parsed_message = interpreter.featurize_message(Message(data={TEXT: text}))
+        tokens = None
+        if parsed_message is not None:
+            tokens = parsed_message.get(TOKENS_NAMES[TEXT])
+        if tokens is None:
+            # NOTE(review): presumably happens for interpreters that do not
+            # tokenize; without tokens the tags cannot be mapped onto the text
+            return None
+
+        entities = EntityExtractor.convert_predictions_into_entities(
+            text, tokens, predicted_tags, confidences=confidence_values
+        )
+
+        # add the extractor name
+        for entity in entities:
+            entity[EXTRACTOR] = "TEDPolicy"
+
+        return [DefinePrevUserUtteredEntities(entities)]
+
     def persist(self, path: Union[Text, Path]) -> None:
         """Persists the policy to a storage."""
         if self.model is None:
@@ -635,6 +687,7 @@ def persist(self, path: Union[Text, Path]) -> None:
     @classmethod
     def load(cls, path: Union[Text, Path]) -> "TEDPolicy":
         """Loads a policy from the storage.
+
         **Needs to load its featurizer**
         """
         model_path = Path(path)
@@ -733,6 +786,15 @@ def __init__(
         label_data: RasaModelData,
         entity_tag_specs: Optional[List[EntityTagSpec]],
     ) -> None:
+        """Initializes the TED model.
+
+        Args:
+            data_signature: the data signature of the input data
+            config: the model configuration
+            use_only_last_dialogue_turns: if 'True' only the last dialogue turn
+                will be used
+            label_data: the label data
+            entity_tag_specs: the entity tag specifications
+        """
         super().__init__("TED", config, data_signature, label_data)
 
         self.use_only_last_dialogue_turns = use_only_last_dialogue_turns
@@ -817,9 +879,10 @@ def _prepare_layers(self) -> None:
     def _prepare_sparse_dense_layer_for(
         self, name: Text, signature: Dict[Text, Dict[Text, List[FeatureSignature]]]
     ) -> None:
-        """Prepare the sparse dense layer for the given attribute name. It is used to
-        combine the sparse and dense features of the attribute at the beginning of
-        the model.
+        """Prepares the sparse dense layer for the given attribute name.
+
+        It is used to combine the sparse and dense features of the attribute at the
+        beginning of the model.
 
         Args:
             name: the attribute name
@@ -1210,7 +1273,6 @@ def _convert_to_original_shape(
         Returns:
             The converted attribute features
         """
-
         # in order to convert the attribute features with shape
         # (combined batch-size and dialogue length x 1 x units)
         # to a shape of (batch-size x dialogue length x units)
@@ -1525,6 +1587,7 @@ def batch_loss(
     # ---PREDICTION---
 
     def prepare_for_predict(self) -> None:
+        """Prepares the model for prediction.
+
+        Stores the second value returned by `_create_all_labels_embed` in
+        `self.all_labels_embed` (presumably the embeddings of all labels, so
+        they are available when predicting).
+        """
         _, self.all_labels_embed = self._create_all_labels_embed()
 
     def batch_predict(

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
@@ -500,7 +500,6 @@ def _extract_features(
 
     def _check_input_dimension_consistency(self, model_data: RasaModelData) -> None:
         """Checks if features have same dimensionality if hidden layers are shared."""
-
         if self.component_config.get(SHARE_HIDDEN_LAYERS):
             num_text_sentence_features = model_data.number_of_units(TEXT, SENTENCE)
             num_label_sentence_features = model_data.number_of_units(LABEL, SENTENCE)
@@ -519,7 +518,6 @@ def _extract_labels_precomputed_features(
         self, label_examples: List[Message], attribute: Text = INTENT
     ) -> Tuple[List[FeatureArray], List[FeatureArray]]:
         """Collects precomputed encodings."""
-
         features = defaultdict(list)
 
         for e in label_examples:
@@ -546,7 +544,6 @@ def _compute_default_label_features(
         labels_example: List[Message],
     ) -> List[FeatureArray]:
         """Computes one-hot representation for the labels."""
-
         logger.debug("No label features found. Computing default label features.")
 
         eye_matrix = np.eye(len(labels_example), dtype=np.float32)
@@ -571,7 +568,6 @@ def _create_label_data(
         If the features are already computed, fetch them from the message object
         else compute a one hot encoding for the label as the feature vector.
         """
-
         # Collect one example for each label
         labels_idx_examples = []
         for label_name, idx in label_id_dict.items():
@@ -731,7 +727,6 @@ def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData:
 
         Performs sanity checks on training data, extracts encodings for labels.
         """
-
         if self.component_config[BILOU_FLAG]:
             bilou_utils.apply_bilou_schema(training_data)
 
@@ -887,7 +882,9 @@ def _predict_entities(
         if predict_out is None:
             return []
 
-        predicted_tags, confidence_values = self._entity_label_to_tags(predict_out)
+        predicted_tags, confidence_values = train_utils.entity_label_to_tags(
+            predict_out, self._entity_tag_specs, self.component_config[BILOU_FLAG]
+        )
 
         entities = self.convert_predictions_into_entities(
             message.get(TEXT),
@@ -902,31 +899,8 @@ def _predict_entities(
 
         return entities
 
-    def _entity_label_to_tags(
-        self, predict_out: Dict[Text, Any]
-    ) -> Tuple[Dict[Text, List[Text]], Dict[Text, List[float]]]:
-        predicted_tags = {}
-        confidence_values = {}
-
-        for tag_spec in self._entity_tag_specs:
-            predictions = predict_out[f"e_{tag_spec.tag_name}_ids"].numpy()
-            confidences = predict_out[f"e_{tag_spec.tag_name}_scores"].numpy()
-            confidences = [float(c) for c in confidences[0]]
-            tags = [tag_spec.ids_to_tags[p] for p in predictions[0]]
-
-            if self.component_config[BILOU_FLAG]:
-                tags, confidences = bilou_utils.ensure_consistent_bilou_tagging(
-                    tags, confidences
-                )
-
-            predicted_tags[tag_spec.tag_name] = tags
-            confidence_values[tag_spec.tag_name] = confidences
-
-        return predicted_tags, confidence_values
-
     def process(self, message: Message, **kwargs: Any) -> None:
         """Return the most likely label and its similarity to the input."""
-
         out = self._predict(message)
 
         if self.component_config[INTENT_CLASSIFICATION]:

diff --git a/rasa/nlu/extractors/extractor.py b/rasa/nlu/extractors/extractor.py
@@ -128,8 +128,8 @@ def filter_trainable_entities(
 
         return filtered
 
+    @staticmethod
     def convert_predictions_into_entities(
-        self,
         text: Text,
         tokens: List[Token],
         tags: Dict[Text, List[Text]],
@@ -158,16 +158,22 @@ def convert_predictions_into_entities(
         last_token_end = -1
 
         for idx, token in enumerate(tokens):
-            current_entity_tag = self.get_tag_for(tags, ENTITY_ATTRIBUTE_TYPE, idx)
+            current_entity_tag = EntityExtractor.get_tag_for(
+                tags, ENTITY_ATTRIBUTE_TYPE, idx
+            )
 
             if current_entity_tag == NO_ENTITY_TAG:
                 last_entity_tag = NO_ENTITY_TAG
                 last_token_end = token.end
                 continue
 
-            current_group_tag = self.get_tag_for(tags, ENTITY_ATTRIBUTE_GROUP, idx)
+            current_group_tag = EntityExtractor.get_tag_for(
+                tags, ENTITY_ATTRIBUTE_GROUP, idx
+            )
             current_group_tag = bilou_utils.tag_without_prefix(current_group_tag)
-            current_role_tag = self.get_tag_for(tags, ENTITY_ATTRIBUTE_ROLE, idx)
+            current_role_tag = EntityExtractor.get_tag_for(
+                tags, ENTITY_ATTRIBUTE_ROLE, idx
+            )
             current_role_tag = bilou_utils.tag_without_prefix(current_role_tag)
 
             group_or_role_changed = (
@@ -207,7 +213,7 @@ def convert_predictions_into_entities(
 
             if new_tag_found:
                 # new entity found
-                entity = self._create_new_entity(
+                entity = EntityExtractor._create_new_entity(
                     list(tags.keys()),
                     current_entity_tag,
                     current_group_tag,
@@ -217,7 +223,7 @@ def convert_predictions_into_entities(
                     confidences,
                 )
                 entities.append(entity)
-            elif self._check_is_single_entity(
+            elif EntityExtractor._check_is_single_entity(
                 text, token, last_token_end, split_entities_config, current_entity_tag
             ):
                 # current token has the same entity tag as the token before and
@@ -226,14 +232,16 @@ def convert_predictions_into_entities(
                 # and a whitespace.
                 entities[-1][ENTITY_ATTRIBUTE_END] = token.end
                 if confidences is not None:
-                    self._update_confidence_values(entities, confidences, idx)
+                    EntityExtractor._update_confidence_values(
+                        entities, confidences, idx
+                    )
 
             else:
                 # the token has the same entity tag as the token before but the two
                 # tokens are separated by at least 2 symbols (e.g. multiple spaces,
                 # a comma and a space, etc.) and also shouldn't be represented as a
                 # single entity
-                entity = self._create_new_entity(
+                entity = EntityExtractor._create_new_entity(
                     list(tags.keys()),
                     current_entity_tag,
                     current_group_tag,

diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
@@ -632,13 +632,76 @@ def as_dict(self) -> Dict[Text, Any]:
         return d
 
 
+# noinspection PyProtectedMember
+class DefinePrevUserUtteredEntities(Event):
+    """Event that is used to set entities on a previous user uttered event.
+
+    Two events of this type are equal if they hold the same entities.
+    """
+
+    type_name = "user_entities"
+
+    def __init__(
+        self,
+        entities: List[Dict[Text, Any]],
+        timestamp: Optional[float] = None,
+        metadata: Optional[Dict[Text, Any]] = None,
+    ) -> None:
+        """Initializes a DefinePrevUserUtteredEntities event.
+
+        Args:
+            entities: the entities of a previous user uttered event
+            timestamp: the timestamp
+            metadata: some optional metadata
+        """
+        self.entities = entities
+        super().__init__(timestamp, metadata)
+
+    def __str__(self) -> Text:
+        """Returns the string representation of the event."""
+        entity_str = [e[ENTITY_ATTRIBUTE_TYPE] for e in self.entities]
+        return f"DefinePrevUserUtteredEntities({entity_str})"
+
+    def __hash__(self) -> int:
+        """Returns the hash value of the event.
+
+        The entities are a list of dicts and therefore not hashable themselves,
+        so a JSON dump of them is hashed instead.
+        """
+        # imported locally as the imports of this module are not changed here
+        import json
+
+        # `sort_keys` keeps the hash independent of the key order inside the
+        # entity dicts, so that equal events also have equal hash values
+        return hash(json.dumps(self.entities, sort_keys=True))
+
+    def __eq__(self, other: Any) -> bool:
+        """Compares this event with another event.
+
+        Args:
+            other: the object to compare with
+
+        Returns:
+            `True` if `other` is a `DefinePrevUserUtteredEntities` event that
+            holds the same entities, `False` otherwise.
+        """
+        if not isinstance(other, DefinePrevUserUtteredEntities):
+            return False
+        return self.entities == other.entities
+
+    def as_story_string(self) -> None:
+        """Returns the event as story string.
+
+        Returns:
+            None, as this event should not appear inside the story.
+        """
+        return None
+
+    @classmethod
+    def _from_parameters(
+        cls, parameters: Dict[Text, Any]
+    ) -> "DefinePrevUserUtteredEntities":
+        """Creates the event from a dict with its entities, timestamp and metadata.
+
+        Args:
+            parameters: the dict to read the `ENTITIES`, "timestamp" and
+                "metadata" values from
+
+        Returns:
+            The created event.
+        """
+        return cls(
+            parameters.get(ENTITIES),
+            parameters.get("timestamp"),
+            parameters.get("metadata"),
+        )
+
+    def as_dict(self) -> Dict[Text, Any]:
+        """Converts the event into a dict.
+
+        Returns:
+            A dict that represents this event.
+        """
+        d = super().as_dict()
+        d.update({ENTITIES: self.entities})
+        return d
+
+
 # noinspection PyProtectedMember
 class BotUttered(Event):
     """The bot has said something to the user.
 
     This class is not used in the story training as it is contained in the
 
-    ``ActionExecuted`` class. An entry is made in the ``Tracker``."""
+    ``ActionExecuted`` class. An entry is made in the ``Tracker``.
+    """
 
     type_name = "bot"