From d813bd571e2bf98aeb1934e6a4aa0a8f12eae31d Mon Sep 17 00:00:00 2001
From: Rafael Teodosio
Date: Thu, 22 Apr 2021 02:07:32 -0300
Subject: [PATCH 01/12] change variable names

---
 rasa/shared/core/slots.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/rasa/shared/core/slots.py b/rasa/shared/core/slots.py
index 6e7b244efb83..ed722ec699d4 100644
--- a/rasa/shared/core/slots.py
+++ b/rasa/shared/core/slots.py
@@ -33,7 +33,6 @@ def __init__(
         influence_conversation: bool = True,
     ) -> None:
         """Create a Slot.
-
         Args:
             name: The name of the slot.
             initial_value: The initial value of the slot.
@@ -54,7 +53,6 @@ def __init__(
 
     def feature_dimensionality(self) -> int:
         """How many features this single slot creates.
-
         Returns:
             The number of features. `0` if the slot is unfeaturized. The dimensionality
             of the array returned by `as_feature` needs to correspond to this value.
@@ -74,7 +72,6 @@ def has_features(self) -> bool:
 
     def value_reset_delay(self) -> Optional[int]:
         """After how many turns the slot should be reset to the initial_value.
-
         If the delay is set to `None`, the slot will keep its value forever."""
         # TODO: FUTURE this needs to be implemented - slots are not reset yet
         return self._value_reset_delay
@@ -186,7 +183,8 @@ def __init__(
 
     def _as_feature(self) -> List[float]:
        try:
-            capped_value = max(self.min_value, min(self.max_value, float(self.value)))
+            capped_value = max(self.min_value, min(
+                self.max_value, float(self.value)))
             if abs(self.max_value - self.min_value) > 0:
                 covered_range = abs(self.max_value - self.min_value)
             else:
@@ -197,10 +195,10 @@ def _as_feature(self) -> List[float]:
 
     def persistence_info(self) -> Dict[Text, Any]:
         """Returns relevant information to persist this slot."""
-        d = super().persistence_info()
-        d["max_value"] = self.max_value
-        d["min_value"] = self.min_value
-        return d
+        dictionary_persistence = super().persistence_info()
+        dictionary_persistence["max_value"] = self.max_value
+        dictionary_persistence["min_value"] = self.min_value
+        return dictionary_persistence
 
     def _feature_dimensionality(self) -> int:
         return len(self.as_feature())
@@ -330,15 +328,15 @@ def add_default_value(self) -> None:
 
     def persistence_info(self) -> Dict[Text, Any]:
         """Returns serialized slot."""
-        d = super().persistence_info()
-        d["values"] = [
+        dictionary_serialized = super().persistence_info()
+        dictionary_serialized["values"] = [
             value
             for value in self.values
             # Don't add default slot when persisting it.
             # We'll re-add it on the fly when creating the domain.
             if value != rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE
         ]
-        return d
+        return dictionary_serialized
 
     def _as_feature(self) -> List[float]:
         r = [0.0] * self.feature_dimensionality()

From 279e6072c0dfdf7578978da44dd08ebbf566fb90 Mon Sep 17 00:00:00 2001
From: Rafael Teodosio
Date: Thu, 22 Apr 2021 02:09:27 -0300
Subject: [PATCH 02/12] change variable names to expressive names

---
 .../core/training_data/visualization.py | 105 +++++++++---------
 1 file changed, 52 insertions(+), 53 deletions(-)

diff --git a/rasa/shared/core/training_data/visualization.py b/rasa/shared/core/training_data/visualization.py
index 53e73653af4d..90d593a2c37d 100644
--- a/rasa/shared/core/training_data/visualization.py
+++ b/rasa/shared/core/training_data/visualization.py
@@ -43,24 +43,23 @@ def _create_reverse_mapping(
         data: "TrainingData",
     ) -> Dict[Dict[Text, Any], List["Message"]]:
         """Create a mapping from intent to messages
-
         This allows a faster intent lookup."""
-        d = defaultdict(list)
+        dictionary_intent_to_messages = defaultdict(list)
         for example in data.training_examples:
             if example.get(INTENT, {}) is not None:
-                d[example.get(INTENT, {})].append(example)
-        return d
+                dictionary_intent_to_messages[example.get(
+                    INTENT, {})].append(example)
+        return dictionary_intent_to_messages
 
     @staticmethod
-    def _contains_same_entity(entities: Dict[Text, Any], e: Dict[Text, Any]) -> bool:
-        return entities.get(e.get(ENTITY_ATTRIBUTE_TYPE)) is None or entities.get(
-            e.get(ENTITY_ATTRIBUTE_TYPE)
-        ) != e.get(ENTITY_ATTRIBUTE_VALUE)
+    def _contains_same_entity(entities: Dict[Text, Any], entity: Dict[Text, Any]) -> bool:
+        return entities.get(entity.get(ENTITY_ATTRIBUTE_TYPE)) is None or entities.get(
+            entity.get(ENTITY_ATTRIBUTE_TYPE)
+        ) != entity.get(ENTITY_ATTRIBUTE_VALUE)
 
     def message_for_data(self, structured_info: Dict[Text, Any]) -> Any:
         """Find a data sample with the same intent and entities.
-
         Given the parsed data from a message (intent and entities) finds a
         message in the data that has the same intent and entities."""
@@ -70,11 +69,11 @@ def message_for_data(self, structured_info: Dict[Text, Any]) -> Any:
             random.shuffle(usable_examples)
             for example in usable_examples:
                 entities = {
-                    e.get(ENTITY_ATTRIBUTE_TYPE): e.get(ENTITY_ATTRIBUTE_VALUE)
-                    for e in example.get(ENTITIES, [])
+                    entity.get(ENTITY_ATTRIBUTE_TYPE): entity.get(ENTITY_ATTRIBUTE_VALUE)
+                    for entity in example.get(ENTITIES, [])
                 }
-                for e in structured_info.get(ENTITIES, []):
-                    if self._contains_same_entity(entities, e):
+                for entity in structured_info.get(ENTITIES, []):
+                    if self._contains_same_entity(entities, entity):
                         break
                 else:
                     return example.get(TEXT)
@@ -85,7 +84,6 @@ def _fingerprint_node(
     graph: "networkx.MultiDiGraph", node: int, max_history: int
 ) -> Set[Text]:
     """Fingerprint a node in a graph.
-
     Can be used to identify nodes that are similar and can be merged within the
     graph.
     Generates all paths starting at `node` following the directed graph up to
@@ -138,7 +136,6 @@ def _outgoing_edges_are_similar(
 ) -> bool:
     """If the outgoing edges from the two nodes are similar enough,
     it doesn't matter if you are in a or b.
-
     As your path will be the same because the outgoing edges will lead you to
     the same nodes anyways."""
 
@@ -170,8 +167,8 @@ def _nodes_are_equivalent(
 
 def _add_edge(
     graph: "networkx.MultiDiGraph",
-    u: int,
-    v: int,
+    vertex_u: int,
+    vertex_v: int,
     key: Optional[Text],
     label: Optional[Text] = None,
     **kwargs: Any,
@@ -185,18 +182,17 @@ def _add_edge(
     if key == EDGE_NONE_LABEL:
         label = ""
 
-    if not graph.has_edge(u, v, key=EDGE_NONE_LABEL):
-        graph.add_edge(u, v, key=key, label=label, **kwargs)
+    if not graph.has_edge(vertex_u, vertex_v, key=EDGE_NONE_LABEL):
+        graph.add_edge(vertex_u, vertex_v, key=key, label=label, **kwargs)
     else:
-        d = graph.get_edge_data(u, v, key=EDGE_NONE_LABEL)
-        _transfer_style(kwargs, d)
+        data = graph.get_edge_data(vertex_u, vertex_v, key=EDGE_NONE_LABEL)
+        _transfer_style(kwargs, data)
 
 
 def _transfer_style(
     source: Dict[Text, Any], target: Dict[Text, Any]
 ) -> Dict[Text, Any]:
     """Copy over class names from source to target for all special classes.
-
     Used if a node is highlighted and merged with another node."""
 
     clazzes = source.get("class", "")
 
     special_classes = {"dashed", "active"}
 
     if "class" not in target:
         target["class"] = ""
 
-    for c in special_classes:
-        if c in clazzes and c not in target["class"]:
-            target["class"] += " " + c
+    for copy in special_classes:
+        if copy in clazzes and copy not in target["class"]:
+            target["class"] += " " + copy
 
     target["class"] = target["class"].strip()
     return target
@@ -224,16 +220,17 @@ def _merge_equivalent_nodes(graph: "networkx.MultiDiGraph", max_history: int) ->
     while changed:
         changed = False
         remaining_node_ids = [n for n in graph.nodes() if n > 0]
-        for idx, i in enumerate(remaining_node_ids):
+        for index, i in enumerate(remaining_node_ids):
             if graph.has_node(i):
                 # assumes node equivalence is cumulative
-                for j in remaining_node_ids[idx + 1 :]:
+                for j in remaining_node_ids[index + 1:]:
                     if graph.has_node(j) and _nodes_are_equivalent(
                         graph, i, j, max_history
                     ):
                         # make sure we keep special styles
                         _transfer_style(
-                            graph.nodes(data=True)[j], graph.nodes(data=True)[i]
+                            graph.nodes(data=True)[
+                                j], graph.nodes(data=True)[i]
                         )
 
                         changed = True
@@ -252,7 +249,8 @@ def _merge_equivalent_nodes(graph: "networkx.MultiDiGraph", max_history: int) ->
                         )
                         graph.remove_edge(j, succ_node)
                     # moves all incoming edges to the other node
-                    j_incoming_edges = list(graph.in_edges(j, keys=True, data=True))
+                    j_incoming_edges = list(
+                        graph.in_edges(j, keys=True, data=True))
                     for prev_node, _, k, d in j_incoming_edges:
                         _add_edge(
                             graph,
@@ -274,7 +272,6 @@ async def _replace_edge_labels_with_nodes(
 ) -> None:
     """User messages are created as edge labels. This removes the labels and
     creates nodes instead.
-
     The algorithms (e.g. merging) are simpler if the
     user messages are labels on the edges.
But it sometimes looks better if in the final graphs the user messages are nodes instead @@ -286,25 +283,27 @@ async def _replace_edge_labels_with_nodes( message_generator = None edges = list(graph.edges(keys=True, data=True)) - for s, e, k, d in edges: - if k != EDGE_NONE_LABEL: - if message_generator and d.get("label", k) is not None: - parsed_info = await interpreter.parse(d.get("label", k)) + for edge_label_s, edge_label_e, edge_label_k, edge_label_d in edges: + if edge_label_k != EDGE_NONE_LABEL: + if message_generator and edge_label_d.get("label", edge_label_k) is not None: + parsed_info = await interpreter.parse(edge_label_d.get("label", edge_label_k)) label = message_generator.message_for_data(parsed_info) else: - label = d.get("label", k) + label = edge_label_d.get("label", edge_label_k) next_id += 1 - graph.remove_edge(s, e, k) + graph.remove_edge(edge_label_s, edge_label_e, edge_label_k) graph.add_node( next_id, label=label, shape="rect", style="filled", fillcolor="lightblue", - **_transfer_style(d, {"class": "intent"}), + **_transfer_style(edge_label_d, {"class": "intent"}), ) - graph.add_edge(s, next_id, **{"class": d.get("class", "")}) - graph.add_edge(next_id, e, **{"class": d.get("class", "")}) + graph.add_edge(edge_label_s, next_id, ** + {"class": edge_label_d.get("class", "")}) + graph.add_edge(next_id, edge_label_e, ** + {"class": edge_label_d.get("class", "")}) def visualization_html_path() -> Text: @@ -326,7 +325,8 @@ def persist_graph(graph: "networkx.Graph", output_file: Text) -> None: graph_as_text = expg.to_string() # escape backslashes graph_as_text = graph_as_text.replace("\\", "\\\\") - template = template.replace("// { graph-content }", f"graph = `{graph_as_text}`", 1) + template = template.replace( + "// { graph-content }", f"graph = `{graph_as_text}`", 1) rasa.shared.utils.io.write_text_file(template, output_file) @@ -373,7 +373,8 @@ def _add_default_nodes(graph: "networkx.MultiDiGraph", fontsize: int = 12) -> No fontsize=fontsize, **{"class": "end"}, ) - graph.add_node(TMP_NODE_ID, label="TMP", style="invis", **{"class": "invisible"}) + graph.add_node(TMP_NODE_ID, label="TMP", + style="invis", **{"class": "invisible"}) def _create_graph(fontsize: int = 12) -> "networkx.MultiDiGraph": @@ -439,12 +440,12 @@ async def visualize_neighborhood( message = None current_node = START_NODE_ID - idx = 0 + index = 0 is_current = events == current - for idx, el in enumerate(events): + for index, el in enumerate(events): if not prefix: - idx -= 1 + index -= 1 break if isinstance(el, UserUttered): if not el.intent: @@ -452,7 +453,8 @@ async def visualize_neighborhood( else: message = el.parse_data elif ( - isinstance(el, ActionExecuted) and el.action_name != ACTION_LISTEN_NAME + isinstance( + el, ActionExecuted) and el.action_name != ACTION_LISTEN_NAME ): next_node_idx += 1 graph.add_node( @@ -475,8 +477,8 @@ async def visualize_neighborhood( # "END" or a "TMP" node if this is the active conversation if is_current: if ( - isinstance(events[idx], ActionExecuted) - and events[idx].action_name == ACTION_LISTEN_NAME + isinstance(events[index], ActionExecuted) + and events[index].action_name == ACTION_LISTEN_NAME ): next_node_idx += 1 graph.add_node( @@ -494,10 +496,11 @@ async def visualize_neighborhood( target = TMP_NODE_ID else: target = TMP_NODE_ID - elif idx == len(events) - 1: + elif index == len(events) - 1: target = END_NODE_ID elif current_node and current_node not in path_ellipsis_ends: - graph.add_node(special_node_idx, label="...", **{"class": "ellipsis"}) + 
graph.add_node(special_node_idx, label="...", + **{"class": "ellipsis"}) target = special_node_idx path_ellipsis_ends.add(current_node) special_node_idx -= 1 @@ -550,7 +553,6 @@ async def visualize_stories( fontsize: int = 12, ) -> "networkx.MultiDiGraph": """Given a set of stories, generates a graph visualizing the flows in the stories. - Visualization is always a trade off between making the graph as small as possible while at the same time making sure the meaning doesn't change to "much". The @@ -560,17 +562,14 @@ async def visualize_stories( the algorithm might create paths through the graph that aren't actually specified in the stories, but we try to minimize that. - Output file defines if and where a file containing the plotted graph should be stored. - The history defines how much 'memory' the graph has. This influences in which situations the algorithm will merge nodes. Nodes will only be merged if they are equal within the history, this means the larger the history is we take into account the less likely it is we merge any nodes. - The training data parameter can be used to pass in a Rasa NLU training data instance. It will be used to replace the user messages from the story file with actual From 01058347b44fed9297cf9a55ca09cc3ca57893fe Mon Sep 17 00:00:00 2001 From: Rafael Teodosio Date: Thu, 22 Apr 2021 02:12:52 -0300 Subject: [PATCH 03/12] change variables names to a expressive one --- rasa/shared/nlu/training_data/message.py | 56 +++++++++--------------- 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py index f98b218185fc..6d80afacffb0 100644 --- a/rasa/shared/nlu/training_data/message.py +++ b/rasa/shared/nlu/training_data/message.py @@ -55,7 +55,6 @@ def add_features(self, features: Optional["Features"]) -> None: def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None: """Adds diagnostic data from the `origin` component. - Args: origin: Name of the component that created the data. data: The diagnostic data. @@ -71,7 +70,6 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None: def set(self, prop: Text, info: Any, add_to_output: bool = False) -> None: """Sets the message's property to the given value. - Args: prop: Name of the property to be set. info: Value to be assigned to that property. @@ -87,26 +85,26 @@ def get(self, prop: Text, default: Optional[Any] = None) -> Any: def as_dict_nlu(self) -> dict: """Get dict representation of message as it would appear in training data""" - d = self.as_dict() - if d.get(INTENT, None): - d[INTENT] = self.get_full_intent() - d.pop(RESPONSE, None) - d.pop(INTENT_RESPONSE_KEY, None) - return d + dictionary_message = self.as_dict() + if dictionary_message.get(INTENT, None): + dictionary_message[INTENT] = self.get_full_intent() + dictionary_message.pop(RESPONSE, None) + dictionary_message.pop(INTENT_RESPONSE_KEY, None) + return dictionary_message def as_dict(self, only_output_properties: bool = False) -> Dict: if only_output_properties: - d = { + dictionary_data = { key: value for key, value in self.data.items() if key in self.output_properties } else: - d = self.data + dictionary_data = self.data # Filter all keys with None value. 
These could have come while building the # Message object in markdown format - return {key: value for key, value in d.items() if value is not None} + return {key: value for key, value in dictionary_data.items() if value is not None} def __eq__(self, other: Any) -> bool: if not isinstance(other, Message): @@ -116,7 +114,6 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: """Calculate a hash for the message. - Returns: Hash of the message. """ @@ -124,7 +121,6 @@ def __hash__(self) -> int: def fingerprint(self) -> Text: """Calculate a string fingerprint for the message. - Returns: Fingerprint of the message. """ @@ -141,20 +137,19 @@ def build( **kwargs: Any, ) -> "Message": """Builds a Message from `UserUttered` data. - Args: text: text of a user's utterance intent: an intent of the user utterance entities: entities in the user's utterance intent_metadata: optional metadata for the intent example_metadata: optional metadata for the intent example - Returns: Message """ data: Dict[Text, Any] = {TEXT: text} if intent: - split_intent, response_key = cls.separate_intent_response_key(intent) + split_intent, response_key = cls.separate_intent_response_key( + intent) if split_intent: data[INTENT] = split_intent if response_key: @@ -209,13 +204,10 @@ def get_sparse_features( self, attribute: Text, featurizers: Optional[List[Text]] = None ) -> Tuple[Optional["Features"], Optional["Features"]]: """Gets all sparse features for the attribute given the list of featurizers. - If no featurizers are provided, all available features will be considered. - Args: attribute: message attribute featurizers: names of featurizers to consider - Returns: Sparse features. """ @@ -226,8 +218,10 @@ def get_sparse_features( attribute, featurizers ) - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = self._combine_features(sentence_features, featurizers) + sequence_features = self._combine_features( + sequence_features, featurizers) + sentence_features = self._combine_features( + sentence_features, featurizers) return sequence_features, sentence_features @@ -235,13 +229,10 @@ def get_dense_features( self, attribute: Text, featurizers: Optional[List[Text]] = None ) -> Tuple[Optional["Features"], Optional["Features"]]: """Gets all dense features for the attribute given the list of featurizers. - If no featurizers are provided, all available features will be considered. - Args: attribute: message attribute featurizers: names of featurizers to consider - Returns: Dense features. """ @@ -252,8 +243,10 @@ def get_dense_features( attribute, featurizers ) - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = self._combine_features(sentence_features, featurizers) + sequence_features = self._combine_features( + sequence_features, featurizers) + sentence_features = self._combine_features( + sentence_features, featurizers) return sequence_features, sentence_features @@ -261,13 +254,10 @@ def get_all_features( self, attribute: Text, featurizers: Optional[List[Text]] = None ) -> List["Features"]: """Gets all features for the attribute given the list of featurizers. - If no featurizers are provided, all available features will be considered. - Args: attribute: message attribute featurizers: names of featurizers to consider - Returns: Features. 
""" @@ -280,13 +270,10 @@ def features_present( self, attribute: Text, featurizers: Optional[List[Text]] = None ) -> bool: """Checks if there are any features present for the attribute and featurizers. - If no featurizers are provided, all available features will be considered. - Args: attribute: Message attribute. featurizers: Names of featurizers to consider. - Returns: ``True``, if features are present, ``False`` otherwise. """ @@ -368,10 +355,8 @@ def _combine_features( def is_core_or_domain_message(self) -> bool: """Checks whether the message is a core message or from the domain. - E.g. a core message is created from a story or a domain action, not from the NLU data. - Returns: True, if message is a core or domain message, false otherwise. """ @@ -390,7 +375,6 @@ def is_core_or_domain_message(self) -> bool: def is_e2e_message(self) -> bool: """Checks whether the message came from an e2e story. - Returns: `True`, if message is a from an e2e story, `False` otherwise. """ @@ -412,7 +396,7 @@ def find_overlapping_entities( entities_with_location.sort(key=lambda e: e[ENTITY_ATTRIBUTE_START]) overlapping_pairs: List[Tuple[Dict[Text, Any], Dict[Text, Any]]] = [] for i, entity in enumerate(entities_with_location): - for other_entity in entities_with_location[i + 1 :]: + for other_entity in entities_with_location[i + 1:]: if other_entity[ENTITY_ATTRIBUTE_START] < entity[ENTITY_ATTRIBUTE_END]: overlapping_pairs.append((entity, other_entity)) else: From 0f2886d28e08171e34f57502cd9f243f205b1449 Mon Sep 17 00:00:00 2001 From: Rafael Teodosio Date: Thu, 22 Apr 2021 02:35:20 -0300 Subject: [PATCH 04/12] fix code quality issues --- rasa/shared/core/training_data/visualization.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rasa/shared/core/training_data/visualization.py b/rasa/shared/core/training_data/visualization.py index 90d593a2c37d..0581df48ce64 100644 --- a/rasa/shared/core/training_data/visualization.py +++ b/rasa/shared/core/training_data/visualization.py @@ -300,10 +300,10 @@ async def _replace_edge_labels_with_nodes( fillcolor="lightblue", **_transfer_style(edge_label_d, {"class": "intent"}), ) - graph.add_edge(edge_label_s, next_id, ** - {"class": edge_label_d.get("class", "")}) - graph.add_edge(next_id, edge_label_e, ** - {"class": edge_label_d.get("class", "")}) + graph.add_edge(edge_label_s, next_id, + ** {"class": edge_label_d.get("class", "")}) + graph.add_edge(next_id, edge_label_e, + ** {"class": edge_label_d.get("class", "")}) def visualization_html_path() -> Text: From 0b4e0e53ea76cf8f8338bbaa0375641d72ca3b44 Mon Sep 17 00:00:00 2001 From: Rafael Teodosio Date: Thu, 22 Apr 2021 02:49:59 -0300 Subject: [PATCH 05/12] fix code quality issues --- .../core/training_data/visualization.py | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/rasa/shared/core/training_data/visualization.py b/rasa/shared/core/training_data/visualization.py index 0581df48ce64..d7f9ccd1ffef 100644 --- a/rasa/shared/core/training_data/visualization.py +++ b/rasa/shared/core/training_data/visualization.py @@ -48,12 +48,13 @@ def _create_reverse_mapping( dictionary_intent_to_messages = defaultdict(list) for example in data.training_examples: if example.get(INTENT, {}) is not None: - dictionary_intent_to_messages[example.get( - INTENT, {})].append(example) + dictionary_intent_to_messages[example.get(INTENT, {})].append(example) return dictionary_intent_to_messages @staticmethod - def _contains_same_entity(entities: Dict[Text, Any], entity: 
Dict[Text, Any]) -> bool: + def _contains_same_entity( + entities: Dict[Text, Any], entity: Dict[Text, Any] + ) -> bool: return entities.get(entity.get(ENTITY_ATTRIBUTE_TYPE)) is None or entities.get( entity.get(ENTITY_ATTRIBUTE_TYPE) ) != entity.get(ENTITY_ATTRIBUTE_VALUE) @@ -69,7 +70,9 @@ def message_for_data(self, structured_info: Dict[Text, Any]) -> Any: random.shuffle(usable_examples) for example in usable_examples: entities = { - entity.get(ENTITY_ATTRIBUTE_TYPE): entity.get(ENTITY_ATTRIBUTE_VALUE) + entity.get(ENTITY_ATTRIBUTE_TYPE): entity.get( + ENTITY_ATTRIBUTE_VALUE + ) for entity in example.get(ENTITIES, []) } for entity in structured_info.get(ENTITIES, []): @@ -223,14 +226,13 @@ def _merge_equivalent_nodes(graph: "networkx.MultiDiGraph", max_history: int) -> for index, i in enumerate(remaining_node_ids): if graph.has_node(i): # assumes node equivalence is cumulative - for j in remaining_node_ids[index + 1:]: + for j in remaining_node_ids[index + 1 :]: if graph.has_node(j) and _nodes_are_equivalent( graph, i, j, max_history ): # make sure we keep special styles _transfer_style( - graph.nodes(data=True)[ - j], graph.nodes(data=True)[i] + graph.nodes(data=True)[j], graph.nodes(data=True)[i] ) changed = True @@ -249,8 +251,7 @@ def _merge_equivalent_nodes(graph: "networkx.MultiDiGraph", max_history: int) -> ) graph.remove_edge(j, succ_node) # moves all incoming edges to the other node - j_incoming_edges = list( - graph.in_edges(j, keys=True, data=True)) + j_incoming_edges = list(graph.in_edges(j, keys=True, data=True)) for prev_node, _, k, d in j_incoming_edges: _add_edge( graph, @@ -285,8 +286,13 @@ async def _replace_edge_labels_with_nodes( edges = list(graph.edges(keys=True, data=True)) for edge_label_s, edge_label_e, edge_label_k, edge_label_d in edges: if edge_label_k != EDGE_NONE_LABEL: - if message_generator and edge_label_d.get("label", edge_label_k) is not None: - parsed_info = await interpreter.parse(edge_label_d.get("label", edge_label_k)) + if ( + message_generator + and edge_label_d.get("label", edge_label_k) is not None + ): + parsed_info = await interpreter.parse( + edge_label_d.get("label", edge_label_k) + ) label = message_generator.message_for_data(parsed_info) else: label = edge_label_d.get("label", edge_label_k) @@ -300,10 +306,12 @@ async def _replace_edge_labels_with_nodes( fillcolor="lightblue", **_transfer_style(edge_label_d, {"class": "intent"}), ) - graph.add_edge(edge_label_s, next_id, - ** {"class": edge_label_d.get("class", "")}) - graph.add_edge(next_id, edge_label_e, - ** {"class": edge_label_d.get("class", "")}) + graph.add_edge( + edge_label_s, next_id, **{"class": edge_label_d.get("class", "")} + ) + graph.add_edge( + next_id, edge_label_e, **{"class": edge_label_d.get("class", "")} + ) def visualization_html_path() -> Text: @@ -325,8 +333,7 @@ def persist_graph(graph: "networkx.Graph", output_file: Text) -> None: graph_as_text = expg.to_string() # escape backslashes graph_as_text = graph_as_text.replace("\\", "\\\\") - template = template.replace( - "// { graph-content }", f"graph = `{graph_as_text}`", 1) + template = template.replace("// { graph-content }", f"graph = `{graph_as_text}`", 1) rasa.shared.utils.io.write_text_file(template, output_file) @@ -373,8 +380,7 @@ def _add_default_nodes(graph: "networkx.MultiDiGraph", fontsize: int = 12) -> No fontsize=fontsize, **{"class": "end"}, ) - graph.add_node(TMP_NODE_ID, label="TMP", - style="invis", **{"class": "invisible"}) + graph.add_node(TMP_NODE_ID, label="TMP", style="invis", **{"class": 
"invisible"}) def _create_graph(fontsize: int = 12) -> "networkx.MultiDiGraph": @@ -453,8 +459,7 @@ async def visualize_neighborhood( else: message = el.parse_data elif ( - isinstance( - el, ActionExecuted) and el.action_name != ACTION_LISTEN_NAME + isinstance(el, ActionExecuted) and el.action_name != ACTION_LISTEN_NAME ): next_node_idx += 1 graph.add_node( @@ -499,8 +504,7 @@ async def visualize_neighborhood( elif index == len(events) - 1: target = END_NODE_ID elif current_node and current_node not in path_ellipsis_ends: - graph.add_node(special_node_idx, label="...", - **{"class": "ellipsis"}) + graph.add_node(special_node_idx, label="...", **{"class": "ellipsis"}) target = special_node_idx path_ellipsis_ends.add(current_node) special_node_idx -= 1 From fca85e01a1c3c7451e5a4b9fd7611741bdaf37b7 Mon Sep 17 00:00:00 2001 From: Rafael Teodosio Date: Thu, 22 Apr 2021 02:51:01 -0300 Subject: [PATCH 06/12] fix code quality issues --- rasa/shared/core/slots.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rasa/shared/core/slots.py b/rasa/shared/core/slots.py index ed722ec699d4..51dcef00f779 100644 --- a/rasa/shared/core/slots.py +++ b/rasa/shared/core/slots.py @@ -183,8 +183,7 @@ def __init__( def _as_feature(self) -> List[float]: try: - capped_value = max(self.min_value, min( - self.max_value, float(self.value))) + capped_value = max(self.min_value, min(self.max_value, float(self.value))) if abs(self.max_value - self.min_value) > 0: covered_range = abs(self.max_value - self.min_value) else: From e3622a277f3d3eb750bc54bfe3a94d023c742030 Mon Sep 17 00:00:00 2001 From: Rafael Teodosio Date: Thu, 22 Apr 2021 02:52:38 -0300 Subject: [PATCH 07/12] fix code quality issues --- rasa/shared/nlu/training_data/message.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py index 6d80afacffb0..b6a75f9d1b71 100644 --- a/rasa/shared/nlu/training_data/message.py +++ b/rasa/shared/nlu/training_data/message.py @@ -104,7 +104,9 @@ def as_dict(self, only_output_properties: bool = False) -> Dict: # Filter all keys with None value. 
These could have come while building the # Message object in markdown format - return {key: value for key, value in dictionary_data.items() if value is not None} + return { + key: value for key, value in dictionary_data.items() if value is not None + } def __eq__(self, other: Any) -> bool: if not isinstance(other, Message): @@ -148,8 +150,7 @@ def build( """ data: Dict[Text, Any] = {TEXT: text} if intent: - split_intent, response_key = cls.separate_intent_response_key( - intent) + split_intent, response_key = cls.separate_intent_response_key(intent) if split_intent: data[INTENT] = split_intent if response_key: @@ -218,10 +219,8 @@ def get_sparse_features( attribute, featurizers ) - sequence_features = self._combine_features( - sequence_features, featurizers) - sentence_features = self._combine_features( - sentence_features, featurizers) + sequence_features = self._combine_features(sequence_features, featurizers) + sentence_features = self._combine_features(sentence_features, featurizers) return sequence_features, sentence_features @@ -243,10 +242,8 @@ def get_dense_features( attribute, featurizers ) - sequence_features = self._combine_features( - sequence_features, featurizers) - sentence_features = self._combine_features( - sentence_features, featurizers) + sequence_features = self._combine_features(sequence_features, featurizers) + sentence_features = self._combine_features(sentence_features, featurizers) return sequence_features, sentence_features @@ -396,7 +393,7 @@ def find_overlapping_entities( entities_with_location.sort(key=lambda e: e[ENTITY_ATTRIBUTE_START]) overlapping_pairs: List[Tuple[Dict[Text, Any], Dict[Text, Any]]] = [] for i, entity in enumerate(entities_with_location): - for other_entity in entities_with_location[i + 1:]: + for other_entity in entities_with_location[i + 1 :]: if other_entity[ENTITY_ATTRIBUTE_START] < entity[ENTITY_ATTRIBUTE_END]: overlapping_pairs.append((entity, other_entity)) else: From 7a9b08f89b8a64978d9e4d083dcbe83419467eed Mon Sep 17 00:00:00 2001 From: joao vitor silva Date: Thu, 22 Apr 2021 10:05:15 -0300 Subject: [PATCH 08/12] Fix code quality --- rasa/shared/core/slots.py | 831 +++++------ .../core/training_data/visualization.py | 1210 ++++++++--------- rasa/shared/nlu/training_data/message.py | 802 +++++------ 3 files changed, 1422 insertions(+), 1421 deletions(-) diff --git a/rasa/shared/core/slots.py b/rasa/shared/core/slots.py index 51dcef00f779..45a086ddf8fe 100644 --- a/rasa/shared/core/slots.py +++ b/rasa/shared/core/slots.py @@ -1,415 +1,416 @@ -import logging - -from typing import Any, Dict, List, Optional, Text, Type - -import rasa.shared.core.constants -from rasa.shared.exceptions import RasaException -import rasa.shared.utils.common -import rasa.shared.utils.io -from rasa.shared.constants import DOCS_URL_SLOTS - -logger = logging.getLogger(__name__) - - -class InvalidSlotTypeException(RasaException): - """Raised if a slot type is invalid.""" - - -class InvalidSlotConfigError(RasaException, ValueError): - """Raised if a slot's config is invalid.""" - - -class Slot: - """Key-value store for storing information during a conversation.""" - - type_name = None - - def __init__( - self, - name: Text, - initial_value: Any = None, - value_reset_delay: Optional[int] = None, - auto_fill: bool = True, - influence_conversation: bool = True, - ) -> None: - """Create a Slot. - Args: - name: The name of the slot. - initial_value: The initial value of the slot. 
- value_reset_delay: After how many turns the slot should be reset to the - initial_value. This is behavior is currently not implemented. - auto_fill: `True` if the slot should be filled automatically by entities - with the same name. - influence_conversation: If `True` the slot will be featurized and hence - influence the predictions of the dialogue polices. - """ - self.name = name - self._value = initial_value - self.initial_value = initial_value - self._value_reset_delay = value_reset_delay - self.auto_fill = auto_fill - self.influence_conversation = influence_conversation - self._has_been_set = False - - def feature_dimensionality(self) -> int: - """How many features this single slot creates. - Returns: - The number of features. `0` if the slot is unfeaturized. The dimensionality - of the array returned by `as_feature` needs to correspond to this value. - """ - if not self.influence_conversation: - return 0 - - return self._feature_dimensionality() - - def _feature_dimensionality(self) -> int: - """See the docstring for `feature_dimensionality`.""" - return 1 - - def has_features(self) -> bool: - """Indicate if the slot creates any features.""" - return self.feature_dimensionality() != 0 - - def value_reset_delay(self) -> Optional[int]: - """After how many turns the slot should be reset to the initial_value. - If the delay is set to `None`, the slot will keep its value forever.""" - # TODO: FUTURE this needs to be implemented - slots are not reset yet - return self._value_reset_delay - - def as_feature(self) -> List[float]: - if not self.influence_conversation: - return [] - - return self._as_feature() - - def _as_feature(self) -> List[float]: - raise NotImplementedError( - "Each slot type needs to specify how its " - "value can be converted to a feature. Slot " - "'{}' is a generic slot that can not be used " - "for predictions. Make sure you add this " - "slot to your domain definition, specifying " - "the type of the slot. If you implemented " - "a custom slot type class, make sure to " - "implement `.as_feature()`." - "".format(self.name) - ) - - def reset(self) -> None: - """Resets the slot's value to the initial value.""" - self.value = self.initial_value - self._has_been_set = False - - @property - def value(self) -> Any: - """Gets the slot's value.""" - return self._value - - @value.setter - def value(self, value: Any) -> None: - """Sets the slot's value.""" - self._value = value - self._has_been_set = True - - @property - def has_been_set(self) -> bool: - """Indicates if the slot's value has been set.""" - return self._has_been_set - - def __str__(self) -> Text: - return f"{self.__class__.__name__}({self.name}: {self.value})" - - def __repr__(self) -> Text: - return f"<{self.__class__.__name__}({self.name}: {self.value})>" - - @staticmethod - def resolve_by_type(type_name: Text) -> Type["Slot"]: - """Returns a slots class by its type name.""" - for cls in rasa.shared.utils.common.all_subclasses(Slot): - if cls.type_name == type_name: - return cls - try: - return rasa.shared.utils.common.class_from_module_path(type_name) - except (ImportError, AttributeError): - raise InvalidSlotTypeException( - f"Failed to find slot type, '{type_name}' is neither a known type nor " - f"user-defined. If you are creating your own slot type, make " - f"sure its module path is correct. 
" - f"You can find all build in types at {DOCS_URL_SLOTS}" - ) - - def persistence_info(self) -> Dict[str, Any]: - return { - "type": rasa.shared.utils.common.module_path_from_instance(self), - "initial_value": self.initial_value, - "auto_fill": self.auto_fill, - "influence_conversation": self.influence_conversation, - } - - -class FloatSlot(Slot): - type_name = "float" - - def __init__( - self, - name: Text, - initial_value: Optional[float] = None, - value_reset_delay: Optional[int] = None, - auto_fill: bool = True, - max_value: float = 1.0, - min_value: float = 0.0, - influence_conversation: bool = True, - ) -> None: - super().__init__( - name, initial_value, value_reset_delay, auto_fill, influence_conversation - ) - self.max_value = max_value - self.min_value = min_value - - if min_value >= max_value: - raise InvalidSlotConfigError( - "Float slot ('{}') created with an invalid range " - "using min ({}) and max ({}) values. Make sure " - "min is smaller than max." - "".format(self.name, self.min_value, self.max_value) - ) - - if initial_value is not None and not (min_value <= initial_value <= max_value): - rasa.shared.utils.io.raise_warning( - f"Float slot ('{self.name}') created with an initial value " - f"{self.value}. This value is outside of the configured min " - f"({self.min_value}) and max ({self.max_value}) values." - ) - - def _as_feature(self) -> List[float]: - try: - capped_value = max(self.min_value, min(self.max_value, float(self.value))) - if abs(self.max_value - self.min_value) > 0: - covered_range = abs(self.max_value - self.min_value) - else: - covered_range = 1 - return [1.0, (capped_value - self.min_value) / covered_range] - except (TypeError, ValueError): - return [0.0, 0.0] - - def persistence_info(self) -> Dict[Text, Any]: - """Returns relevant information to persist this slot.""" - dictionary_persistence = super().persistence_info() - dictionary_persistence["max_value"] = self.max_value - dictionary_persistence["min_value"] = self.min_value - return dictionary_persistence - - def _feature_dimensionality(self) -> int: - return len(self.as_feature()) - - -class BooleanSlot(Slot): - """A slot storing a truth value.""" - - type_name = "bool" - - def _as_feature(self) -> List[float]: - try: - if self.value is not None: - return [1.0, float(bool_from_any(self.value))] - else: - return [0.0, 0.0] - except (TypeError, ValueError): - # we couldn't convert the value to float - using default value - return [0.0, 0.0] - - def _feature_dimensionality(self) -> int: - return len(self.as_feature()) - - -def bool_from_any(x: Any) -> bool: - """ Converts bool/float/int/str to bool or raises error """ - - if isinstance(x, bool): - return x - elif isinstance(x, (float, int)): - return x == 1.0 - elif isinstance(x, str): - if x.isnumeric(): - return float(x) == 1.0 - elif x.strip().lower() == "true": - return True - elif x.strip().lower() == "false": - return False - else: - raise ValueError("Cannot convert string to bool") - else: - raise TypeError("Cannot convert to bool") - - -class TextSlot(Slot): - type_name = "text" - - def _as_feature(self) -> List[float]: - return [1.0 if self.value is not None else 0.0] - - -class ListSlot(Slot): - type_name = "list" - - def _as_feature(self) -> List[float]: - try: - if self.value is not None and len(self.value) > 0: - return [1.0] - else: - return [0.0] - except (TypeError, ValueError): - # we couldn't convert the value to a list - using default value - return [0.0] - - -class UnfeaturizedSlot(Slot): - type_name = "unfeaturized" - - def 
__init__( - self, - name: Text, - initial_value: Any = None, - value_reset_delay: Optional[int] = None, - auto_fill: bool = True, - influence_conversation: bool = False, - ) -> None: - if influence_conversation: - raise InvalidSlotConfigError( - f"An {UnfeaturizedSlot.__name__} cannot be featurized. " - f"Please use a different slot type for slot '{name}' instead. See the " - f"documentation for more information: {DOCS_URL_SLOTS}" - ) - - rasa.shared.utils.io.raise_warning( - f"{UnfeaturizedSlot.__name__} is deprecated " - f"and will be removed in Rasa Open Source " - f"3.0. Please change the type and configure the 'influence_conversation' " - f"flag for slot '{name}' instead.", - docs=DOCS_URL_SLOTS, - category=FutureWarning, - ) - - super().__init__( - name, initial_value, value_reset_delay, auto_fill, influence_conversation - ) - - def _as_feature(self) -> List[float]: - return [] - - def _feature_dimensionality(self) -> int: - return 0 - - -class CategoricalSlot(Slot): - type_name = "categorical" - - def __init__( - self, - name: Text, - values: Optional[List[Any]] = None, - initial_value: Any = None, - value_reset_delay: Optional[int] = None, - auto_fill: bool = True, - influence_conversation: bool = True, - ) -> None: - super().__init__( - name, initial_value, value_reset_delay, auto_fill, influence_conversation - ) - self.values = [str(v).lower() for v in values] if values else [] - - def add_default_value(self) -> None: - values = set(self.values) - if rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE not in values: - self.values.append( - rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE - ) - - def persistence_info(self) -> Dict[Text, Any]: - """Returns serialized slot.""" - dictionary_serialized = super().persistence_info() - dictionary_serialized["values"] = [ - value - for value in self.values - # Don't add default slot when persisting it. - # We'll re-add it on the fly when creating the domain. - if value != rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE - ] - return dictionary_serialized - - def _as_feature(self) -> List[float]: - r = [0.0] * self.feature_dimensionality() - - try: - for i, v in enumerate(self.values): - if v == str(self.value).lower(): - r[i] = 1.0 - break - else: - if self.value is not None: - if ( - rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE - in self.values - ): - i = self.values.index( - rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE - ) - r[i] = 1.0 - else: - rasa.shared.utils.io.raise_warning( - f"Categorical slot '{self.name}' is set to a value " - f"('{self.value}') " - "that is not specified in the domain. " - "Value will be ignored and the slot will " - "behave as if no value is set. " - "Make sure to add all values a categorical " - "slot should store to the domain." - ) - except (TypeError, ValueError): - logger.exception("Failed to featurize categorical slot.") - return r - return r - - def _feature_dimensionality(self) -> int: - return len(self.values) - - -class AnySlot(Slot): - """Slot which can be used to store any value. Users need to create a subclass of - `Slot` in case the information is supposed to get featurized.""" - - type_name = "any" - - def __init__( - self, - name: Text, - initial_value: Any = None, - value_reset_delay: Optional[int] = None, - auto_fill: bool = True, - influence_conversation: bool = False, - ) -> None: - if influence_conversation: - raise InvalidSlotConfigError( - f"An {AnySlot.__name__} cannot be featurized. 
" - f"Please use a different slot type for slot '{name}' instead. If you " - f"need to featurize a data type which is not supported out of the box, " - f"implement a custom slot type by subclassing '{Slot.__name__}'. " - f"See the documentation for more information: {DOCS_URL_SLOTS}" - ) - - super().__init__( - name, initial_value, value_reset_delay, auto_fill, influence_conversation - ) - - def __eq__(self, other: Any) -> bool: - """Compares object with other object.""" - if not isinstance(other, AnySlot): - return NotImplemented - - return ( - self.name == other.name - and self.initial_value == other.initial_value - and self._value_reset_delay == other._value_reset_delay - and self.auto_fill == other.auto_fill - and self.value == other.value - ) +import logging + +from typing import Any, Dict, List, Optional, Text, Type + +import rasa.shared.core.constants +from rasa.shared.exceptions import RasaException +import rasa.shared.utils.common +import rasa.shared.utils.io +from rasa.shared.constants import DOCS_URL_SLOTS + +logger = logging.getLogger(__name__) + + +class InvalidSlotTypeException(RasaException): + """Raised if a slot type is invalid.""" + + +class InvalidSlotConfigError(RasaException, ValueError): + """Raised if a slot's config is invalid.""" + + +class Slot: + """Key-value store for storing information during a conversation.""" + + type_name = None + + def __init__( + self, + name: Text, + initial_value: Any = None, + value_reset_delay: Optional[int] = None, + auto_fill: bool = True, + influence_conversation: bool = True, + ) -> None: + + """Create a Slot. + Args: + name: The name of the slot. + initial_value: The initial value of the slot. + value_reset_delay: After how many turns the slot should be reset to the + initial_value. This is behavior is currently not implemented. + auto_fill: `True` if the slot should be filled automatically by entities + with the same name. + influence_conversation: If `True` the slot will be featurized and hence + influence the predictions of the dialogue polices. + """ + self.name = name + self._value = initial_value + self.initial_value = initial_value + self._value_reset_delay = value_reset_delay + self.auto_fill = auto_fill + self.influence_conversation = influence_conversation + self._has_been_set = False + + def feature_dimensionality(self) -> int: + """How many features this single slot creates. + Returns: + The number of features. `0` if the slot is unfeaturized. The dimensionality + of the array returned by `as_feature` needs to correspond to this value. + """ + if not self.influence_conversation: + return 0 + + return self._feature_dimensionality() + + def _feature_dimensionality(self) -> int: + """See the docstring for `feature_dimensionality`.""" + return 1 + + def has_features(self) -> bool: + """Indicate if the slot creates any features.""" + return self.feature_dimensionality() != 0 + + def value_reset_delay(self) -> Optional[int]: + """After how many turns the slot should be reset to the initial_value. + If the delay is set to `None`, the slot will keep its value forever.""" + # TODO: FUTURE this needs to be implemented - slots are not reset yet + return self._value_reset_delay + + def as_feature(self) -> List[float]: + if not self.influence_conversation: + return [] + + return self._as_feature() + + def _as_feature(self) -> List[float]: + raise NotImplementedError( + "Each slot type needs to specify how its " + "value can be converted to a feature. Slot " + "'{}' is a generic slot that can not be used " + "for predictions. 
Make sure you add this " + "slot to your domain definition, specifying " + "the type of the slot. If you implemented " + "a custom slot type class, make sure to " + "implement `.as_feature()`." + "".format(self.name) + ) + + def reset(self) -> None: + """Resets the slot's value to the initial value.""" + self.value = self.initial_value + self._has_been_set = False + + @property + def value(self) -> Any: + """Gets the slot's value.""" + return self._value + + @value.setter + def value(self, value: Any) -> None: + """Sets the slot's value.""" + self._value = value + self._has_been_set = True + + @property + def has_been_set(self) -> bool: + """Indicates if the slot's value has been set.""" + return self._has_been_set + + def __str__(self) -> Text: + return f"{self.__class__.__name__}({self.name}: {self.value})" + + def __repr__(self) -> Text: + return f"<{self.__class__.__name__}({self.name}: {self.value})>" + + @staticmethod + def resolve_by_type(type_name: Text) -> Type["Slot"]: + """Returns a slots class by its type name.""" + for cls in rasa.shared.utils.common.all_subclasses(Slot): + if cls.type_name == type_name: + return cls + try: + return rasa.shared.utils.common.class_from_module_path(type_name) + except (ImportError, AttributeError): + raise InvalidSlotTypeException( + f"Failed to find slot type, '{type_name}' is neither a known type nor " + f"user-defined. If you are creating your own slot type, make " + f"sure its module path is correct. " + f"You can find all build in types at {DOCS_URL_SLOTS}" + ) + + def persistence_info(self) -> Dict[str, Any]: + return { + "type": rasa.shared.utils.common.module_path_from_instance(self), + "initial_value": self.initial_value, + "auto_fill": self.auto_fill, + "influence_conversation": self.influence_conversation, + } + + +class FloatSlot(Slot): + type_name = "float" + + def __init__( + self, + name: Text, + initial_value: Optional[float] = None, + value_reset_delay: Optional[int] = None, + auto_fill: bool = True, + max_value: float = 1.0, + min_value: float = 0.0, + influence_conversation: bool = True, + ) -> None: + super().__init__( + name, initial_value, value_reset_delay, auto_fill, influence_conversation + ) + self.max_value = max_value + self.min_value = min_value + + if min_value >= max_value: + raise InvalidSlotConfigError( + "Float slot ('{}') created with an invalid range " + "using min ({}) and max ({}) values. Make sure " + "min is smaller than max." + "".format(self.name, self.min_value, self.max_value) + ) + + if initial_value is not None and not (min_value <= initial_value <= max_value): + rasa.shared.utils.io.raise_warning( + f"Float slot ('{self.name}') created with an initial value " + f"{self.value}. This value is outside of the configured min " + f"({self.min_value}) and max ({self.max_value}) values." 
+ ) + + def _as_feature(self) -> List[float]: + try: + capped_value = max(self.min_value, min(self.max_value, float(self.value))) + if abs(self.max_value - self.min_value) > 0: + covered_range = abs(self.max_value - self.min_value) + else: + covered_range = 1 + return [1.0, (capped_value - self.min_value) / covered_range] + except (TypeError, ValueError): + return [0.0, 0.0] + + def persistence_info(self) -> Dict[Text, Any]: + """Returns relevant information to persist this slot.""" + dictionary_persistence = super().persistence_info() + dictionary_persistence["max_value"] = self.max_value + dictionary_persistence["min_value"] = self.min_value + return dictionary_persistence + + def _feature_dimensionality(self) -> int: + return len(self.as_feature()) + + +class BooleanSlot(Slot): + """A slot storing a truth value.""" + + type_name = "bool" + + def _as_feature(self) -> List[float]: + try: + if self.value is not None: + return [1.0, float(bool_from_any(self.value))] + else: + return [0.0, 0.0] + except (TypeError, ValueError): + # we couldn't convert the value to float - using default value + return [0.0, 0.0] + + def _feature_dimensionality(self) -> int: + return len(self.as_feature()) + + +def bool_from_any(x: Any) -> bool: + """ Converts bool/float/int/str to bool or raises error """ + + if isinstance(x, bool): + return x + elif isinstance(x, (float, int)): + return x == 1.0 + elif isinstance(x, str): + if x.isnumeric(): + return float(x) == 1.0 + elif x.strip().lower() == "true": + return True + elif x.strip().lower() == "false": + return False + else: + raise ValueError("Cannot convert string to bool") + else: + raise TypeError("Cannot convert to bool") + + +class TextSlot(Slot): + type_name = "text" + + def _as_feature(self) -> List[float]: + return [1.0 if self.value is not None else 0.0] + + +class ListSlot(Slot): + type_name = "list" + + def _as_feature(self) -> List[float]: + try: + if self.value is not None and len(self.value) > 0: + return [1.0] + else: + return [0.0] + except (TypeError, ValueError): + # we couldn't convert the value to a list - using default value + return [0.0] + + +class UnfeaturizedSlot(Slot): + type_name = "unfeaturized" + + def __init__( + self, + name: Text, + initial_value: Any = None, + value_reset_delay: Optional[int] = None, + auto_fill: bool = True, + influence_conversation: bool = False, + ) -> None: + if influence_conversation: + raise InvalidSlotConfigError( + f"An {UnfeaturizedSlot.__name__} cannot be featurized. " + f"Please use a different slot type for slot '{name}' instead. See the " + f"documentation for more information: {DOCS_URL_SLOTS}" + ) + + rasa.shared.utils.io.raise_warning( + f"{UnfeaturizedSlot.__name__} is deprecated " + f"and will be removed in Rasa Open Source " + f"3.0. 
Please change the type and configure the 'influence_conversation' " + f"flag for slot '{name}' instead.", + docs=DOCS_URL_SLOTS, + category=FutureWarning, + ) + + super().__init__( + name, initial_value, value_reset_delay, auto_fill, influence_conversation + ) + + def _as_feature(self) -> List[float]: + return [] + + def _feature_dimensionality(self) -> int: + return 0 + + +class CategoricalSlot(Slot): + type_name = "categorical" + + def __init__( + self, + name: Text, + values: Optional[List[Any]] = None, + initial_value: Any = None, + value_reset_delay: Optional[int] = None, + auto_fill: bool = True, + influence_conversation: bool = True, + ) -> None: + super().__init__( + name, initial_value, value_reset_delay, auto_fill, influence_conversation + ) + self.values = [str(v).lower() for v in values] if values else [] + + def add_default_value(self) -> None: + values = set(self.values) + if rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE not in values: + self.values.append( + rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE + ) + + def persistence_info(self) -> Dict[Text, Any]: + """Returns serialized slot.""" + dictionary_serialized = super().persistence_info() + dictionary_serialized["values"] = [ + value + for value in self.values + # Don't add default slot when persisting it. + # We'll re-add it on the fly when creating the domain. + if value != rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE + ] + return dictionary_serialized + + def _as_feature(self) -> List[float]: + r = [0.0] * self.feature_dimensionality() + + try: + for i, v in enumerate(self.values): + if v == str(self.value).lower(): + r[i] = 1.0 + break + else: + if self.value is not None: + if ( + rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE + in self.values + ): + i = self.values.index( + rasa.shared.core.constants.DEFAULT_CATEGORICAL_SLOT_VALUE + ) + r[i] = 1.0 + else: + rasa.shared.utils.io.raise_warning( + f"Categorical slot '{self.name}' is set to a value " + f"('{self.value}') " + "that is not specified in the domain. " + "Value will be ignored and the slot will " + "behave as if no value is set. " + "Make sure to add all values a categorical " + "slot should store to the domain." + ) + except (TypeError, ValueError): + logger.exception("Failed to featurize categorical slot.") + return r + return r + + def _feature_dimensionality(self) -> int: + return len(self.values) + + +class AnySlot(Slot): + """Slot which can be used to store any value. Users need to create a subclass of + `Slot` in case the information is supposed to get featurized.""" + + type_name = "any" + + def __init__( + self, + name: Text, + initial_value: Any = None, + value_reset_delay: Optional[int] = None, + auto_fill: bool = True, + influence_conversation: bool = False, + ) -> None: + if influence_conversation: + raise InvalidSlotConfigError( + f"An {AnySlot.__name__} cannot be featurized. " + f"Please use a different slot type for slot '{name}' instead. If you " + f"need to featurize a data type which is not supported out of the box, " + f"implement a custom slot type by subclassing '{Slot.__name__}'. 
" + f"See the documentation for more information: {DOCS_URL_SLOTS}" + ) + + super().__init__( + name, initial_value, value_reset_delay, auto_fill, influence_conversation + ) + + def __eq__(self, other: Any) -> bool: + """Compares object with other object.""" + if not isinstance(other, AnySlot): + return NotImplemented + + return ( + self.name == other.name + and self.initial_value == other.initial_value + and self._value_reset_delay == other._value_reset_delay + and self.auto_fill == other.auto_fill + and self.value == other.value + ) diff --git a/rasa/shared/core/training_data/visualization.py b/rasa/shared/core/training_data/visualization.py index d7f9ccd1ffef..18d7749ddcd5 100644 --- a/rasa/shared/core/training_data/visualization.py +++ b/rasa/shared/core/training_data/visualization.py @@ -1,605 +1,605 @@ -from collections import defaultdict, deque - -import random -from typing import Any, Text, List, Dict, Optional, TYPE_CHECKING, Set - -import rasa.shared.utils.io -from rasa.shared.core.constants import ACTION_LISTEN_NAME -from rasa.shared.core.domain import Domain -from rasa.shared.core.events import UserUttered, ActionExecuted, Event -from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter, RegexInterpreter -from rasa.shared.core.generator import TrainingDataGenerator -from rasa.shared.core.training_data.structures import StoryGraph, StoryStep -from rasa.shared.nlu.constants import ( - ENTITY_ATTRIBUTE_VALUE, - INTENT, - TEXT, - ENTITY_ATTRIBUTE_TYPE, - ENTITIES, - INTENT_NAME_KEY, -) - -if TYPE_CHECKING: - from rasa.shared.nlu.training_data.training_data import TrainingData - from rasa.shared.nlu.training_data.message import Message - import networkx - -EDGE_NONE_LABEL = "NONE" - -START_NODE_ID = 0 -END_NODE_ID = -1 -TMP_NODE_ID = -2 - -VISUALIZATION_TEMPLATE_PATH = "/visualization.html" - - -class UserMessageGenerator: - def __init__(self, nlu_training_data: "TrainingData") -> None: - self.nlu_training_data = nlu_training_data - self.mapping = self._create_reverse_mapping(self.nlu_training_data) - - @staticmethod - def _create_reverse_mapping( - data: "TrainingData", - ) -> Dict[Dict[Text, Any], List["Message"]]: - """Create a mapping from intent to messages - This allows a faster intent lookup.""" - - dictionary_intent_to_messages = defaultdict(list) - for example in data.training_examples: - if example.get(INTENT, {}) is not None: - dictionary_intent_to_messages[example.get(INTENT, {})].append(example) - return dictionary_intent_to_messages - - @staticmethod - def _contains_same_entity( - entities: Dict[Text, Any], entity: Dict[Text, Any] - ) -> bool: - return entities.get(entity.get(ENTITY_ATTRIBUTE_TYPE)) is None or entities.get( - entity.get(ENTITY_ATTRIBUTE_TYPE) - ) != entity.get(ENTITY_ATTRIBUTE_VALUE) - - def message_for_data(self, structured_info: Dict[Text, Any]) -> Any: - """Find a data sample with the same intent and entities. 
- Given the parsed data from a message (intent and entities) finds a - message in the data that has the same intent and entities.""" - - if structured_info.get(INTENT) is not None: - intent_name = structured_info.get(INTENT, {}).get(INTENT_NAME_KEY) - usable_examples = self.mapping.get(intent_name, [])[:] - random.shuffle(usable_examples) - for example in usable_examples: - entities = { - entity.get(ENTITY_ATTRIBUTE_TYPE): entity.get( - ENTITY_ATTRIBUTE_VALUE - ) - for entity in example.get(ENTITIES, []) - } - for entity in structured_info.get(ENTITIES, []): - if self._contains_same_entity(entities, entity): - break - else: - return example.get(TEXT) - return structured_info.get(TEXT) - - -def _fingerprint_node( - graph: "networkx.MultiDiGraph", node: int, max_history: int -) -> Set[Text]: - """Fingerprint a node in a graph. - Can be used to identify nodes that are similar and can be merged within the - graph. - Generates all paths starting at `node` following the directed graph up to - the length of `max_history`, and returns a set of strings describing the - found paths. If the fingerprint creation for two nodes results in the same - sets these nodes are indistinguishable if we walk along the path and only - remember max history number of nodes we have visited. Hence, if we randomly - walk on our directed graph, always only remembering the last `max_history` - nodes we have visited, we can never remember if we have visited node A or - node B if both have the same fingerprint.""" - - # the candidate list contains all node paths that haven't been - # extended till `max_history` length yet. - candidates = deque() - candidates.append([node]) - continuations = [] - while len(candidates) > 0: - candidate = candidates.pop() - last = candidate[-1] - empty = True - for _, succ_node in graph.out_edges(last): - next_candidate = candidate[:] - next_candidate.append(succ_node) - # if the path is already long enough, we add it to the results, - # otherwise we add it to the candidates - # that we still need to visit - if len(next_candidate) == max_history: - continuations.append(next_candidate) - else: - candidates.append(next_candidate) - empty = False - if empty: - continuations.append(candidate) - return { - " - ".join([graph.nodes[node]["label"] for node in continuation]) - for continuation in continuations - } - - -def _incoming_edges(graph: "networkx.MultiDiGraph", node: int) -> set: - return {(prev_node, k) for prev_node, _, k in graph.in_edges(node, keys=True)} - - -def _outgoing_edges(graph: "networkx.MultiDiGraph", node: int) -> set: - return {(succ_node, k) for _, succ_node, k in graph.out_edges(node, keys=True)} - - -def _outgoing_edges_are_similar( - graph: "networkx.MultiDiGraph", node_a: int, node_b: int -) -> bool: - """If the outgoing edges from the two nodes are similar enough, - it doesn't matter if you are in a or b. 
- As your path will be the same because the outgoing edges will lead you to - the same nodes anyways.""" - - ignored = {node_b, node_a} - a_edges = { - (target, k) - for target, k in _outgoing_edges(graph, node_a) - if target not in ignored - } - b_edges = { - (target, k) - for target, k in _outgoing_edges(graph, node_b) - if target not in ignored - } - return a_edges == b_edges or not a_edges or not b_edges - - -def _nodes_are_equivalent( - graph: "networkx.MultiDiGraph", node_a: int, node_b: int, max_history: int -) -> bool: - """Decides if two nodes are equivalent based on their fingerprints.""" - return graph.nodes[node_a]["label"] == graph.nodes[node_b]["label"] and ( - _outgoing_edges_are_similar(graph, node_a, node_b) - or _incoming_edges(graph, node_a) == _incoming_edges(graph, node_b) - or _fingerprint_node(graph, node_a, max_history) - == _fingerprint_node(graph, node_b, max_history) - ) - - -def _add_edge( - graph: "networkx.MultiDiGraph", - vertex_u: int, - vertex_v: int, - key: Optional[Text], - label: Optional[Text] = None, - **kwargs: Any, -) -> None: - """Adds an edge to the graph if the edge is not already present. Uses the - label as the key.""" - - if key is None: - key = EDGE_NONE_LABEL - - if key == EDGE_NONE_LABEL: - label = "" - - if not graph.has_edge(vertex_u, vertex_v, key=EDGE_NONE_LABEL): - graph.add_edge(vertex_u, vertex_v, key=key, label=label, **kwargs) - else: - data = graph.get_edge_data(vertex_u, vertex_v, key=EDGE_NONE_LABEL) - _transfer_style(kwargs, data) - - -def _transfer_style( - source: Dict[Text, Any], target: Dict[Text, Any] -) -> Dict[Text, Any]: - """Copy over class names from source to target for all special classes. - Used if a node is highlighted and merged with another node.""" - - clazzes = source.get("class", "") - - special_classes = {"dashed", "active"} - - if "class" not in target: - target["class"] = "" - - for copy in special_classes: - if copy in clazzes and copy not in target["class"]: - target["class"] += " " + copy - - target["class"] = target["class"].strip() - return target - - -def _merge_equivalent_nodes(graph: "networkx.MultiDiGraph", max_history: int) -> None: - """Searches for equivalent nodes in the graph and merges them.""" - - changed = True - # every node merge changes the graph and can trigger previously - # impossible node merges - we need to repeat until - # the graph doesn't change anymore - while changed: - changed = False - remaining_node_ids = [n for n in graph.nodes() if n > 0] - for index, i in enumerate(remaining_node_ids): - if graph.has_node(i): - # assumes node equivalence is cumulative - for j in remaining_node_ids[index + 1 :]: - if graph.has_node(j) and _nodes_are_equivalent( - graph, i, j, max_history - ): - # make sure we keep special styles - _transfer_style( - graph.nodes(data=True)[j], graph.nodes(data=True)[i] - ) - - changed = True - # moves all outgoing edges to the other node - j_outgoing_edges = list( - graph.out_edges(j, keys=True, data=True) - ) - for _, succ_node, k, d in j_outgoing_edges: - _add_edge( - graph, - i, - succ_node, - k, - d.get("label"), - **{"class": d.get("class", "")}, - ) - graph.remove_edge(j, succ_node) - # moves all incoming edges to the other node - j_incoming_edges = list(graph.in_edges(j, keys=True, data=True)) - for prev_node, _, k, d in j_incoming_edges: - _add_edge( - graph, - prev_node, - i, - k, - d.get("label"), - **{"class": d.get("class", "")}, - ) - graph.remove_edge(prev_node, j) - graph.remove_node(j) - - -async def _replace_edge_labels_with_nodes( - graph: 
"networkx.MultiDiGraph", - next_id: int, - interpreter: NaturalLanguageInterpreter, - nlu_training_data: "TrainingData", -) -> None: - """User messages are created as edge labels. This removes the labels and - creates nodes instead. - The algorithms (e.g. merging) are simpler if the user messages are labels - on the edges. But it sometimes - looks better if in the final graphs the user messages are nodes instead - of edge labels. - """ - if nlu_training_data: - message_generator = UserMessageGenerator(nlu_training_data) - else: - message_generator = None - - edges = list(graph.edges(keys=True, data=True)) - for edge_label_s, edge_label_e, edge_label_k, edge_label_d in edges: - if edge_label_k != EDGE_NONE_LABEL: - if ( - message_generator - and edge_label_d.get("label", edge_label_k) is not None - ): - parsed_info = await interpreter.parse( - edge_label_d.get("label", edge_label_k) - ) - label = message_generator.message_for_data(parsed_info) - else: - label = edge_label_d.get("label", edge_label_k) - next_id += 1 - graph.remove_edge(edge_label_s, edge_label_e, edge_label_k) - graph.add_node( - next_id, - label=label, - shape="rect", - style="filled", - fillcolor="lightblue", - **_transfer_style(edge_label_d, {"class": "intent"}), - ) - graph.add_edge( - edge_label_s, next_id, **{"class": edge_label_d.get("class", "")} - ) - graph.add_edge( - next_id, edge_label_e, **{"class": edge_label_d.get("class", "")} - ) - - -def visualization_html_path() -> Text: - import pkg_resources - - return pkg_resources.resource_filename(__name__, VISUALIZATION_TEMPLATE_PATH) - - -def persist_graph(graph: "networkx.Graph", output_file: Text) -> None: - """Plots the graph and persists it into a html file.""" - import networkx as nx - - expg = nx.nx_pydot.to_pydot(graph) - - template = rasa.shared.utils.io.read_file(visualization_html_path()) - - # Insert graph into template - template = template.replace("// { is-client }", "isClient = true", 1) - graph_as_text = expg.to_string() - # escape backslashes - graph_as_text = graph_as_text.replace("\\", "\\\\") - template = template.replace("// { graph-content }", f"graph = `{graph_as_text}`", 1) - - rasa.shared.utils.io.write_text_file(template, output_file) - - -def _length_of_common_action_prefix(this: List[Event], other: List[Event]) -> int: - """Calculate number of actions that two conversations have in common.""" - - num_common_actions = 0 - t_cleaned = [e for e in this if e.type_name in {"user", "action"}] - o_cleaned = [e for e in other if e.type_name in {"user", "action"}] - - for i, e in enumerate(t_cleaned): - if i == len(o_cleaned): - break - elif isinstance(e, UserUttered) and isinstance(o_cleaned[i], UserUttered): - continue - elif ( - isinstance(e, ActionExecuted) - and isinstance(o_cleaned[i], ActionExecuted) - and o_cleaned[i].action_name == e.action_name - ): - num_common_actions += 1 - else: - break - return num_common_actions - - -def _add_default_nodes(graph: "networkx.MultiDiGraph", fontsize: int = 12) -> None: - """Add the standard nodes we need.""" - - graph.add_node( - START_NODE_ID, - label="START", - fillcolor="green", - style="filled", - fontsize=fontsize, - **{"class": "start active"}, - ) - graph.add_node( - END_NODE_ID, - label="END", - fillcolor="red", - style="filled", - fontsize=fontsize, - **{"class": "end"}, - ) - graph.add_node(TMP_NODE_ID, label="TMP", style="invis", **{"class": "invisible"}) - - -def _create_graph(fontsize: int = 12) -> "networkx.MultiDiGraph": - """Create a graph and adds the default nodes.""" - - import 
networkx as nx - - graph = nx.MultiDiGraph() - _add_default_nodes(graph, fontsize) - return graph - - -def _add_message_edge( - graph: "networkx.MultiDiGraph", - message: Optional[Dict[Text, Any]], - current_node: int, - next_node_idx: int, - is_current: bool, -) -> None: - """Create an edge based on the user message.""" - - if message: - message_key = message.get("intent", {}).get("name", None) - message_label = message.get("text", None) - else: - message_key = None - message_label = None - - _add_edge( - graph, - current_node, - next_node_idx, - message_key, - message_label, - **{"class": "active" if is_current else ""}, - ) - - -async def visualize_neighborhood( - current: Optional[List[Event]], - event_sequences: List[List[Event]], - output_file: Optional[Text] = None, - max_history: int = 2, - interpreter: NaturalLanguageInterpreter = RegexInterpreter(), - nlu_training_data: Optional["TrainingData"] = None, - should_merge_nodes: bool = True, - max_distance: int = 1, - fontsize: int = 12, -) -> "networkx.MultiDiGraph": - """Given a set of event lists, visualizing the flows.""" - graph = _create_graph(fontsize) - _add_default_nodes(graph) - - next_node_idx = START_NODE_ID - special_node_idx = -3 - path_ellipsis_ends = set() - - for events in event_sequences: - if current and max_distance: - prefix = _length_of_common_action_prefix(current, events) - else: - prefix = len(events) - - message = None - current_node = START_NODE_ID - index = 0 - is_current = events == current - - for index, el in enumerate(events): - if not prefix: - index -= 1 - break - if isinstance(el, UserUttered): - if not el.intent: - message = await interpreter.parse(el.text) - else: - message = el.parse_data - elif ( - isinstance(el, ActionExecuted) and el.action_name != ACTION_LISTEN_NAME - ): - next_node_idx += 1 - graph.add_node( - next_node_idx, - label=el.action_name, - fontsize=fontsize, - **{"class": "active" if is_current else ""}, - ) - - _add_message_edge( - graph, message, current_node, next_node_idx, is_current - ) - current_node = next_node_idx - - message = None - prefix -= 1 - - # determine what the end node of the conversation is going to be - # this can either be an ellipsis "...", the conversation end node - # "END" or a "TMP" node if this is the active conversation - if is_current: - if ( - isinstance(events[index], ActionExecuted) - and events[index].action_name == ACTION_LISTEN_NAME - ): - next_node_idx += 1 - graph.add_node( - next_node_idx, - label=" ? " - if not message - else message.get("intent", {}).get("name", " ? 
"), - shape="rect", - **{"class": "intent dashed active"}, - ) - target = next_node_idx - elif current_node: - d = graph.nodes(data=True)[current_node] - d["class"] = "dashed active" - target = TMP_NODE_ID - else: - target = TMP_NODE_ID - elif index == len(events) - 1: - target = END_NODE_ID - elif current_node and current_node not in path_ellipsis_ends: - graph.add_node(special_node_idx, label="...", **{"class": "ellipsis"}) - target = special_node_idx - path_ellipsis_ends.add(current_node) - special_node_idx -= 1 - else: - target = END_NODE_ID - - _add_message_edge(graph, message, current_node, target, is_current) - - if should_merge_nodes: - _merge_equivalent_nodes(graph, max_history) - await _replace_edge_labels_with_nodes( - graph, next_node_idx, interpreter, nlu_training_data - ) - - _remove_auxiliary_nodes(graph, special_node_idx) - - if output_file: - persist_graph(graph, output_file) - return graph - - -def _remove_auxiliary_nodes( - graph: "networkx.MultiDiGraph", special_node_idx: int -) -> None: - """Remove any temporary or unused nodes.""" - - graph.remove_node(TMP_NODE_ID) - - if not len(list(graph.predecessors(END_NODE_ID))): - graph.remove_node(END_NODE_ID) - - # remove duplicated "..." nodes after merging - ps = set() - for i in range(special_node_idx + 1, TMP_NODE_ID): - for pred in list(graph.predecessors(i)): - if pred in ps: - graph.remove_node(i) - else: - ps.add(pred) - - -async def visualize_stories( - story_steps: List[StoryStep], - domain: Domain, - output_file: Optional[Text], - max_history: int, - interpreter: NaturalLanguageInterpreter = RegexInterpreter(), - nlu_training_data: Optional["TrainingData"] = None, - should_merge_nodes: bool = True, - fontsize: int = 12, -) -> "networkx.MultiDiGraph": - """Given a set of stories, generates a graph visualizing the flows in the stories. - Visualization is always a trade off between making the graph as small as - possible while - at the same time making sure the meaning doesn't change to "much". The - algorithm will - compress the graph generated from the stories to merge nodes that are - similar. Hence, - the algorithm might create paths through the graph that aren't actually - specified in the - stories, but we try to minimize that. - Output file defines if and where a file containing the plotted graph - should be stored. - The history defines how much 'memory' the graph has. This influences in - which situations the - algorithm will merge nodes. Nodes will only be merged if they are equal - within the history, this - means the larger the history is we take into account the less likely it - is we merge any nodes. - The training data parameter can be used to pass in a Rasa NLU training - data instance. It will - be used to replace the user messages from the story file with actual - messages from the training data. 
- """ - story_graph = StoryGraph(story_steps) - - g = TrainingDataGenerator( - story_graph, - domain, - use_story_concatenation=False, - tracker_limit=100, - augmentation_factor=0, - ) - completed_trackers = g.generate() - event_sequences = [t.events for t in completed_trackers] - - graph = await visualize_neighborhood( - None, - event_sequences, - output_file, - max_history, - interpreter, - nlu_training_data, - should_merge_nodes, - max_distance=1, - fontsize=fontsize, - ) - return graph +from collections import defaultdict, deque + +import random +from typing import Any, Text, List, Dict, Optional, TYPE_CHECKING, Set + +import rasa.shared.utils.io +from rasa.shared.core.constants import ACTION_LISTEN_NAME +from rasa.shared.core.domain import Domain +from rasa.shared.core.events import UserUttered, ActionExecuted, Event +from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter, RegexInterpreter +from rasa.shared.core.generator import TrainingDataGenerator +from rasa.shared.core.training_data.structures import StoryGraph, StoryStep +from rasa.shared.nlu.constants import ( + ENTITY_ATTRIBUTE_VALUE, + INTENT, + TEXT, + ENTITY_ATTRIBUTE_TYPE, + ENTITIES, + INTENT_NAME_KEY, +) + +if TYPE_CHECKING: + from rasa.shared.nlu.training_data.training_data import TrainingData + from rasa.shared.nlu.training_data.message import Message + import networkx + +EDGE_NONE_LABEL = "NONE" + +START_NODE_ID = 0 +END_NODE_ID = -1 +TMP_NODE_ID = -2 + +VISUALIZATION_TEMPLATE_PATH = "/visualization.html" + + +class UserMessageGenerator: + def __init__(self, nlu_training_data: "TrainingData") -> None: + self.nlu_training_data = nlu_training_data + self.mapping = self._create_reverse_mapping(self.nlu_training_data) + + @staticmethod + def _create_reverse_mapping( + data: "TrainingData", + ) -> Dict[Dict[Text, Any], List["Message"]]: + """Create a mapping from intent to messages + This allows a faster intent lookup.""" + + dictionary_intent_to_messages = defaultdict(list) + for example in data.training_examples: + if example.get(INTENT, {}) is not None: + dictionary_intent_to_messages[example.get(INTENT, {})].append(example) + return dictionary_intent_to_messages + + @staticmethod + def _contains_same_entity( + entities: Dict[Text, Any], entity: Dict[Text, Any] + ) -> bool: + return entities.get(entity.get(ENTITY_ATTRIBUTE_TYPE)) is None or entities.get( + entity.get(ENTITY_ATTRIBUTE_TYPE) + ) != entity.get(ENTITY_ATTRIBUTE_VALUE) + + def message_for_data(self, structured_info: Dict[Text, Any]) -> Any: + """Find a data sample with the same intent and entities. + Given the parsed data from a message (intent and entities) finds a + message in the data that has the same intent and entities.""" + + if structured_info.get(INTENT) is not None: + intent_name = structured_info.get(INTENT, {}).get(INTENT_NAME_KEY) + usable_examples = self.mapping.get(intent_name, [])[:] + random.shuffle(usable_examples) + for example in usable_examples: + entities = { + entity.get(ENTITY_ATTRIBUTE_TYPE): entity.get( + ENTITY_ATTRIBUTE_VALUE + ) + for entity in example.get(ENTITIES, []) + } + for entity in structured_info.get(ENTITIES, []): + if self._contains_same_entity(entities, entity): + break + else: + return example.get(TEXT) + return structured_info.get(TEXT) + + +def _fingerprint_node( + graph: "networkx.MultiDiGraph", node: int, max_history: int +) -> Set[Text]: + """Fingerprint a node in a graph. + Can be used to identify nodes that are similar and can be merged within the + graph. 
+ Generates all paths starting at `node` following the directed graph up to + the length of `max_history`, and returns a set of strings describing the + found paths. If the fingerprint creation for two nodes results in the same + sets these nodes are indistinguishable if we walk along the path and only + remember max history number of nodes we have visited. Hence, if we randomly + walk on our directed graph, always only remembering the last `max_history` + nodes we have visited, we can never remember if we have visited node A or + node B if both have the same fingerprint.""" + + # the candidate list contains all node paths that haven't been + # extended till `max_history` length yet. + candidates = deque() + candidates.append([node]) + continuations = [] + while len(candidates) > 0: + candidate = candidates.pop() + last = candidate[-1] + empty = True + for _, succ_node in graph.out_edges(last): + next_candidate = candidate[:] + next_candidate.append(succ_node) + # if the path is already long enough, we add it to the results, + # otherwise we add it to the candidates + # that we still need to visit + if len(next_candidate) == max_history: + continuations.append(next_candidate) + else: + candidates.append(next_candidate) + empty = False + if empty: + continuations.append(candidate) + return { + " - ".join([graph.nodes[node]["label"] for node in continuation]) + for continuation in continuations + } + + +def _incoming_edges(graph: "networkx.MultiDiGraph", node: int) -> set: + return {(prev_node, k) for prev_node, _, k in graph.in_edges(node, keys=True)} + + +def _outgoing_edges(graph: "networkx.MultiDiGraph", node: int) -> set: + return {(succ_node, k) for _, succ_node, k in graph.out_edges(node, keys=True)} + + +def _outgoing_edges_are_similar( + graph: "networkx.MultiDiGraph", node_a: int, node_b: int +) -> bool: + """If the outgoing edges from the two nodes are similar enough, + it doesn't matter if you are in a or b. + As your path will be the same because the outgoing edges will lead you to + the same nodes anyways.""" + + ignored = {node_b, node_a} + a_edges = { + (target, k) + for target, k in _outgoing_edges(graph, node_a) + if target not in ignored + } + b_edges = { + (target, k) + for target, k in _outgoing_edges(graph, node_b) + if target not in ignored + } + return a_edges == b_edges or not a_edges or not b_edges + + +def _nodes_are_equivalent( + graph: "networkx.MultiDiGraph", node_a: int, node_b: int, max_history: int +) -> bool: + """Decides if two nodes are equivalent based on their fingerprints.""" + return graph.nodes[node_a]["label"] == graph.nodes[node_b]["label"] and ( + _outgoing_edges_are_similar(graph, node_a, node_b) + or _incoming_edges(graph, node_a) == _incoming_edges(graph, node_b) + or _fingerprint_node(graph, node_a, max_history) + == _fingerprint_node(graph, node_b, max_history) + ) + + +def _add_edge( + graph: "networkx.MultiDiGraph", + vertex_u: int, + vertex_v: int, + key: Optional[Text], + label: Optional[Text] = None, + **kwargs: Any, +) -> None: + """Adds an edge to the graph if the edge is not already present. 
Uses the + label as the key.""" + + if key is None: + key = EDGE_NONE_LABEL + + if key == EDGE_NONE_LABEL: + label = "" + + if not graph.has_edge(vertex_u, vertex_v, key=EDGE_NONE_LABEL): + graph.add_edge(vertex_u, vertex_v, key=key, label=label, **kwargs) + else: + data = graph.get_edge_data(vertex_u, vertex_v, key=EDGE_NONE_LABEL) + _transfer_style(kwargs, data) + + +def _transfer_style( + source: Dict[Text, Any], target: Dict[Text, Any] +) -> Dict[Text, Any]: + """Copy over class names from source to target for all special classes. + Used if a node is highlighted and merged with another node.""" + + clazzes = source.get("class", "") + + special_classes = {"dashed", "active"} + + if "class" not in target: + target["class"] = "" + + for copy in special_classes: + if copy in clazzes and copy not in target["class"]: + target["class"] += " " + copy + + target["class"] = target["class"].strip() + return target + + +def _merge_equivalent_nodes(graph: "networkx.MultiDiGraph", max_history: int) -> None: + """Searches for equivalent nodes in the graph and merges them.""" + + changed = True + # every node merge changes the graph and can trigger previously + # impossible node merges - we need to repeat until + # the graph doesn't change anymore + while changed: + changed = False + remaining_node_ids = [n for n in graph.nodes() if n > 0] + for index, i in enumerate(remaining_node_ids): + if graph.has_node(i): + # assumes node equivalence is cumulative + for j in remaining_node_ids[index + 1 :]: + if graph.has_node(j) and _nodes_are_equivalent( + graph, i, j, max_history + ): + # make sure we keep special styles + _transfer_style( + graph.nodes(data=True)[j], graph.nodes(data=True)[i] + ) + + changed = True + # moves all outgoing edges to the other node + j_outgoing_edges = list( + graph.out_edges(j, keys=True, data=True) + ) + for _, succ_node, k, d in j_outgoing_edges: + _add_edge( + graph, + i, + succ_node, + k, + d.get("label"), + **{"class": d.get("class", "")}, + ) + graph.remove_edge(j, succ_node) + # moves all incoming edges to the other node + j_incoming_edges = list(graph.in_edges(j, keys=True, data=True)) + for prev_node, _, k, d in j_incoming_edges: + _add_edge( + graph, + prev_node, + i, + k, + d.get("label"), + **{"class": d.get("class", "")}, + ) + graph.remove_edge(prev_node, j) + graph.remove_node(j) + + +async def _replace_edge_labels_with_nodes( + graph: "networkx.MultiDiGraph", + next_id: int, + interpreter: NaturalLanguageInterpreter, + nlu_training_data: "TrainingData", +) -> None: + """User messages are created as edge labels. This removes the labels and + creates nodes instead. + The algorithms (e.g. merging) are simpler if the user messages are labels + on the edges. But it sometimes + looks better if in the final graphs the user messages are nodes instead + of edge labels. 
+ """ + if nlu_training_data: + message_generator = UserMessageGenerator(nlu_training_data) + else: + message_generator = None + + edges = list(graph.edges(keys=True, data=True)) + for edge_label_s, edge_label_e, edge_label_k, edge_label_d in edges: + if edge_label_k != EDGE_NONE_LABEL: + if ( + message_generator + and edge_label_d.get("label", edge_label_k) is not None + ): + parsed_info = await interpreter.parse( + edge_label_d.get("label", edge_label_k) + ) + label = message_generator.message_for_data(parsed_info) + else: + label = edge_label_d.get("label", edge_label_k) + next_id += 1 + graph.remove_edge(edge_label_s, edge_label_e, edge_label_k) + graph.add_node( + next_id, + label=label, + shape="rect", + style="filled", + fillcolor="lightblue", + **_transfer_style(edge_label_d, {"class": "intent"}), + ) + graph.add_edge( + edge_label_s, next_id, **{"class": edge_label_d.get("class", "")} + ) + graph.add_edge( + next_id, edge_label_e, **{"class": edge_label_d.get("class", "")} + ) + + +def visualization_html_path() -> Text: + import pkg_resources + + return pkg_resources.resource_filename(__name__, VISUALIZATION_TEMPLATE_PATH) + + +def persist_graph(graph: "networkx.Graph", output_file: Text) -> None: + """Plots the graph and persists it into a html file.""" + import networkx as nx + + expg = nx.nx_pydot.to_pydot(graph) + + template = rasa.shared.utils.io.read_file(visualization_html_path()) + + # Insert graph into template + template = template.replace("// { is-client }", "isClient = true", 1) + graph_as_text = expg.to_string() + # escape backslashes + graph_as_text = graph_as_text.replace("\\", "\\\\") + template = template.replace("// { graph-content }", f"graph = `{graph_as_text}`", 1) + + rasa.shared.utils.io.write_text_file(template, output_file) + + +def _length_of_common_action_prefix(this: List[Event], other: List[Event]) -> int: + """Calculate number of actions that two conversations have in common.""" + + num_common_actions = 0 + t_cleaned = [e for e in this if e.type_name in {"user", "action"}] + o_cleaned = [e for e in other if e.type_name in {"user", "action"}] + + for i, e in enumerate(t_cleaned): + if i == len(o_cleaned): + break + elif isinstance(e, UserUttered) and isinstance(o_cleaned[i], UserUttered): + continue + elif ( + isinstance(e, ActionExecuted) + and isinstance(o_cleaned[i], ActionExecuted) + and o_cleaned[i].action_name == e.action_name + ): + num_common_actions += 1 + else: + break + return num_common_actions + + +def _add_default_nodes(graph: "networkx.MultiDiGraph", fontsize: int = 12) -> None: + """Add the standard nodes we need.""" + + graph.add_node( + START_NODE_ID, + label="START", + fillcolor="green", + style="filled", + fontsize=fontsize, + **{"class": "start active"}, + ) + graph.add_node( + END_NODE_ID, + label="END", + fillcolor="red", + style="filled", + fontsize=fontsize, + **{"class": "end"}, + ) + graph.add_node(TMP_NODE_ID, label="TMP", style="invis", **{"class": "invisible"}) + + +def _create_graph(fontsize: int = 12) -> "networkx.MultiDiGraph": + """Create a graph and adds the default nodes.""" + + import networkx as nx + + graph = nx.MultiDiGraph() + _add_default_nodes(graph, fontsize) + return graph + + +def _add_message_edge( + graph: "networkx.MultiDiGraph", + message: Optional[Dict[Text, Any]], + current_node: int, + next_node_idx: int, + is_current: bool, +) -> None: + """Create an edge based on the user message.""" + + if message: + message_key = message.get("intent", {}).get("name", None) + message_label = message.get("text", 
None) + else: + message_key = None + message_label = None + + _add_edge( + graph, + current_node, + next_node_idx, + message_key, + message_label, + **{"class": "active" if is_current else ""}, + ) + + +async def visualize_neighborhood( + current: Optional[List[Event]], + event_sequences: List[List[Event]], + output_file: Optional[Text] = None, + max_history: int = 2, + interpreter: NaturalLanguageInterpreter = RegexInterpreter(), + nlu_training_data: Optional["TrainingData"] = None, + should_merge_nodes: bool = True, + max_distance: int = 1, + fontsize: int = 12, +) -> "networkx.MultiDiGraph": + """Given a set of event lists, visualizing the flows.""" + graph = _create_graph(fontsize) + _add_default_nodes(graph) + + next_node_idx = START_NODE_ID + special_node_idx = -3 + path_ellipsis_ends = set() + + for events in event_sequences: + if current and max_distance: + prefix = _length_of_common_action_prefix(current, events) + else: + prefix = len(events) + + message = None + current_node = START_NODE_ID + index = 0 + is_current = events == current + + for index, el in enumerate(events): + if not prefix: + index -= 1 + break + if isinstance(el, UserUttered): + if not el.intent: + message = await interpreter.parse(el.text) + else: + message = el.parse_data + elif ( + isinstance(el, ActionExecuted) and el.action_name != ACTION_LISTEN_NAME + ): + next_node_idx += 1 + graph.add_node( + next_node_idx, + label=el.action_name, + fontsize=fontsize, + **{"class": "active" if is_current else ""}, + ) + + _add_message_edge( + graph, message, current_node, next_node_idx, is_current + ) + current_node = next_node_idx + + message = None + prefix -= 1 + + # determine what the end node of the conversation is going to be + # this can either be an ellipsis "...", the conversation end node + # "END" or a "TMP" node if this is the active conversation + if is_current: + if ( + isinstance(events[index], ActionExecuted) + and events[index].action_name == ACTION_LISTEN_NAME + ): + next_node_idx += 1 + graph.add_node( + next_node_idx, + label=" ? " + if not message + else message.get("intent", {}).get("name", " ? "), + shape="rect", + **{"class": "intent dashed active"}, + ) + target = next_node_idx + elif current_node: + d = graph.nodes(data=True)[current_node] + d["class"] = "dashed active" + target = TMP_NODE_ID + else: + target = TMP_NODE_ID + elif index == len(events) - 1: + target = END_NODE_ID + elif current_node and current_node not in path_ellipsis_ends: + graph.add_node(special_node_idx, label="...", **{"class": "ellipsis"}) + target = special_node_idx + path_ellipsis_ends.add(current_node) + special_node_idx -= 1 + else: + target = END_NODE_ID + + _add_message_edge(graph, message, current_node, target, is_current) + + if should_merge_nodes: + _merge_equivalent_nodes(graph, max_history) + await _replace_edge_labels_with_nodes( + graph, next_node_idx, interpreter, nlu_training_data + ) + + _remove_auxiliary_nodes(graph, special_node_idx) + + if output_file: + persist_graph(graph, output_file) + return graph + + +def _remove_auxiliary_nodes( + graph: "networkx.MultiDiGraph", special_node_idx: int +) -> None: + """Remove any temporary or unused nodes.""" + + graph.remove_node(TMP_NODE_ID) + + if not len(list(graph.predecessors(END_NODE_ID))): + graph.remove_node(END_NODE_ID) + + # remove duplicated "..." 
nodes after merging + ps = set() + for i in range(special_node_idx + 1, TMP_NODE_ID): + for pred in list(graph.predecessors(i)): + if pred in ps: + graph.remove_node(i) + else: + ps.add(pred) + + +async def visualize_stories( + story_steps: List[StoryStep], + domain: Domain, + output_file: Optional[Text], + max_history: int, + interpreter: NaturalLanguageInterpreter = RegexInterpreter(), + nlu_training_data: Optional["TrainingData"] = None, + should_merge_nodes: bool = True, + fontsize: int = 12, +) -> "networkx.MultiDiGraph": + """Given a set of stories, generates a graph visualizing the flows in the stories. + Visualization is always a trade off between making the graph as small as + possible while + at the same time making sure the meaning doesn't change to "much". The + algorithm will + compress the graph generated from the stories to merge nodes that are + similar. Hence, + the algorithm might create paths through the graph that aren't actually + specified in the + stories, but we try to minimize that. + Output file defines if and where a file containing the plotted graph + should be stored. + The history defines how much 'memory' the graph has. This influences in + which situations the + algorithm will merge nodes. Nodes will only be merged if they are equal + within the history, this + means the larger the history is we take into account the less likely it + is we merge any nodes. + The training data parameter can be used to pass in a Rasa NLU training + data instance. It will + be used to replace the user messages from the story file with actual + messages from the training data. + """ + story_graph = StoryGraph(story_steps) + + g = TrainingDataGenerator( + story_graph, + domain, + use_story_concatenation=False, + tracker_limit=100, + augmentation_factor=0, + ) + completed_trackers = g.generate() + event_sequences = [t.events for t in completed_trackers] + + graph = await visualize_neighborhood( + None, + event_sequences, + output_file, + max_history, + interpreter, + nlu_training_data, + should_merge_nodes, + max_distance=1, + fontsize=fontsize, + ) + return graph diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py index b6a75f9d1b71..2aed6c6bc3fd 100644 --- a/rasa/shared/nlu/training_data/message.py +++ b/rasa/shared/nlu/training_data/message.py @@ -1,401 +1,401 @@ -from typing import Any, Optional, Tuple, Text, Dict, Set, List - -import typing -import copy - -import rasa.shared.utils.io -from rasa.shared.exceptions import RasaException -from rasa.shared.nlu.constants import ( - TEXT, - INTENT, - RESPONSE, - INTENT_RESPONSE_KEY, - METADATA, - METADATA_INTENT, - METADATA_EXAMPLE, - ENTITIES, - ENTITY_ATTRIBUTE_START, - ENTITY_ATTRIBUTE_END, - RESPONSE_IDENTIFIER_DELIMITER, - FEATURE_TYPE_SENTENCE, - FEATURE_TYPE_SEQUENCE, - ACTION_TEXT, - ACTION_NAME, -) -from rasa.shared.constants import DIAGNOSTIC_DATA - -if typing.TYPE_CHECKING: - from rasa.shared.nlu.training_data.features import Features - - -class Message: - def __init__( - self, - data: Optional[Dict[Text, Any]] = None, - output_properties: Optional[Set] = None, - time: Optional[int] = None, - features: Optional[List["Features"]] = None, - **kwargs: Any, - ) -> None: - self.time = time - self.data = data.copy() if data else {} - self.features = features if features else [] - - self.data.update(**kwargs) - - if output_properties: - self.output_properties = output_properties - else: - self.output_properties = set() - self.output_properties.add(TEXT) - - def add_features(self, 
features: Optional["Features"]) -> None: - if features is not None: - self.features.append(features) - - def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None: - """Adds diagnostic data from the `origin` component. - Args: - origin: Name of the component that created the data. - data: The diagnostic data. - """ - if origin in self.get(DIAGNOSTIC_DATA, {}): - rasa.shared.utils.io.raise_warning( - f"Please make sure every pipeline component has a distinct name. " - f"The name '{origin}' appears at least twice and diagnostic " - f"data will be overwritten." - ) - self.data.setdefault(DIAGNOSTIC_DATA, {}) - self.data[DIAGNOSTIC_DATA][origin] = data - - def set(self, prop: Text, info: Any, add_to_output: bool = False) -> None: - """Sets the message's property to the given value. - Args: - prop: Name of the property to be set. - info: Value to be assigned to that property. - add_to_output: Decides whether to add `prop` to the `output_properties`. - """ - self.data[prop] = info - if add_to_output: - self.output_properties.add(prop) - - def get(self, prop: Text, default: Optional[Any] = None) -> Any: - return self.data.get(prop, default) - - def as_dict_nlu(self) -> dict: - """Get dict representation of message as it would appear in training data""" - - dictionary_message = self.as_dict() - if dictionary_message.get(INTENT, None): - dictionary_message[INTENT] = self.get_full_intent() - dictionary_message.pop(RESPONSE, None) - dictionary_message.pop(INTENT_RESPONSE_KEY, None) - return dictionary_message - - def as_dict(self, only_output_properties: bool = False) -> Dict: - if only_output_properties: - dictionary_data = { - key: value - for key, value in self.data.items() - if key in self.output_properties - } - else: - dictionary_data = self.data - - # Filter all keys with None value. These could have come while building the - # Message object in markdown format - return { - key: value for key, value in dictionary_data.items() if value is not None - } - - def __eq__(self, other: Any) -> bool: - if not isinstance(other, Message): - return False - else: - return other.fingerprint() == self.fingerprint() - - def __hash__(self) -> int: - """Calculate a hash for the message. - Returns: - Hash of the message. - """ - return int(self.fingerprint(), 16) - - def fingerprint(self) -> Text: - """Calculate a string fingerprint for the message. - Returns: - Fingerprint of the message. - """ - return rasa.shared.utils.io.deep_container_fingerprint(self.data) - - @classmethod - def build( - cls, - text: Text, - intent: Optional[Text] = None, - entities: Optional[List[Dict[Text, Any]]] = None, - intent_metadata: Optional[Any] = None, - example_metadata: Optional[Any] = None, - **kwargs: Any, - ) -> "Message": - """Builds a Message from `UserUttered` data. 
- Args: - text: text of a user's utterance - intent: an intent of the user utterance - entities: entities in the user's utterance - intent_metadata: optional metadata for the intent - example_metadata: optional metadata for the intent example - Returns: - Message - """ - data: Dict[Text, Any] = {TEXT: text} - if intent: - split_intent, response_key = cls.separate_intent_response_key(intent) - if split_intent: - data[INTENT] = split_intent - if response_key: - # intent label can be of the form - {intent}/{response_key}, - # so store the full intent label in intent_response_key - data[INTENT_RESPONSE_KEY] = intent - if entities: - data[ENTITIES] = entities - if intent_metadata is not None: - data[METADATA] = {METADATA_INTENT: intent_metadata} - if example_metadata is not None: - data.setdefault(METADATA, {})[METADATA_EXAMPLE] = example_metadata - - return cls(data, **kwargs) - - def get_full_intent(self) -> Text: - """Get intent as it appears in training data""" - - return ( - self.get(INTENT_RESPONSE_KEY) - if self.get(INTENT_RESPONSE_KEY) - else self.get(INTENT) - ) - - def get_combined_intent_response_key(self) -> Text: - """Get intent as it appears in training data.""" - rasa.shared.utils.io.raise_warning( - "`get_combined_intent_response_key` is deprecated and " - "will be removed in Rasa 3.0.0. " - "Please use `get_full_intent` instead.", - category=DeprecationWarning, - ) - return self.get_full_intent() - - @staticmethod - def separate_intent_response_key( - original_intent: Text, - ) -> Tuple[Text, Optional[Text]]: - - split_title = original_intent.split(RESPONSE_IDENTIFIER_DELIMITER) - if len(split_title) == 2: - return split_title[0], split_title[1] - elif len(split_title) == 1: - return split_title[0], None - - raise RasaException( - f"Intent name '{original_intent}' is invalid, " - f"it cannot contain more than one '{RESPONSE_IDENTIFIER_DELIMITER}'." - ) - - def get_sparse_features( - self, attribute: Text, featurizers: Optional[List[Text]] = None - ) -> Tuple[Optional["Features"], Optional["Features"]]: - """Gets all sparse features for the attribute given the list of featurizers. - If no featurizers are provided, all available features will be considered. - Args: - attribute: message attribute - featurizers: names of featurizers to consider - Returns: - Sparse features. - """ - if featurizers is None: - featurizers = [] - - sequence_features, sentence_features = self._filter_sparse_features( - attribute, featurizers - ) - - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = self._combine_features(sentence_features, featurizers) - - return sequence_features, sentence_features - - def get_dense_features( - self, attribute: Text, featurizers: Optional[List[Text]] = None - ) -> Tuple[Optional["Features"], Optional["Features"]]: - """Gets all dense features for the attribute given the list of featurizers. - If no featurizers are provided, all available features will be considered. - Args: - attribute: message attribute - featurizers: names of featurizers to consider - Returns: - Dense features. 
- """ - if featurizers is None: - featurizers = [] - - sequence_features, sentence_features = self._filter_dense_features( - attribute, featurizers - ) - - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = self._combine_features(sentence_features, featurizers) - - return sequence_features, sentence_features - - def get_all_features( - self, attribute: Text, featurizers: Optional[List[Text]] = None - ) -> List["Features"]: - """Gets all features for the attribute given the list of featurizers. - If no featurizers are provided, all available features will be considered. - Args: - attribute: message attribute - featurizers: names of featurizers to consider - Returns: - Features. - """ - sparse_features = self.get_sparse_features(attribute, featurizers) - dense_features = self.get_dense_features(attribute, featurizers) - - return [f for f in sparse_features + dense_features if f is not None] - - def features_present( - self, attribute: Text, featurizers: Optional[List[Text]] = None - ) -> bool: - """Checks if there are any features present for the attribute and featurizers. - If no featurizers are provided, all available features will be considered. - Args: - attribute: Message attribute. - featurizers: Names of featurizers to consider. - Returns: - ``True``, if features are present, ``False`` otherwise. - """ - if featurizers is None: - featurizers = [] - - ( - sequence_sparse_features, - sentence_sparse_features, - ) = self._filter_sparse_features(attribute, featurizers) - sequence_dense_features, sentence_dense_features = self._filter_dense_features( - attribute, featurizers - ) - - return ( - len(sequence_sparse_features) > 0 - or len(sentence_sparse_features) > 0 - or len(sequence_dense_features) > 0 - or len(sentence_dense_features) > 0 - ) - - def _filter_dense_features( - self, attribute: Text, featurizers: List[Text] - ) -> Tuple[List["Features"], List["Features"]]: - sentence_features = [ - f - for f in self.features - if f.attribute == attribute - and f.is_dense() - and f.type == FEATURE_TYPE_SENTENCE - and (f.origin in featurizers or not featurizers) - ] - sequence_features = [ - f - for f in self.features - if f.attribute == attribute - and f.is_dense() - and f.type == FEATURE_TYPE_SEQUENCE - and (f.origin in featurizers or not featurizers) - ] - return sequence_features, sentence_features - - def _filter_sparse_features( - self, attribute: Text, featurizers: List[Text] - ) -> Tuple[List["Features"], List["Features"]]: - sentence_features = [ - f - for f in self.features - if f.attribute == attribute - and f.is_sparse() - and f.type == FEATURE_TYPE_SENTENCE - and (f.origin in featurizers or not featurizers) - ] - sequence_features = [ - f - for f in self.features - if f.attribute == attribute - and f.is_sparse() - and f.type == FEATURE_TYPE_SEQUENCE - and (f.origin in featurizers or not featurizers) - ] - - return sequence_features, sentence_features - - @staticmethod - def _combine_features( - features: List["Features"], featurizers: Optional[List[Text]] = None - ) -> Optional["Features"]: - combined_features = None - - for f in features: - if combined_features is None: - combined_features = copy.deepcopy(f) - combined_features.origin = featurizers - else: - combined_features.combine_with_features(f) - - return combined_features - - def is_core_or_domain_message(self) -> bool: - """Checks whether the message is a core message or from the domain. - E.g. 
a core message is created from a story or a domain action, - not from the NLU data. - Returns: - True, if message is a core or domain message, false otherwise. - """ - return bool( - self.data.get(ACTION_NAME) - or self.data.get(ACTION_TEXT) - or ( - (self.data.get(INTENT) or self.data.get(RESPONSE)) - and not self.data.get(TEXT) - ) - or ( - self.data.get(TEXT) - and not (self.data.get(INTENT) or self.data.get(RESPONSE)) - ) - ) - - def is_e2e_message(self) -> bool: - """Checks whether the message came from an e2e story. - Returns: - `True`, if message is a from an e2e story, `False` otherwise. - """ - return bool( - (self.get(ACTION_TEXT) and not self.get(ACTION_NAME)) - or (self.get(TEXT) and not self.get(INTENT)) - ) - - def find_overlapping_entities( - self, - ) -> List[Tuple[Dict[Text, Any], Dict[Text, Any]]]: - """Finds any overlapping entity annotations.""" - entities = self.get("entities", [])[:] - entities_with_location = [ - e - for e in entities - if (ENTITY_ATTRIBUTE_START in e.keys() and ENTITY_ATTRIBUTE_END in e.keys()) - ] - entities_with_location.sort(key=lambda e: e[ENTITY_ATTRIBUTE_START]) - overlapping_pairs: List[Tuple[Dict[Text, Any], Dict[Text, Any]]] = [] - for i, entity in enumerate(entities_with_location): - for other_entity in entities_with_location[i + 1 :]: - if other_entity[ENTITY_ATTRIBUTE_START] < entity[ENTITY_ATTRIBUTE_END]: - overlapping_pairs.append((entity, other_entity)) - else: - break - return overlapping_pairs +from typing import Any, Optional, Tuple, Text, Dict, Set, List + +import typing +import copy + +import rasa.shared.utils.io +from rasa.shared.exceptions import RasaException +from rasa.shared.nlu.constants import ( + TEXT, + INTENT, + RESPONSE, + INTENT_RESPONSE_KEY, + METADATA, + METADATA_INTENT, + METADATA_EXAMPLE, + ENTITIES, + ENTITY_ATTRIBUTE_START, + ENTITY_ATTRIBUTE_END, + RESPONSE_IDENTIFIER_DELIMITER, + FEATURE_TYPE_SENTENCE, + FEATURE_TYPE_SEQUENCE, + ACTION_TEXT, + ACTION_NAME, +) +from rasa.shared.constants import DIAGNOSTIC_DATA + +if typing.TYPE_CHECKING: + from rasa.shared.nlu.training_data.features import Features + + +class Message: + def __init__( + self, + data: Optional[Dict[Text, Any]] = None, + output_properties: Optional[Set] = None, + time: Optional[int] = None, + features: Optional[List["Features"]] = None, + **kwargs: Any, + ) -> None: + self.time = time + self.data = data.copy() if data else {} + self.features = features if features else [] + + self.data.update(**kwargs) + + if output_properties: + self.output_properties = output_properties + else: + self.output_properties = set() + self.output_properties.add(TEXT) + + def add_features(self, features: Optional["Features"]) -> None: + if features is not None: + self.features.append(features) + + def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None: + """Adds diagnostic data from the `origin` component. + Args: + origin: Name of the component that created the data. + data: The diagnostic data. + """ + if origin in self.get(DIAGNOSTIC_DATA, {}): + rasa.shared.utils.io.raise_warning( + f"Please make sure every pipeline component has a distinct name. " + f"The name '{origin}' appears at least twice and diagnostic " + f"data will be overwritten." + ) + self.data.setdefault(DIAGNOSTIC_DATA, {}) + self.data[DIAGNOSTIC_DATA][origin] = data + + def set(self, prop: Text, info: Any, add_to_output: bool = False) -> None: + """Sets the message's property to the given value. + Args: + prop: Name of the property to be set. 
+ info: Value to be assigned to that property. + add_to_output: Decides whether to add `prop` to the `output_properties`. + """ + self.data[prop] = info + if add_to_output: + self.output_properties.add(prop) + + def get(self, prop: Text, default: Optional[Any] = None) -> Any: + return self.data.get(prop, default) + + def as_dict_nlu(self) -> dict: + """Get dict representation of message as it would appear in training data""" + + dictionary_message = self.as_dict() + if dictionary_message.get(INTENT, None): + dictionary_message[INTENT] = self.get_full_intent() + dictionary_message.pop(RESPONSE, None) + dictionary_message.pop(INTENT_RESPONSE_KEY, None) + return dictionary_message + + def as_dict(self, only_output_properties: bool = False) -> Dict: + if only_output_properties: + dictionary_data = { + key: value + for key, value in self.data.items() + if key in self.output_properties + } + else: + dictionary_data = self.data + + # Filter all keys with None value. These could have come while building the + # Message object in markdown format + return { + key: value for key, value in dictionary_data.items() if value is not None + } + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Message): + return False + else: + return other.fingerprint() == self.fingerprint() + + def __hash__(self) -> int: + """Calculate a hash for the message. + Returns: + Hash of the message. + """ + return int(self.fingerprint(), 16) + + def fingerprint(self) -> Text: + """Calculate a string fingerprint for the message. + Returns: + Fingerprint of the message. + """ + return rasa.shared.utils.io.deep_container_fingerprint(self.data) + + @classmethod + def build( + cls, + text: Text, + intent: Optional[Text] = None, + entities: Optional[List[Dict[Text, Any]]] = None, + intent_metadata: Optional[Any] = None, + example_metadata: Optional[Any] = None, + **kwargs: Any, + ) -> "Message": + """Builds a Message from `UserUttered` data. + Args: + text: text of a user's utterance + intent: an intent of the user utterance + entities: entities in the user's utterance + intent_metadata: optional metadata for the intent + example_metadata: optional metadata for the intent example + Returns: + Message + """ + data: Dict[Text, Any] = {TEXT: text} + if intent: + split_intent, response_key = cls.separate_intent_response_key(intent) + if split_intent: + data[INTENT] = split_intent + if response_key: + # intent label can be of the form - {intent}/{response_key}, + # so store the full intent label in intent_response_key + data[INTENT_RESPONSE_KEY] = intent + if entities: + data[ENTITIES] = entities + if intent_metadata is not None: + data[METADATA] = {METADATA_INTENT: intent_metadata} + if example_metadata is not None: + data.setdefault(METADATA, {})[METADATA_EXAMPLE] = example_metadata + + return cls(data, **kwargs) + + def get_full_intent(self) -> Text: + """Get intent as it appears in training data""" + + return ( + self.get(INTENT_RESPONSE_KEY) + if self.get(INTENT_RESPONSE_KEY) + else self.get(INTENT) + ) + + def get_combined_intent_response_key(self) -> Text: + """Get intent as it appears in training data.""" + rasa.shared.utils.io.raise_warning( + "`get_combined_intent_response_key` is deprecated and " + "will be removed in Rasa 3.0.0. 
" + "Please use `get_full_intent` instead.", + category=DeprecationWarning, + ) + return self.get_full_intent() + + @staticmethod + def separate_intent_response_key( + original_intent: Text, + ) -> Tuple[Text, Optional[Text]]: + + split_title = original_intent.split(RESPONSE_IDENTIFIER_DELIMITER) + if len(split_title) == 2: + return split_title[0], split_title[1] + elif len(split_title) == 1: + return split_title[0], None + + raise RasaException( + f"Intent name '{original_intent}' is invalid, " + f"it cannot contain more than one '{RESPONSE_IDENTIFIER_DELIMITER}'." + ) + + def get_sparse_features( + self, attribute: Text, featurizers: Optional[List[Text]] = None + ) -> Tuple[Optional["Features"], Optional["Features"]]: + """Gets all sparse features for the attribute given the list of featurizers. + If no featurizers are provided, all available features will be considered. + Args: + attribute: message attribute + featurizers: names of featurizers to consider + Returns: + Sparse features. + """ + if featurizers is None: + featurizers = [] + + sequence_features, sentence_features = self._filter_sparse_features( + attribute, featurizers + ) + + sequence_features = self._combine_features(sequence_features, featurizers) + sentence_features = self._combine_features(sentence_features, featurizers) + + return sequence_features, sentence_features + + def get_dense_features( + self, attribute: Text, featurizers: Optional[List[Text]] = None + ) -> Tuple[Optional["Features"], Optional["Features"]]: + """Gets all dense features for the attribute given the list of featurizers. + If no featurizers are provided, all available features will be considered. + Args: + attribute: message attribute + featurizers: names of featurizers to consider + Returns: + Dense features. + """ + if featurizers is None: + featurizers = [] + + sequence_features, sentence_features = self._filter_dense_features( + attribute, featurizers + ) + + sequence_features = self._combine_features(sequence_features, featurizers) + sentence_features = self._combine_features(sentence_features, featurizers) + + return sequence_features, sentence_features + + def get_all_features( + self, attribute: Text, featurizers: Optional[List[Text]] = None + ) -> List["Features"]: + """Gets all features for the attribute given the list of featurizers. + If no featurizers are provided, all available features will be considered. + Args: + attribute: message attribute + featurizers: names of featurizers to consider + Returns: + Features. + """ + sparse_features = self.get_sparse_features(attribute, featurizers) + dense_features = self.get_dense_features(attribute, featurizers) + + return [f for f in sparse_features + dense_features if f is not None] + + def features_present( + self, attribute: Text, featurizers: Optional[List[Text]] = None + ) -> bool: + """Checks if there are any features present for the attribute and featurizers. + If no featurizers are provided, all available features will be considered. + Args: + attribute: Message attribute. + featurizers: Names of featurizers to consider. + Returns: + ``True``, if features are present, ``False`` otherwise. 
+ """ + if featurizers is None: + featurizers = [] + + ( + sequence_sparse_features, + sentence_sparse_features, + ) = self._filter_sparse_features(attribute, featurizers) + sequence_dense_features, sentence_dense_features = self._filter_dense_features( + attribute, featurizers + ) + + return ( + len(sequence_sparse_features) > 0 + or len(sentence_sparse_features) > 0 + or len(sequence_dense_features) > 0 + or len(sentence_dense_features) > 0 + ) + + def _filter_dense_features( + self, attribute: Text, featurizers: List[Text] + ) -> Tuple[List["Features"], List["Features"]]: + sentence_features = [ + f + for f in self.features + if f.attribute == attribute + and f.is_dense() + and f.type == FEATURE_TYPE_SENTENCE + and (f.origin in featurizers or not featurizers) + ] + sequence_features = [ + f + for f in self.features + if f.attribute == attribute + and f.is_dense() + and f.type == FEATURE_TYPE_SEQUENCE + and (f.origin in featurizers or not featurizers) + ] + return sequence_features, sentence_features + + def _filter_sparse_features( + self, attribute: Text, featurizers: List[Text] + ) -> Tuple[List["Features"], List["Features"]]: + sentence_features = [ + f + for f in self.features + if f.attribute == attribute + and f.is_sparse() + and f.type == FEATURE_TYPE_SENTENCE + and (f.origin in featurizers or not featurizers) + ] + sequence_features = [ + f + for f in self.features + if f.attribute == attribute + and f.is_sparse() + and f.type == FEATURE_TYPE_SEQUENCE + and (f.origin in featurizers or not featurizers) + ] + + return sequence_features, sentence_features + + @staticmethod + def _combine_features( + features: List["Features"], featurizers: Optional[List[Text]] = None + ) -> Optional["Features"]: + combined_features = None + + for f in features: + if combined_features is None: + combined_features = copy.deepcopy(f) + combined_features.origin = featurizers + else: + combined_features.combine_with_features(f) + + return combined_features + + def is_core_or_domain_message(self) -> bool: + """Checks whether the message is a core message or from the domain. + E.g. a core message is created from a story or a domain action, + not from the NLU data. + Returns: + True, if message is a core or domain message, false otherwise. + """ + return bool( + self.data.get(ACTION_NAME) + or self.data.get(ACTION_TEXT) + or ( + (self.data.get(INTENT) or self.data.get(RESPONSE)) + and not self.data.get(TEXT) + ) + or ( + self.data.get(TEXT) + and not (self.data.get(INTENT) or self.data.get(RESPONSE)) + ) + ) + + def is_e2e_message(self) -> bool: + """Checks whether the message came from an e2e story. + Returns: + `True`, if message is a from an e2e story, `False` otherwise. 
+ """ + return bool( + (self.get(ACTION_TEXT) and not self.get(ACTION_NAME)) + or (self.get(TEXT) and not self.get(INTENT)) + ) + + def find_overlapping_entities( + self, + ) -> List[Tuple[Dict[Text, Any], Dict[Text, Any]]]: + """Finds any overlapping entity annotations.""" + entities = self.get("entities", [])[:] + entities_with_location = [ + e + for e in entities + if (ENTITY_ATTRIBUTE_START in e.keys() and ENTITY_ATTRIBUTE_END in e.keys()) + ] + entities_with_location.sort(key=lambda e: e[ENTITY_ATTRIBUTE_START]) + overlapping_pairs: List[Tuple[Dict[Text, Any], Dict[Text, Any]]] = [] + for i, entity in enumerate(entities_with_location): + for other_entity in entities_with_location[i + 1 :]: + if other_entity[ENTITY_ATTRIBUTE_START] < entity[ENTITY_ATTRIBUTE_END]: + overlapping_pairs.append((entity, other_entity)) + else: + break + return overlapping_pairs From 2f4392fc581ccee67377255dab2c6f203475325d Mon Sep 17 00:00:00 2001 From: joao vitor silva Date: Thu, 22 Apr 2021 10:15:07 -0300 Subject: [PATCH 09/12] Add zone identifier --- rasa/shared/core/slots.py:Zone.Identifier | 3 +++ .../shared/core/training_data/visualization.py:Zone.Identifier | 3 +++ rasa/shared/nlu/training_data/message.py:Zone.Identifier | 3 +++ 3 files changed, 9 insertions(+) create mode 100644 rasa/shared/core/slots.py:Zone.Identifier create mode 100644 rasa/shared/core/training_data/visualization.py:Zone.Identifier create mode 100644 rasa/shared/nlu/training_data/message.py:Zone.Identifier diff --git a/rasa/shared/core/slots.py:Zone.Identifier b/rasa/shared/core/slots.py:Zone.Identifier new file mode 100644 index 000000000000..053d1127c2fb --- /dev/null +++ b/rasa/shared/core/slots.py:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet diff --git a/rasa/shared/core/training_data/visualization.py:Zone.Identifier b/rasa/shared/core/training_data/visualization.py:Zone.Identifier new file mode 100644 index 000000000000..053d1127c2fb --- /dev/null +++ b/rasa/shared/core/training_data/visualization.py:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet diff --git a/rasa/shared/nlu/training_data/message.py:Zone.Identifier b/rasa/shared/nlu/training_data/message.py:Zone.Identifier new file mode 100644 index 000000000000..053d1127c2fb --- /dev/null +++ b/rasa/shared/nlu/training_data/message.py:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet From 84be66446b1ae08b719987bfe95012db627fa511 Mon Sep 17 00:00:00 2001 From: Rafael Teodosio Date: Thu, 22 Apr 2021 10:20:31 -0300 Subject: [PATCH 10/12] delete message zone Identifier --- rasa/shared/nlu/training_data/message.py:Zone.Identifier | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 rasa/shared/nlu/training_data/message.py:Zone.Identifier diff --git a/rasa/shared/nlu/training_data/message.py:Zone.Identifier b/rasa/shared/nlu/training_data/message.py:Zone.Identifier deleted file mode 100644 index 053d1127c2fb..000000000000 --- a/rasa/shared/nlu/training_data/message.py:Zone.Identifier +++ /dev/null @@ -1,3 +0,0 @@ -[ZoneTransfer] -ZoneId=3 -HostUrl=about:internet From 44fe82b687ba316a7cd3dc7e3000c74fcc300a25 Mon Sep 17 00:00:00 2001 From: Rafael Teodosio Date: Thu, 22 Apr 2021 10:21:18 -0300 Subject: [PATCH 11/12] delete slots zone Identifier --- rasa/shared/core/slots.py:Zone.Identifier | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 rasa/shared/core/slots.py:Zone.Identifier diff --git a/rasa/shared/core/slots.py:Zone.Identifier 
deleted file mode 100644
index 053d1127c2fb..000000000000
--- a/rasa/shared/core/slots.py:Zone.Identifier
+++ /dev/null
@@ -1,3 +0,0 @@
-[ZoneTransfer]
-ZoneId=3
-HostUrl=about:internet

From 9b73002b258b3285b9e0646264ea24e32d633b87 Mon Sep 17 00:00:00 2001
From: Rafael Teodosio
Date: Thu, 22 Apr 2021 10:21:48 -0300
Subject: [PATCH 12/12] Delete visualization Zone.Identifier

---
 .../shared/core/training_data/visualization.py:Zone.Identifier | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 rasa/shared/core/training_data/visualization.py:Zone.Identifier

diff --git a/rasa/shared/core/training_data/visualization.py:Zone.Identifier b/rasa/shared/core/training_data/visualization.py:Zone.Identifier
deleted file mode 100644
index 053d1127c2fb..000000000000
--- a/rasa/shared/core/training_data/visualization.py:Zone.Identifier
+++ /dev/null
@@ -1,3 +0,0 @@
-[ZoneTransfer]
-ZoneId=3
-HostUrl=about:internet
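---

For reviewers, a minimal usage sketch of the two `Message` helpers shown in the
patches above: `separate_intent_response_key` and `find_overlapping_entities`.
The intent strings and entity dicts are invented for illustration, and the
sketch assumes an installed `rasa` so the pre-existing `Message.build` and
`Message.set` helpers are available; it is not part of the patch itself.

    from rasa.shared.nlu.training_data.message import Message

    # Retrieval intents join the base intent and the response key with "/"
    # (RESPONSE_IDENTIFIER_DELIMITER), so the name splits into two parts.
    intent, response_key = Message.separate_intent_response_key("faq/ask_howto")
    assert (intent, response_key) == ("faq", "ask_howto")

    # A plain intent name has no response key.
    intent, response_key = Message.separate_intent_response_key("greet")
    assert response_key is None

    # Two annotations covering the same character span overlap, so
    # find_overlapping_entities reports them as a pair.
    message = Message.build(text="fly to berlin")
    message.set("entities", [
        {"entity": "city", "start": 7, "end": 13, "value": "berlin"},
        {"entity": "location", "start": 7, "end": 13, "value": "berlin"},
    ])
    assert len(message.find_overlapping_entities()) == 1

Both helpers operate purely on the message's `data` dict, so they can be
exercised without a trained NLU pipeline.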