diff --git a/examples/e2ebot/data/stories.yml b/examples/e2ebot/data/stories.yml index bf884abf1856..cab5ea7113ca 100644 --- a/examples/e2ebot/data/stories.yml +++ b/examples/e2ebot/data/stories.yml @@ -10,7 +10,7 @@ stories: - story: sad path (text to text) steps: - - user: "Hello" + - user: "[Hello](bla)" - bot: "Welcome to moodbot. How are you feeling today?" - user: "Horrible" - bot: "Oh no! Here is a kitten photo. Did it help?" diff --git a/examples/e2ebot/domain.yml b/examples/e2ebot/domain.yml index 17b3faba2a75..5c35c3d83a7b 100644 --- a/examples/e2ebot/domain.yml +++ b/examples/e2ebot/domain.yml @@ -1,5 +1,5 @@ version: "2.0" - +# TODO create a bot that makes sense actions: - utter_greet - utter_happy @@ -9,3 +9,6 @@ actions: intents: - greet - mood_great + +entities: + - bla diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 3b86d4962cd3..e702134a0bf6 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -1,10 +1,11 @@ import logging import numpy as np import scipy.sparse -from typing import List, Optional, Dict, Text, Set +from typing import List, Optional, Dict, Text, Set, Any from collections import defaultdict import rasa.shared.utils.io +from rasa.nlu.constants import TOKENS_NAMES from rasa.shared.core.domain import SubState, State, Domain from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.shared.core.constants import PREVIOUS_ACTION, ACTIVE_LOOP, USER, SLOTS @@ -16,9 +17,15 @@ ACTION_TEXT, ACTION_NAME, INTENT, + TEXT, + NO_ENTITY_TAG, + ENTITY_ATTRIBUTE_TYPE, + ENTITY_TAGS, ) from rasa.shared.nlu.training_data.features import Features from rasa.shared.nlu.training_data.message import Message +from rasa.utils.tensorflow.model_data_utils import TAG_ID_ORIGIN +from rasa.utils.tensorflow.constants import IDS logger = logging.getLogger(__name__) @@ -36,6 +43,23 @@ class SingleStateFeaturizer: def __init__(self) -> None: self._default_feature_states = {} self.action_texts = [] + self.entity_tag_id_mapping = {} + + def get_entity_tag_ids(self) -> Dict[Text, int]: + """Returns the tag to index mapping for entities. + + Returns: + Tag to index mapping. + """ + if ENTITIES not in self._default_feature_states: + return {} + + tag_ids = { + tag: idx + 1 # +1 to keep 0 for the NO_ENTITY_TAG + for tag, idx in self._default_feature_states[ENTITIES].items() + } + tag_ids[NO_ENTITY_TAG] = 0 + return tag_ids def prepare_from_domain(self, domain: Domain) -> None: """Gets necessary information for featurization from domain. 
@@ -55,6 +79,7 @@ def convert_to_dict(feature_states: List[Text]) -> Dict[Text, int]: self._default_feature_states[SLOTS] = convert_to_dict(domain.slot_states) self._default_feature_states[ACTIVE_LOOP] = convert_to_dict(domain.form_names) self.action_texts = domain.action_texts + self.entity_tag_id_mapping = self.get_entity_tag_ids() def _state_features_for_attribute( self, sub_state: SubState, attribute: Text @@ -84,7 +109,7 @@ def _create_features( features = np.zeros(len(self._default_feature_states[attribute]), np.float32) for state_feature, value in state_features.items(): - # check that the value is in default_feature_states to be able to assigh + # check that the value is in default_feature_states to be able to assign # its value if state_feature in self._default_feature_states[attribute]: features[self._default_feature_states[attribute][state_feature]] = value @@ -215,6 +240,51 @@ def encode_state( return state_features + def encode_entities( + self, entity_data: Dict[Text, Any], interpreter: NaturalLanguageInterpreter + ) -> Dict[Text, List["Features"]]: + """Encode the given entity data with the help of the given interpreter. + + Produce numeric entity tags for tokens. + + Args: + entity_data: The dict containing the text and entity labels and locations + interpreter: The interpreter used to encode the state + + Returns: + A dictionary of entity type to list of features. + """ + from rasa.nlu.test import determine_token_labels + + # TODO + # The entity states used to create the tag-idx-mapping contains the + # entities and the concatenated entity and roles/groups. We do not + # distinguish between entities and roles/groups right now. + # TODO + # Should we support BILOU tagging? + + if TEXT not in entity_data or len(self.entity_tag_id_mapping) < 2: + # we cannot build a classifier if there are fewer than 2 classes + return {} + + parsed_text = interpreter.featurize_message(Message({TEXT: entity_data[TEXT]})) + entities = entity_data.get(ENTITIES, []) + + _tags = [] + for token in parsed_text.get(TOKENS_NAMES[TEXT]): + _tag = determine_token_labels( + token, entities, attribute_key=ENTITY_ATTRIBUTE_TYPE + ) + # TODO handle if tag is not in mapping + _tags.append(self.entity_tag_id_mapping[_tag]) + + # transpose to have seq_len x 1 + return { + ENTITY_TAGS: [ + Features(np.array([_tags]).T, IDS, ENTITY_TAGS, TAG_ID_ORIGIN,) + ] + } + def _encode_action( self, action: Text, interpreter: NaturalLanguageInterpreter ) -> Dict[Text, List["Features"]]: diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py index 4f250c535ea8..d12b92a42e79 100644 --- a/rasa/core/featurizers/tracker_featurizers.py +++ b/rasa/core/featurizers/tracker_featurizers.py @@ -3,15 +3,15 @@ import jsonpickle import logging -from rasa.shared.nlu.constants import TEXT, INTENT +from rasa.shared.nlu.constants import TEXT, INTENT, ENTITIES from rasa.shared.exceptions import RasaException from tqdm import tqdm -from typing import Tuple, List, Optional, Dict, Text, Union +from typing import Tuple, List, Optional, Dict, Text, Union, Any import numpy as np from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer from rasa.shared.core.domain import State, Domain -from rasa.shared.core.events import ActionExecuted +from rasa.shared.core.events import ActionExecuted, UserUttered from rasa.shared.core.trackers import ( DialogueStateTracker, is_prev_action_listen_in_state, @@ -91,6 +91,43 @@ def _convert_labels_to_ids( ] ) + def _create_entity_tags( + self,
trackers_as_entities: List[List[Dict[Text, Any]]], + interpreter: NaturalLanguageInterpreter, + ) -> List[List[Dict[Text, List["Features"]]]]: + return [ + [ + self.state_featurizer.encode_entities(entity_data, interpreter) + for entity_data in trackers_entities + ] + for trackers_entities in trackers_as_entities + ] + + @staticmethod + def _entity_data(event: UserUttered) -> Dict[Text, Any]: + if event.text: + return {TEXT: event.text, ENTITIES: event.entities} + + # input is not textual, so add empty dict + return {} + + def training_states_actions_and_entities( + self, trackers: List[DialogueStateTracker], domain: Domain + ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: + """Transforms list of trackers to lists of states, actions and entity data. + + Args: + trackers: The trackers to transform + domain: The domain + + Returns: + A tuple of list of states, list of actions and list of entity data. + """ + raise NotImplementedError( + "Featurizer must have the capacity to encode trackers to feature vectors" + ) + def training_states_and_actions( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[State]], List[List[Text]]]: @@ -103,16 +140,23 @@ def training_states_and_actions( Returns: A tuple of list of states and list of actions. """ - raise NotImplementedError( - "Featurizer must have the capacity to encode trackers to feature vectors" - ) + ( + trackers_as_states, + trackers_as_actions, + _, + ) = self.training_states_actions_and_entities(trackers, domain) + return trackers_as_states, trackers_as_actions def featurize_trackers( self, trackers: List[DialogueStateTracker], domain: Domain, interpreter: NaturalLanguageInterpreter, - ) -> Tuple[List[List[Dict[Text, List["Features"]]]], np.ndarray]: + ) -> Tuple[ + List[List[Dict[Text, List["Features"]]]], + np.ndarray, + List[List[Dict[Text, List["Features"]]]], + ]: """Featurize the training trackers. Args: @@ -137,14 +181,17 @@ def featurize_trackers( self.state_featurizer.prepare_from_domain(domain) - trackers_as_states, trackers_as_actions = self.training_states_and_actions( - trackers, domain - ) + ( + trackers_as_states, + trackers_as_actions, + trackers_as_entities, + ) = self.training_states_actions_and_entities(trackers, domain) tracker_state_features = self._featurize_states(trackers_as_states, interpreter) label_ids = self._convert_labels_to_ids(trackers_as_actions, domain) + entity_tags = self._create_entity_tags(trackers_as_entities, interpreter) - return tracker_state_features, label_ids + return tracker_state_features, label_ids, entity_tags @staticmethod def _choose_last_user_input( @@ -252,23 +299,22 @@ class FullDialogueTrackerFeaturizer(TrackerFeaturizer): Training data is padded up to the length of the longest dialogue with -1. """ - def training_states_and_actions( + def training_states_actions_and_entities( self, trackers: List[DialogueStateTracker], domain: Domain - ) -> Tuple[List[List[State]], List[List[Text]]]: - """Transforms list of trackers to lists of states and actions. - - Training data is padded up to the length of the longest dialogue with -1. + ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: + """Transforms list of trackers to lists of states, actions and entity data. Args: trackers: The trackers to transform domain: The domain Returns: - A tuple of list of states and list of actions. + A tuple of list of states, list of actions and list of entity data. 
""" trackers_as_states = [] trackers_as_actions = [] + trackers_as_entities = [] logger.debug( "Creating states and action examples from " @@ -285,7 +331,12 @@ def training_states_and_actions( delete_first_state = False actions = [] + entities = [] + entity_data = {} for event in tracker.applied_events(): + if isinstance(event, UserUttered): + entity_data = self._entity_data(event) + if not isinstance(event, ActionExecuted): continue @@ -293,6 +344,7 @@ def training_states_and_actions( # only actions which can be # predicted at a stories start actions.append(event.action_name or event.action_text) + entities.append(entity_data) else: # unpredictable actions can be # only the first in the story @@ -303,13 +355,17 @@ def training_states_and_actions( ) delete_first_state = True + # reset entity_data for the the next turn + entity_data = {} + if delete_first_state: states = states[1:] trackers_as_states.append(states[:-1]) trackers_as_actions.append(actions) + trackers_as_entities.append(entities) - return trackers_as_states, trackers_as_actions + return trackers_as_states, trackers_as_actions, trackers_as_entities def prediction_states( self, @@ -386,23 +442,22 @@ def _hash_example( frozen_actions = (action,) return hash((frozen_states, frozen_actions)) - def training_states_and_actions( + def training_states_actions_and_entities( self, trackers: List[DialogueStateTracker], domain: Domain - ) -> Tuple[List[List[State]], List[List[Text]]]: - """Transforms list of trackers to lists of states and actions. - - Training data is padded up to the length of the longest dialogue with -1. + ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: + """Transforms list of trackers to lists of states, actions and entity data. Args: trackers: The trackers to transform domain: The domain Returns: - A tuple of list of states and list of actions. + A tuple of list of states, list of actions and list of entity data. """ trackers_as_states = [] trackers_as_actions = [] + trackers_as_entities = [] # from multiple states that create equal featurizations # we only need to keep one. 
@@ -422,7 +477,11 @@ def training_states_and_actions( states = self._create_states(tracker, domain) states_length_for_action = 0 + entity_data = {} for event in tracker.applied_events(): + if isinstance(event, UserUttered): + entity_data = self._entity_data(event) + if not isinstance(event, ActionExecuted): continue @@ -448,15 +507,19 @@ def training_states_and_actions( trackers_as_actions.append( [event.action_name or event.action_text] ) + trackers_as_entities.append([entity_data]) else: trackers_as_states.append(sliced_states) trackers_as_actions.append([event.action_name or event.action_text]) + trackers_as_entities.append([entity_data]) + # reset entity_data for the next turn + entity_data = {} pbar.set_postfix({"# actions": "{:d}".format(len(trackers_as_actions))}) logger.debug("Created {} action examples.".format(len(trackers_as_actions))) - return trackers_as_states, trackers_as_actions + return trackers_as_states, trackers_as_actions, trackers_as_entities def prediction_states( self, diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py index 0d5db5a6c01b..6517b8e3362b 100644 --- a/rasa/core/policies/policy.py +++ b/rasa/core/policies/policy.py @@ -143,7 +143,11 @@ def featurize_for_training( domain: Domain, interpreter: NaturalLanguageInterpreter, **kwargs: Any, - ) -> Tuple[List[List[Dict[Text, List["Features"]]]], np.ndarray]: + ) -> Tuple[ + List[List[Dict[Text, List["Features"]]]], + np.ndarray, + List[List[Dict[Text, List["Features"]]]], + ]: """Transform training trackers into a vector representation. The trackers, consisting of multiple turns, will be transformed @@ -163,7 +167,7 @@ def featurize_for_training( trackers """ - state_features, label_ids = self.featurizer.featurize_trackers( + state_features, label_ids, entity_tags = self.featurizer.featurize_trackers( training_trackers, domain, interpreter ) @@ -175,8 +179,9 @@ def featurize_for_training( ) state_features = state_features[:max_training_samples] label_ids = label_ids[:max_training_samples] + entity_tags = entity_tags[:max_training_samples] - return state_features, label_ids + return state_features, label_ids, entity_tags def train( self, diff --git a/rasa/core/policies/sklearn_policy.py b/rasa/core/policies/sklearn_policy.py index 93abf244d931..0126e60e15f7 100644 --- a/rasa/core/policies/sklearn_policy.py +++ b/rasa/core/policies/sklearn_policy.py @@ -233,7 +233,7 @@ def train( interpreter: NaturalLanguageInterpreter, **kwargs: Any, ) -> None: - tracker_state_features, label_ids = self.featurize_for_training( + tracker_state_features, label_ids, _ = self.featurize_for_training( training_trackers, domain, interpreter, **kwargs ) training_data, zero_state_features = model_data_utils.convert_to_data_format( diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 98f9c8cd3215..b228ea4ec2e8 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -11,6 +11,7 @@ from typing import Any, List, Optional, Text, Dict, Tuple, Union, TYPE_CHECKING import rasa.utils.io as io_utils +from rasa.nlu.classifiers.diet_classifier import EntityTagSpec from rasa.shared.core.domain import Domain from rasa.core.featurizers.tracker_featurizers import ( TrackerFeaturizer, @@ -26,6 +27,8 @@ ENTITIES, VALID_FEATURE_TYPES, FEATURE_TYPE_SENTENCE, + ENTITY_ATTRIBUTE_TYPE, + ENTITY_TAGS, ) from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.core.policies.policy import Policy @@ -43,6 +46,7 @@ from 
rasa.utils.tensorflow.model_data_utils import convert_to_data_format from rasa.utils.tensorflow.constants import ( LABEL, + IDS, TRANSFORMER_SIZE, NUM_TRANSFORMER_LAYERS, NUM_HEADS, @@ -91,6 +95,7 @@ MASK, HIDDEN_LAYERS_SIZES, FEATURIZERS, + ENTITY_RECOGNITION, ) @@ -101,8 +106,9 @@ logger = logging.getLogger(__name__) LABEL_KEY = LABEL -LABEL_SUB_KEY = "ids" +LABEL_SUB_KEY = IDS LENGTH = "length" +INDICES = "indices" SENTENCE_FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT] SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT, f"{LABEL}_{ACTION_TEXT}"] LABEL_FEATURES_TO_ENCODE = [f"{LABEL}_{ACTION_NAME}", f"{LABEL}_{ACTION_TEXT}"] @@ -135,7 +141,6 @@ class TEDPolicy(Policy): # Hidden layer sizes for layers before the dialogue and label embedding layers. # The number of hidden layers is equal to the length of the corresponding # list. - # TODO add 2 parallel NNs: transformer for text and ffnn for names # Hidden layer sizes for layers before the embedding layers for user message # and labels. # The number of hidden layers is equal to the length of the corresponding @@ -154,10 +159,14 @@ class TEDPolicy(Policy): }, CONCAT_DIMENSION: {TEXT: 128, ACTION_TEXT: 128, f"{LABEL}_{ACTION_TEXT}": 128}, ENCODING_DIMENSION: 50, - # Number of units in transformer + # Number of units in sequence transformer TRANSFORMER_SIZE: 128, - # Number of transformer layers + # Number of sequence transformer layers NUM_TRANSFORMER_LAYERS: 1, + # Number of units in dialogue transformer + f"{DIALOGUE}_{TRANSFORMER_SIZE}": 128, + # Number of dialogue transformer layers + f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}": 1, # Number of attention heads in transformer NUM_HEADS: 4, # If 'True' use key relative embeddings in attention @@ -250,6 +259,8 @@ class TEDPolicy(Policy): # Specify what features to use as sequence and sentence features. # By default all features in the pipeline are used. FEATURIZERS: [], + # If set to true, entities are predicted in user utterances. 
+ ENTITY_RECOGNITION: True, } @staticmethod @@ -264,7 +275,8 @@ def __init__( priority: int = DEFAULT_POLICY_PRIORITY, max_history: Optional[int] = None, model: Optional[RasaModel] = None, - zero_state_features: Optional[Dict[Text, List["Features"]]] = None, + fake_features: Optional[Dict[Text, List["Features"]]] = None, + entity_tag_specs: Optional[List[EntityTagSpec]] = None, **kwargs: Any, ) -> None: """Declare instance variables with default values.""" @@ -282,7 +294,9 @@ def __init__( self.model = model - self.zero_state_features = zero_state_features or defaultdict(list) + self._entity_tag_specs = entity_tag_specs + + self.fake_features = fake_features or defaultdict(list) self._label_data: Optional[RasaModelData] = None self.data_example: Optional[Dict[Text, List[np.ndarray]]] = None @@ -296,6 +310,27 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self.config = train_utils.update_similarity_type(self.config) self.config = train_utils.update_evaluation_parameters(self.config) + def _create_entity_tag_specs(self) -> List[EntityTagSpec]: + """Create entity tag specifications with their respective tag id mappings.""" + + _tag_specs = [] + + tag_id_index_mapping = self.featurizer.state_featurizer.get_entity_tag_ids() + + if tag_id_index_mapping: + _tag_specs.append( + EntityTagSpec( + tag_name=ENTITY_ATTRIBUTE_TYPE, + tags_to_ids=tag_id_index_mapping, + ids_to_tags={ + value: key for key, value in tag_id_index_mapping.items() + }, + num_tags=len(tag_id_index_mapping), + ) + ) + + return _tag_specs + def _create_label_data( self, domain: Domain, interpreter: NaturalLanguageInterpreter ) -> Tuple[RasaModelData, List[Dict[Text, List["Features"]]]]: @@ -329,6 +364,7 @@ def _create_model_data( self, tracker_state_features: List[List[Dict[Text, List["Features"]]]], label_ids: Optional[np.ndarray] = None, + entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]] = None, encoded_all_labels: Optional[List[Dict[Text, List["Features"]]]] = None, ) -> RasaModelData: """Combine all model related data into RasaModelData. 
@@ -346,7 +382,11 @@ def _create_model_data( """ model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY) - if label_ids is not None and encoded_all_labels is not None: + if ( + label_ids is not None + and entity_tags is not None + and encoded_all_labels is not None + ): label_ids = np.array( [np.expand_dims(seq_label_ids, -1) for seq_label_ids in label_ids] @@ -357,14 +397,27 @@ def _create_model_data( [FeatureArray(label_ids, number_of_dimensions=3)], ) - attribute_data, self.zero_state_features = convert_to_data_format( + attribute_data, self.fake_features = convert_to_data_format( tracker_state_features, featurizers=self.config[FEATURIZERS] ) + if self.config[ENTITY_RECOGNITION]: + # check that there are real entity tags + if any([any(turn_tags) for turn_tags in entity_tags]): + entity_tags_data, _ = convert_to_data_format(entity_tags) + model_data.add_data(entity_tags_data) + else: + # there are no "real" entity tags + logger.debug( + f"Entity recognition cannot be performed, " + f"set {ENTITY_RECOGNITION} to False" + ) + self.config[ENTITY_RECOGNITION] = False + else: # method is called during prediction attribute_data, _ = convert_to_data_format( tracker_state_features, - self.zero_state_features, + self.fake_features, featurizers=self.config[FEATURIZERS], ) @@ -384,6 +437,9 @@ def _create_model_data( FeatureArray(dialogue_lengths, number_of_dimensions=1) ] + # make sure all keys are in the same order during training and prediction + model_data.sort() + return model_data def train( @@ -403,7 +459,7 @@ def train( return # dealing with training data - tracker_state_features, label_ids = self.featurize_for_training( + tracker_state_features, label_ids, entity_tags = self.featurize_for_training( training_trackers, domain, interpreter, **kwargs ) @@ -413,7 +469,7 @@ def train( # extract actual training data to feed to model model_data = self._create_model_data( - tracker_state_features, label_ids, encoded_all_labels + tracker_state_features, label_ids, entity_tags, encoded_all_labels ) if model_data.is_empty(): logger.error( @@ -422,6 +478,9 @@ def train( ) return + if self.config[ENTITY_RECOGNITION]: + self._entity_tag_specs = self._create_entity_tag_specs() + # keep one example for persisting and loading self.data_example = model_data.first_data_example() @@ -430,6 +489,7 @@ def train( self.config, isinstance(self.featurizer, MaxHistoryTrackerFeaturizer), self._label_data, + self._entity_tag_specs, ) self.model.fit( @@ -458,7 +518,7 @@ def predict_action_probabilities( # create model data from tracker tracker_state_features = [] if ( - INTENT in self.zero_state_features + INTENT in self.fake_features or not tracker.latest_action_name == ACTION_LISTEN_NAME ): # the first example in a batch uses intent @@ -467,7 +527,7 @@ def predict_action_probabilities( [tracker], domain, interpreter, use_text_for_last_user_input=False ) if ( - TEXT in self.zero_state_features + TEXT in self.fake_features and tracker.latest_action_name == ACTION_LISTEN_NAME ): # the second - text, but only after user utterance @@ -489,6 +549,7 @@ def predict_action_probabilities( if ( len(tracker_state_features) == 2 and np.max(confidences[1]) > self.config[E2E_CONFIDENCE_THRESHOLD] + # TODO maybe comparing confidences is better and np.max(similarities[1]) > np.max(similarities[0]) ): batch_index = 1 @@ -499,7 +560,7 @@ def predict_action_probabilities( else: # only one tracker present batch_index = 0 if tracker.latest_action_name == ACTION_LISTEN_NAME: - if TEXT in self.zero_state_features: + if TEXT 
in self.fake_features: is_e2e_prediction = True else: is_e2e_prediction = False @@ -547,14 +608,24 @@ def persist(self, path: Union[Text, Path]) -> None: model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl", self.data_example ) io_utils.pickle_dump( - model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl", - self.zero_state_features, + model_path / f"{SAVE_MODEL_FILE_NAME}.fake_features.pkl", + self.fake_features, ) io_utils.pickle_dump( model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl", dict(self._label_data.data), ) + entity_tag_specs = ( + [tag_spec._asdict() for tag_spec in self._entity_tag_specs] + if self._entity_tag_specs + else [] + ) + rasa.shared.utils.io.dump_obj_as_json_to_file( + model_path / f"{SAVE_MODEL_FILE_NAME}.entity_tag_specs.json", + entity_tag_specs, + ) + @classmethod def load(cls, path: Union[Text, Path]) -> "TEDPolicy": """Loads a policy from the storage. @@ -581,14 +652,30 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": label_data = io_utils.pickle_load( model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl" ) - zero_state_features = io_utils.pickle_load( - model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl" + fake_features = io_utils.pickle_load( + model_path / f"{SAVE_MODEL_FILE_NAME}.fake_features.pkl" ) label_data = RasaModelData(data=label_data) meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl") priority = io_utils.json_unpickle( model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl" ) + entity_tag_specs = rasa.shared.utils.io.read_json_file( + model_path / f"{SAVE_MODEL_FILE_NAME}.entity_tag_specs.json" + ) + entity_tag_specs = [ + EntityTagSpec( + tag_name=tag_spec["tag_name"], + ids_to_tags={ + int(key): value for key, value in tag_spec["ids_to_tags"].items() + }, + tags_to_ids={ + key: int(value) for key, value in tag_spec["tags_to_ids"].items() + }, + num_tags=tag_spec["num_tags"], + ) + for tag_spec in entity_tag_specs + ] model_data_example = RasaModelData( label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data @@ -600,10 +687,11 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": model_data_example, data_signature=model_data_example.get_signature(), config=meta, - max_history_tracker_featurizer_used=isinstance( - featurizer, MaxHistoryTrackerFeaturizer - ), + # during prediction we don't care about previous dialogue turns, + # so to save computation time, use only the last one + use_only_last_dialogue_turns=True, label_data=label_data, + entity_tag_specs=entity_tag_specs, ) # build the graph for prediction @@ -624,7 +712,8 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": featurizer=featurizer, priority=priority, model=model, - zero_state_features=zero_state_features, + fake_features=fake_features, + entity_tag_specs=entity_tag_specs, **meta, ) @@ -634,12 +723,13 @@ def __init__( self, data_signature: Dict[Text, Dict[Text, List[FeatureSignature]]], config: Dict[Text, Any], - max_history_tracker_featurizer_used: bool, + use_only_last_dialogue_turns: bool, label_data: RasaModelData, + entity_tag_specs: Optional[List[EntityTagSpec]], ) -> None: super().__init__("TED", config, data_signature, label_data) - self.max_history_tracker_featurizer_used = max_history_tracker_featurizer_used + self.use_only_last_dialogue_turns = use_only_last_dialogue_turns self.predict_data_signature = { feature_name: features @@ -648,13 +738,19 @@ def __init__( in STATE_LEVEL_FEATURES + SENTENCE_FEATURES_TO_ENCODE + [DIALOGUE] } + self._entity_tag_specs = entity_tag_specs + # optimizer 
self.optimizer = tf.keras.optimizers.Adam() # metrics self.action_loss = tf.keras.metrics.Mean(name="loss") self.action_acc = tf.keras.metrics.Mean(name="acc") + self.entity_loss = tf.keras.metrics.Mean(name="e_loss") + self.entity_f1 = tf.keras.metrics.Mean(name="e_f1") self.metrics_to_log += ["loss", "acc"] + if self.config[ENTITY_RECOGNITION]: + self.metrics_to_log += ["e_loss", "e_f1"] # needed for efficient prediction self.all_labels_embed: Optional[tf.Tensor] = None @@ -681,6 +777,8 @@ def _check_data(self) -> None: f"Cannot train '{self.__class__.__name__}' model." ) + # ---CREATING LAYERS HELPERS--- + def _prepare_layers(self) -> None: for name in self.data_signature.keys(): self._prepare_sparse_dense_layer_for(name, self.data_signature) @@ -695,7 +793,11 @@ def _prepare_layers(self) -> None: self._prepare_encoding_layers(name) self._prepare_transformer_layer( - DIALOGUE, self.config[DROP_RATE_DIALOGUE], self.config[DROP_RATE_ATTENTION] + DIALOGUE, + self.config[f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}"], + self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"], + self.config[DROP_RATE_DIALOGUE], + self.config[DROP_RATE_ATTENTION], ) self._prepare_embed_layers(DIALOGUE) @@ -703,6 +805,9 @@ def _prepare_layers(self) -> None: self._prepare_dot_product_loss(LABEL, self.config[SCALE_LOSS]) + if self.config[ENTITY_RECOGNITION]: + self._prepare_entity_recognition_layers() + def _prepare_sparse_dense_layer_for( self, name: Text, signature: Dict[Text, Dict[Text, List[FeatureSignature]]] ) -> None: @@ -757,16 +862,39 @@ def _prepare_encoding_layers(self, name: Text) -> None: f"{name}", [self.config[ENCODING_DIMENSION]], self.config[DROP_RATE_DIALOGUE], + prefix="encoding_layer", ) + # ---GRAPH BUILDING HELPERS--- + + @staticmethod + def _compute_dialogue_indices( + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + ) -> None: + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32) + # wrap in a list, because that's the structure of tf_batch_data + tf_batch_data[DIALOGUE][INDICES] = [ + ( + tf.map_fn( + tf.range, + dialogue_lengths, + fn_output_signature=tf.RaggedTensorSpec( + shape=[None], dtype=tf.int32 + ), + ) + ).values + ] + def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0] # labels cannot have all features "fake" - all_labels_encoded = { - key: self._encode_real_features_per_attribute(self.tf_label_data, key) - for key in self.tf_label_data.keys() - if key != LABEL_KEY - } + all_labels_encoded = {} + for key in self.tf_label_data.keys(): + if key != LABEL_KEY: + attribute_features, _, _ = self._encode_real_features_per_attribute( + self.tf_label_data, key + ) + all_labels_encoded[key] = attribute_features if ( all_labels_encoded.get(f"{LABEL_KEY}_{ACTION_TEXT}") is not None @@ -788,11 +916,11 @@ def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: return all_label_ids, all_labels_embed - def _emebed_dialogue( + def _embed_dialogue( self, dialogue_in: tf.Tensor, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], - ) -> Tuple[tf.Tensor, tf.Tensor]: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Create dialogue level embedding and mask.""" dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) mask = self._compute_mask(dialogue_lengths) @@ -802,7 +930,7 @@ def _emebed_dialogue( ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) - if self.max_history_tracker_featurizer_used: + if self.use_only_last_dialogue_turns: # pick last vector 
if max history featurizer is used dialogue_transformed = tf.expand_dims( self._last_token(dialogue_transformed, dialogue_lengths), 1 @@ -811,11 +939,11 @@ def _emebed_dialogue( dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed) - return dialogue_embed, mask + return dialogue_embed, mask, dialogue_transformed def _encode_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: # The input is a representation of 4d tensor of # shape (batch-size x dialogue-len x sequence-len x units) in 3d of shape # (sum of dialogue history length for all tensors in the batch x @@ -839,30 +967,131 @@ def _encode_features_per_attribute( lambda: self._encode_fake_features_per_attribute(tf_batch_data, attribute), ) + def _get_dense_units( + self, attribute_features_list: List[tf.Tensor], attribute: Text + ) -> int: + # TODO this should be done in corresponding layers once in init + units = 0 + for f in attribute_features_list: + if isinstance(f, tf.SparseTensor): + units += self.config[DENSE_DIMENSION][attribute] + else: + units += f.shape[-1] + return units + + def _get_concat_units( + self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text + ) -> int: + # TODO this should be done in corresponding layers once in init + # calculate concat sequence sentence dim + sentence_units = self._get_dense_units( + tf_batch_data[attribute][SENTENCE], attribute + ) + sequence_units = self._get_dense_units( + tf_batch_data[attribute][SEQUENCE], attribute + ) + + if sequence_units and not sentence_units: + return sequence_units + + if sentence_units and not sequence_units: + return sentence_units + + if sentence_units != sequence_units: + return self.config[CONCAT_DIMENSION][TEXT] + + return sentence_units + def _encode_fake_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> tf.Tensor: - attribute_features_list = tf_batch_data[attribute][SENTENCE] + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + # we need to create real zero tensors with appropriate batch and dialogue dim + # because they are passed to dialogue transformer attribute_mask = tf_batch_data[attribute][MASK][0] batch_dim = tf.shape(attribute_mask)[0] dialogue_dim = tf.shape(attribute_mask)[1] - if attribute in set(SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE): units = self.config[ENCODING_DIMENSION] else: - units = 0 - for f in attribute_features_list: - if isinstance(f, tf.SparseTensor): - units += self.config[DENSE_DIMENSION][attribute] - else: - units += f.shape[-1] + units = self._get_dense_units(tf_batch_data[attribute][SENTENCE], attribute) + + attribute_features = tf.zeros( + (batch_dim, dialogue_dim, units), dtype=tf.float32 + ) + if attribute == TEXT: + # if the input features are fake, we don't process them further, + # but we need to calculate correct last dim (units) so that tf could infer + # the last shape of the tensors + if self.config[f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}"] > 0: + text_transformer_units = self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"] + elif self.config[HIDDEN_LAYERS_SIZES][TEXT]: + text_transformer_units = self.config[HIDDEN_LAYERS_SIZES][TEXT][-1] + else: + text_transformer_units = self._get_concat_units( + tf_batch_data, attribute + ) + + text_transformer_output = tf.zeros( + (0, 0, text_transformer_units), dtype=tf.float32 + ) + text_sequence_lengths = tf.zeros((0, 1), dtype=tf.int32) + else: + # simulate 
None with empty tensor of zeros + text_transformer_output = tf.zeros((0,)) + text_sequence_lengths = tf.zeros((0,)) + + return attribute_features, text_transformer_output, text_sequence_lengths - return tf.zeros((batch_dim, dialogue_dim, units), dtype=tf.float32) + @staticmethod + def _create_last_dialogue_turns_mask( + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text + ) -> tf.Tensor: + # Since use_only_last_dialogue_turns is True, + # we need to find the locations of last dialogue turns in + # (combined batch dimension and dialogue length,) dimension, + # so that we can use `_sequence_lengths` as a boolean mask to pick + # which ones are "real" textual input in these last dialogue turns. + + # In order to do that we can use the given `dialogue_lengths`. + # For example: + # If we have `dialogue_lengths = [2, 1, 3]`, then + # `dialogue_indices = [0, 1, 0, 0, 1, 2]` here we can spot that `0` + # always indicates the first dialogue turn, + # which means that the previous dialogue turn is the last dialogue turn. + # Combining this with the fact that the last element in + # `dialogue_indices` is always the last dialogue turn, we can add + # a `0` to the end, getting + # `_dialogue_indices = [0, 1, 0, 0, 1, 2, 0]`. + # Then removing the first element + # `_last_dialogue_turn_inverse_indicator = [1, 0, 0, 1, 2, 0]` + # we see that `0` points to the last dialogue turn. + # We convert all positive numbers to `True` and take + # the inverse mask to get + # `last_dialogue_mask = [0, 1, 1, 0, 0, 1]`, + # which precisely corresponds to the fact that the first dialogue is of + # length 2, the second 1 and the third 3. last_dialogue_turn_mask = tf.math.logical_not( + tf.cast( + tf.concat( + [ + tf_batch_data[DIALOGUE][INDICES][0], + tf.zeros((1,), dtype=tf.int32), + ], + axis=0, + )[1:], + dtype=tf.bool, + ) + ) + # get only the indices of real inputs + return tf.boolean_mask( + last_dialogue_turn_mask, + tf.reshape(tf_batch_data[attribute][SEQUENCE_LENGTH][0], (-1,)), + ) def _encode_real_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Encodes features for a given attribute.
Args: @@ -873,21 +1102,25 @@ def _encode_real_features_per_attribute( Returns: A tensor combining all features for `attribute` """ + # simulate None with empty tensor of zeros + text_transformer_output = tf.zeros((0,)) + text_sequence_lengths = tf.zeros((0,)) + if attribute in SEQUENCE_FEATURES_TO_ENCODE: # sequence_lengths contain `0` for "fake" features, while # tf_batch_data[attribute] contain only "real" features - _sequence_lengths = tf_batch_data[attribute][SEQUENCE_LENGTH][0] + sequence_lengths = tf_batch_data[attribute][SEQUENCE_LENGTH][0] # extract only nonzero lengths and cast to int - _sequence_lengths = tf.cast( - tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32 + sequence_lengths = tf.cast( + tf.boolean_mask(sequence_lengths, sequence_lengths), dtype=tf.int32 ) # boolean mask returns flat tensor - _sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1) + sequence_lengths = tf.expand_dims(sequence_lengths, axis=-1) mask_sequence_text = tf.squeeze( - self._compute_mask(_sequence_lengths), axis=1 + self._compute_mask(sequence_lengths), axis=1 ) - sequence_lengths = _sequence_lengths + 1 + sequence_lengths += 1 mask_text = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) attribute_features, _, _, _ = self._create_sequence( @@ -902,7 +1135,22 @@ def _encode_real_features_per_attribute( sequence_ids=False, ) - # TODO entities + if attribute == TEXT: + text_transformer_output = attribute_features + text_sequence_lengths = sequence_lengths + + if self.use_only_last_dialogue_turns: + # get the location of all last dialogue inputs + last_dialogue_turns_mask = self._create_last_dialogue_turns_mask( + tf_batch_data, attribute + ) + # pick last vector if max history featurizer is used + text_transformer_output = tf.boolean_mask( + text_transformer_output, last_dialogue_turns_mask + ) + text_sequence_lengths = tf.boolean_mask( + text_sequence_lengths, last_dialogue_turns_mask + ) # resulting attribute features will have shape # combined batch dimension and dialogue length x 1 x units @@ -917,43 +1165,34 @@ def _encode_real_features_per_attribute( # resulting attribute features will have shape # combined batch dimension and dialogue length x 1 x units attribute_features = self._combine_sparse_dense_features( - tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}", + tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}" ) - if attribute in set(SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE): - attribute_features = self._tf_layers[f"ffnn.{attribute}"]( + if attribute in SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: + attribute_features = self._tf_layers[f"encoding_layer.{attribute}"]( attribute_features ) - # attribute_mask has shape batch x dialogue_len x 1 - attribute_mask = tf_batch_data[attribute][MASK][0] - - if attribute in set(SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES): - dialogue_lengths = tf.cast( - tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 - ) - else: - # for labels, dialogue length is a fake dim and equal to 1 - dialogue_lengths = tf.ones((tf.shape(attribute_mask)[0],), dtype=tf.int32) - # attribute features have shape # (combined batch dimension and dialogue length x 1 x units) # convert them back to their original shape of # batch size x dialogue length x units - return self._convert_to_original_shape( - attribute_features, attribute_mask, dialogue_lengths + attribute_features = self._convert_to_original_shape( + attribute_features, tf_batch_data, attribute ) + return attribute_features, 
text_transformer_output, text_sequence_lengths + @staticmethod def _convert_to_original_shape( attribute_features: tf.Tensor, - attribute_mask: tf.Tensor, - dialogue_lengths: tf.Tensor, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + attribute: Text, ) -> tf.Tensor: """Transform attribute features back to original shape. - Given shape: combined batch and dialogue dimension x 1 x units - Original shape: batch x dialogue length x units + Given shape: (combined batch and dialogue dimension x 1 x units) + Original shape: (batch x dialogue length x units) Args: attribute_features: the "real" features to convert @@ -967,12 +1206,25 @@ def _convert_to_original_shape( """ # in order to convert the attribute features with shape - # combined batch-size and dialogue length x 1 x units - # to a shape of batch-size x dialogue length x units - # we use tf.scatter_nd. Therefore, we need to the target shape and the indices + # (combined batch-size and dialogue length x 1 x units) + # to a shape of (batch-size x dialogue length x units) + # we use tf.scatter_nd. Therefore, we need the target shape and the indices # mapping the values of attribute features to the position in the resulting # tensor. + # attribute_mask has shape batch x dialogue_len x 1 + attribute_mask = tf_batch_data[attribute][MASK][0] + + if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: + dialogue_lengths = tf.cast( + tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 + ) + dialogue_indices = tf_batch_data[DIALOGUE][INDICES][0] + else: + # for labels, dialogue length is a fake dim and equal to 1 + dialogue_lengths = tf.ones((tf.shape(attribute_mask)[0],), dtype=tf.int32) + dialogue_indices = tf.zeros((tf.shape(attribute_mask)[0],), dtype=tf.int32) + batch_dim = tf.shape(attribute_mask)[0] dialogue_dim = tf.shape(attribute_mask)[1] units = attribute_features.shape[-1] @@ -981,17 +1233,9 @@ def _convert_to_original_shape( attribute_mask = tf.cast(tf.squeeze(attribute_mask, axis=-1), dtype=tf.int32) # sum of attribute mask contains number of dialogue turns with "real" features non_fake_dialogue_lengths = tf.reduce_sum(attribute_mask, axis=-1) - + # create the batch indices batch_indices = tf.repeat(tf.range(batch_dim), non_fake_dialogue_lengths) - dialogue_indices = ( - tf.map_fn( - tf.range, - dialogue_lengths, - fn_output_signature=tf.RaggedTensorSpec(shape=[None], dtype=tf.int32), - ) - ).values - # attribute_mask has shape (batch x dialogue_len x 1), while # dialogue_indices has shape (combined_dialogue_len,) # in order to find positions of real input we need to flatten @@ -1005,12 +1249,13 @@ def _convert_to_original_shape( indices = tf.stack([batch_indices, dialogue_indices], axis=1) shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) + attribute_features = tf.squeeze(attribute_features, axis=1) - return tf.scatter_nd(indices, tf.squeeze(attribute_features, axis=1), shape) + return tf.scatter_nd(indices, attribute_features, shape) def _process_batch_data( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]: """Encodes batch data. Combines intent and text and action name and action text if both are present. 
@@ -1022,11 +1267,22 @@ def _process_batch_data( Tensor: encoding of all features in the batch, combined; """ # encode each attribute present in tf_batch_data - batch_encoded = { - key: self._encode_features_per_attribute(tf_batch_data, key) - for key in tf_batch_data.keys() - if LABEL_KEY not in key and DIALOGUE not in key - } + text_transformer_output = None + text_sequence_lengths = None + batch_encoded = {} + for attribute in tf_batch_data.keys(): + if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: + ( + attribute_features, + _text_transformer_output, + _text_sequence_lengths, + ) = self._encode_features_per_attribute(tf_batch_data, attribute) + + batch_encoded[attribute] = attribute_features + if attribute == TEXT: + text_transformer_output = _text_transformer_output + text_sequence_lengths = _text_sequence_lengths + # if both action text and action name are present, combine them; otherwise, # return the one which is present @@ -1060,7 +1316,131 @@ def _process_batch_data( batch_features = tf.concat(batch_features, axis=-1) - return batch_features + return batch_features, text_transformer_output, text_sequence_lengths + + def _reshape_for_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + # To calculate the loss for entities we need the output of the text + # sequence transformer (shape: real entity dim x + # sequence length x units), the output of the dialogue transformer + # (shape: batch size x dialogue length x units) and the tag ids for the + # entities (shape: real entity dim x sequence length - 1 x units) + # The real entity dimension for the text sequence transformer + # and the tag ids matches. + # In order to process the tensors, they need to have the same shape. + # Convert the output of the dialogue transformer to shape + # (real entity dim x 1 x units). + # Note: The CRF layer cannot handle 4D tensors. E.g. 
we cannot use the shape + # batch size x dialogue length x sequence length x units + + # convert the output of the dialogue transformer + # to shape (real entity dim x 1 x units) + attribute_mask = tf_batch_data[TEXT][MASK][0] + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + + if self.use_only_last_dialogue_turns: + # pick last vector if max history featurizer is used + attribute_mask = tf.expand_dims( + self._last_token(attribute_mask, dialogue_lengths), axis=1 + ) + dialogue_transformer_output = tf.boolean_mask( + dialogue_transformer_output, tf.squeeze(attribute_mask, axis=-1) + ) + + # boolean mask removed axis=1, add it back + dialogue_transformer_output = tf.expand_dims( + dialogue_transformer_output, axis=1 + ) + + # broadcast the dialogue transformer output sequence-length-times to get the + # same shape as the text sequence transformer output + dialogue_transformer_output = tf.broadcast_to( + dialogue_transformer_output, tf.shape(text_transformer_output) + ) + + # concat the output of the dialogue transformer to the output of the text + # sequence transformer (adding context) + # resulting shape + # (real entity dim x sequence length x 2 units) + text_transformed = tf.concat( + [text_transformer_output, dialogue_transformer_output], axis=-1 + ) + + text_mask = tf.squeeze(self._compute_mask(text_sequence_lengths), axis=1) + # add zeros to match the shape of text_transformed, because + # max sequence length might differ, since it is calculated dynamically + # based on a subset of sequence lengths + sequence_diff = tf.shape(text_transformed)[1] - tf.shape(text_mask)[1] + text_mask = tf.pad(text_mask, [[0, 0], [0, sequence_diff], [0, 0]]) + + # remove additional dims and sentence features + text_sequence_lengths = tf.reshape(text_sequence_lengths, (-1,)) - 1 + + return text_transformed, text_mask, text_sequence_lengths + + # ---TRAINING--- + + def _batch_loss_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> tf.Tensor: + # It could happen that some batches don't contain "real" features for `text`, + # e.g. large number of stories are intent only. + # Therefore actual `text_transformer_output` will be empty. + # We cannot create a loss with empty tensors. + # Since we need actual numbers to create a full loss, we output + # zero in this case. 
+ return tf.cond( + tf.shape(text_transformer_output)[0] > 0, + lambda: self._real_batch_loss_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ), + lambda: tf.constant(0.0), + ) + + def _real_batch_loss_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> tf.Tensor: + + text_transformed, text_mask, text_sequence_lengths = self._reshape_for_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ) + + tag_ids = tf_batch_data[ENTITY_TAGS][IDS][0] + # add a zero (no entity) for the sentence features to match the shape of inputs + sequence_diff = tf.shape(text_transformed)[1] - tf.shape(tag_ids)[1] + tag_ids = tf.pad(tag_ids, [[0, 0], [0, sequence_diff], [0, 0]]) + + loss, f1, _ = self._calculate_entity_loss( + text_transformed, + tag_ids, + text_mask, + text_sequence_lengths, + ENTITY_ATTRIBUTE_TYPE, + ) + + self.entity_loss.update_state(loss) + self.entity_f1.update_state(f1) + + return loss @staticmethod def _get_labels_embed( @@ -1086,18 +1466,27 @@ def batch_loss( The loss of the given batch. """ tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) + self._compute_dialogue_indices(tf_batch_data) all_label_ids, all_labels_embed = self._create_all_labels_embed() label_ids = tf_batch_data[LABEL_KEY][LABEL_SUB_KEY][0] labels_embed = self._get_labels_embed(label_ids, all_labels_embed) - dialogue_in = self._process_batch_data(tf_batch_data) - dialogue_embed, dialogue_mask = self._emebed_dialogue( - dialogue_in, tf_batch_data - ) + ( + dialogue_in, + text_transformer_output, + text_sequence_lengths, + ) = self._process_batch_data(tf_batch_data) + ( + dialogue_embed, + dialogue_mask, + dialogue_transformer_output, + ) = self._embed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) + losses = [] + loss, acc = self._tf_layers[f"loss.{LABEL}"]( dialogue_embed, labels_embed, @@ -1106,11 +1495,28 @@ def batch_loss( all_label_ids, dialogue_mask, ) + losses.append(loss) + + if ( + self.config[ENTITY_RECOGNITION] + and text_transformer_output is not None + and text_sequence_lengths is not None + ): + losses.append( + self._batch_loss_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ) + ) self.action_loss.update_state(loss) self.action_acc.update_state(acc) - return loss + return tf.math.add_n(losses) + + # ---PREDICTION--- def prepare_for_predict(self) -> None: _, self.all_labels_embed = self._create_all_labels_embed() @@ -1135,11 +1541,18 @@ def batch_predict( tf_batch_data = self.batch_to_model_data_format( batch_in, self.predict_data_signature ) - - dialogue_in = self._process_batch_data(tf_batch_data) - dialogue_embed, dialogue_mask = self._emebed_dialogue( - dialogue_in, tf_batch_data - ) + self._compute_dialogue_indices(tf_batch_data) + + ( + dialogue_in, + text_transformer_output, + text_sequence_lengths, + ) = self._process_batch_data(tf_batch_data) + ( + dialogue_embed, + dialogue_mask, + dialogue_transformer_output, + ) = self._embed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) sim_all = self._tf_layers[f"loss.{LABEL}"].sim( @@ -1151,8 +1564,73 @@ def batch_predict( scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( sim_all, self.config[SIMILARITY_TYPE] ) 
+ predictions = {"action_scores": scores, "similarities": sim_all} + + if ( + self.config[ENTITY_RECOGNITION] + and text_transformer_output is not None + and text_sequence_lengths is not None + ): + pred_ids, confidences = self._batch_predict_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ) + name = ENTITY_ATTRIBUTE_TYPE + predictions[f"e_{name}_ids"] = pred_ids + predictions[f"e_{name}_scores"] = confidences + + return predictions + + def _batch_predict_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> Tuple[tf.Tensor, tf.Tensor]: + # It could happen that current prediction turn don't contain + # "real" features for `text`, + # Therefore actual `text_transformer_output` will be empty. + # We cannot predict entities with empty tensors. + # Since we need to output some tensors of the same shape, we output + # zero tensors. + return tf.cond( + tf.shape(text_transformer_output)[0] > 0, + lambda: self._real_batch_predict_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ), + lambda: ( + # the output is of shape (batch_size, max_seq_len) + tf.zeros(tf.shape(text_transformer_output)[:2], dtype=tf.int32), + tf.zeros(tf.shape(text_transformer_output)[:2], dtype=tf.float32), + ), + ) + + def _real_batch_predict_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> Tuple[tf.Tensor, tf.Tensor]: + + text_transformed, _, text_sequence_lengths = self._reshape_for_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ) + + name = ENTITY_ATTRIBUTE_TYPE + + _logits = self._tf_layers[f"embed.{name}.logits"](text_transformed) - return {"action_scores": scores, "similarities": sim_all} + return self._tf_layers[f"crf.{name}"](_logits, text_sequence_lengths) # pytype: enable=key-error diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 09e6ea59b653..eb9268c02887 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -44,6 +44,7 @@ from rasa.nlu.model import Metadata from rasa.utils.tensorflow.constants import ( LABEL, + IDS, HIDDEN_LAYERS_SIZES, SHARE_HIDDEN_LAYERS, TRANSFORMER_SIZE, @@ -101,8 +102,7 @@ SPARSE = "sparse" DENSE = "dense" LABEL_KEY = LABEL -LABEL_SUB_KEY = "ids" -TAG_IDS = "tag_ids" +LABEL_SUB_KEY = IDS POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP] @@ -1309,22 +1309,6 @@ def _prepare_label_classification_layers(self) -> None: self._prepare_dot_product_loss(LABEL, self.config[SCALE_LOSS]) - def _prepare_entity_recognition_layers(self) -> None: - for tag_spec in self._entity_tag_specs: - name = tag_spec.tag_name - num_tags = tag_spec.num_tags - self._tf_layers[f"embed.{name}.logits"] = layers.Embed( - num_tags, self.config[REGULARIZATION_CONSTANT], f"logits.{name}" - ) - self._tf_layers[f"crf.{name}"] = layers.CRF( - num_tags, self.config[REGULARIZATION_CONSTANT], self.config[SCALE_LOSS] - ) - self._tf_layers[f"embed.{name}.tags"] = layers.Embed( - self.config[EMBEDDING_DIMENSION], - self.config[REGULARIZATION_CONSTANT], - f"tags.{name}", - ) - def _create_bow( self, sequence_features: 
List[Union[tf.Tensor, tf.SparseTensor]], @@ -1406,33 +1390,6 @@ def _calculate_label_loss( text_embed, label_embed, label_ids, all_labels_embed, all_label_ids ) - def _calculate_entity_loss( - self, - inputs: tf.Tensor, - tag_ids: tf.Tensor, - mask: tf.Tensor, - sequence_lengths: tf.Tensor, - tag_name: Text, - entity_tags: Optional[tf.Tensor] = None, - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: - - tag_ids = tf.cast(tag_ids[:, :, 0], tf.int32) - - if entity_tags is not None: - _tags = self._tf_layers[f"embed.{tag_name}.tags"](entity_tags) - inputs = tf.concat([inputs, _tags], axis=-1) - - logits = self._tf_layers[f"embed.{tag_name}.logits"](inputs) - - # should call first to build weights - pred_ids, _ = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths) - loss = self._tf_layers[f"crf.{tag_name}"].loss( - logits, tag_ids, sequence_lengths - ) - f1 = self._tf_layers[f"crf.{tag_name}"].f1_score(tag_ids, pred_ids, mask) - - return loss, f1, logits - def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py index c507e5f2c74a..47c1c7744362 100644 --- a/rasa/shared/core/trackers.py +++ b/rasa/shared/core/trackers.py @@ -231,7 +231,7 @@ def _events_for_verbosity( @staticmethod def freeze_current_state(state: State) -> FrozenState: - frozen_state = frozenset( + return frozenset( { key: frozenset(values.items()) if isinstance(values, Dict) @@ -239,7 +239,6 @@ def freeze_current_state(state: State) -> FrozenState: for key, values in state.items() }.items() ) - return frozen_state def past_states(self, domain: Domain) -> List[State]: """Generate the past states of this tracker based on the history. diff --git a/rasa/shared/nlu/constants.py b/rasa/shared/nlu/constants.py index ee85a005f935..53040f0d4c53 100644 --- a/rasa/shared/nlu/constants.py +++ b/rasa/shared/nlu/constants.py @@ -26,6 +26,7 @@ TRAINABLE_EXTRACTORS = {"MitieEntityExtractor", "CRFEntityExtractor", "DIETClassifier"} ENTITIES = "entities" +ENTITY_TAGS = "entity_tags" ENTITY_ATTRIBUTE_TYPE = "entity" ENTITY_ATTRIBUTE_GROUP = "group" ENTITY_ATTRIBUTE_ROLE = "role" diff --git a/rasa/shared/nlu/training_data/features.py b/rasa/shared/nlu/training_data/features.py index c556d6e6c3ff..755215fae35e 100644 --- a/rasa/shared/nlu/training_data/features.py +++ b/rasa/shared/nlu/training_data/features.py @@ -16,21 +16,11 @@ def __init__( attribute: Text, origin: Union[Text, List[Text]], ) -> None: - self._validate_feature_type(feature_type) - self.features = features self.type = feature_type self.origin = origin self.attribute = attribute - @staticmethod - def _validate_feature_type(feature_type: Text) -> None: - if feature_type not in VALID_FEATURE_TYPES: - raise ValueError( - f"Invalid feature type '{feature_type}' used. Valid feature types are: " - f"{VALID_FEATURE_TYPES}." - ) - def is_sparse(self) -> bool: """Checks if features are sparse or not. 
diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py
index 06f81775a673..7957e84f8351 100644
--- a/rasa/utils/tensorflow/constants.py
+++ b/rasa/utils/tensorflow/constants.py
@@ -1,6 +1,7 @@
 # constants for configuration parameters of our tensorflow models

 LABEL = "label"
+IDS = "ids"
 HIDDEN_LAYERS_SIZES = "hidden_layers_sizes"
 SHARE_HIDDEN_LAYERS = "share_hidden_layers"
diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py
index 1d64b1b26cb3..a9017094e945 100644
--- a/rasa/utils/tensorflow/layers.py
+++ b/rasa/utils/tensorflow/layers.py
@@ -136,7 +136,7 @@ def call(self, inputs: tf.SparseTensor) -> tf.Tensor:
         if len(inputs.shape) == 3:
             # reshape back
             outputs = tf.reshape(
-                outputs, (tf.shape(inputs)[0], tf.shape(inputs)[1], -1)
+                outputs, (tf.shape(inputs)[0], tf.shape(inputs)[1], self.units)
             )

         if self.use_bias:
@@ -630,13 +630,15 @@ def body(idx: tf.Tensor, out: tf.Tensor) -> List[tf.Tensor]:
         # create first random array of indices
         out1 = rand_idxs()  # (1, num_neg)

-        return tf.while_loop(
-            cond,
-            body,
-            loop_vars=[idx1, out1],
-            shape_invariants=[idx1.shape, tf.TensorShape([None, self.num_neg])],
-            parallel_iterations=self.parallel_iterations,
-            back_prop=False,
+        return tf.nest.map_structure(
+            tf.stop_gradient,
+            tf.while_loop(
+                cond,
+                body,
+                loop_vars=[idx1, out1],
+                shape_invariants=[idx1.shape, tf.TensorShape([None, self.num_neg])],
+                parallel_iterations=self.parallel_iterations,
+            ),
         )[1]

     @staticmethod
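The `layers.py` change above swaps the deprecated `back_prop=False` argument for the TF 2.x idiom of mapping `tf.stop_gradient` over the loop outputs. A small standalone sketch of that idiom; the `summed` toy loop is illustrative only:

    import tensorflow as tf

    def summed(n: tf.Tensor) -> tf.Tensor:
        cond = lambda i, acc: i > 0
        body = lambda i, acc: [i - 1, acc + i]
        # gradients are cut by wrapping every loop output in tf.stop_gradient
        return tf.nest.map_structure(
            tf.stop_gradient,
            tf.while_loop(cond, body, loop_vars=[n, tf.constant(0)]),
        )[1]

    print(summed(tf.constant(4)).numpy())  # 10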
diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py
index 46cf8fd5bd66..4f5dcab1c010 100644
--- a/rasa/utils/tensorflow/model_data.py
+++ b/rasa/utils/tensorflow/model_data.py
@@ -124,7 +124,7 @@ def __setstate__(self, state, **kwargs):
     def _validate_number_of_dimensions(
         number_of_dimensions: int, input_array: np.ndarray
     ) -> None:
-        """Validates if the given number of dimensions maps the with the dimensions of the input array.
+        """Validates if the input array has the given number of dimensions.

         Args:
             number_of_dimensions: number of dimensions
@@ -140,6 +140,10 @@ def _validate_number_of_dimensions(
             if isinstance(_sub_array, scipy.sparse.spmatrix):
                 dim = i
                 break
+            if isinstance(_sub_array, np.ndarray) and _sub_array.shape[0] == 0:
+                # sequence dimension is 0, we are dealing with "fake" features
+                dim = i
+                break

         # If the resulting sub_array is sparse, the remaining number of dimensions
         # should be at least 2
@@ -147,7 +151,15 @@ def _validate_number_of_dimensions(
             if dim > 2:
                 raise ValueError(
                     f"Given number of dimensions '{number_of_dimensions}' does not "
-                    f"match dimensiona of given input array: {input_array}."
+                    f"match dimensions of given input array: {input_array}."
+                )
+        elif isinstance(_sub_array, np.ndarray) and _sub_array.shape[0] == 0:
+            # sequence dimension is 0, we are dealing with "fake" features,
+            # but they should be of dim 2
+            if dim > 2:
+                raise ValueError(
+                    f"Given number of dimensions '{number_of_dimensions}' does not "
+                    f"match dimensions of given input array: {input_array}."
                 )
         # If the resulting sub_array is dense, the sub_array should be a single number
         elif not np.issubdtype(type(_sub_array), np.integer) and not isinstance(
@@ -1122,7 +1134,7 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray:
         )

         data_padded = np.zeros(
-            [combined_dialogue_len, max_seq_len, number_of_features,],
+            [combined_dialogue_len, max_seq_len, number_of_features],
             dtype=array_of_array_of_dense[0][0].dtype,
         )
@@ -1225,7 +1237,7 @@ def _4d_scipy_matrix_to_values(
         indices = np.hstack(
             [
                 np.vstack(
-                    [sum(dialogue_len[:i]) + j * np.ones_like(x.row), x.row, x.col,]
+                    [sum(dialogue_len[:i]) + j * np.ones_like(x.row), x.row, x.col]
                 )
                 for i, array_of_sparse in enumerate(array_of_array_of_sparse)
                 for j, x in enumerate(array_of_sparse)
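For context on the new branch above: attributes that are absent in a turn are padded with "fake" features whose sequence dimension is 0, so the dimension check has to stop descending as soon as it meets an empty array. A tiny illustration with plain NumPy; the shapes are made up:

    import numpy as np

    real_turn = np.random.rand(3, 10)  # 3 tokens x 10 feature dimensions
    fake_turn = np.zeros((0, 10))      # attribute missing in this turn

    dialogue = np.array([real_turn, fake_turn], dtype=object)
    print(dialogue[1].shape[0] == 0)   # True -- marks a "fake" feature entry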
diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py
index ca3d000b7310..1f556d22914d 100644
--- a/rasa/utils/tensorflow/model_data_utils.py
+++ b/rasa/utils/tensorflow/model_data_utils.py
@@ -166,7 +166,7 @@ def _filter_features(features: Optional[List["Features"]], featurizers: List[Tex
     return [f for f in features if f.origin in featurizers]


-def _create_zero_features(
+def _create_fake_features(
     all_features: List[List[List["Features"]]],
 ) -> List["Features"]:
     """Computes default feature values.
@@ -191,8 +191,8 @@
         )
     )

-    # create zero_features for Nones
-    zero_features = []
+    # create fake_features for Nones
+    fake_features = []
     for _features in example_features:
         new_features = copy.deepcopy(_features)
         if _features.is_dense():
@@ -203,16 +203,16 @@
             new_features.features = scipy.sparse.coo_matrix(
                 (0, _features.features.shape[-1]), _features.features.dtype
             )
-        zero_features.append(new_features)
+        fake_features.append(new_features)

-    return zero_features
+    return fake_features


 def convert_to_data_format(
     features: Union[
         List[List[Dict[Text, List["Features"]]]], List[Dict[Text, List["Features"]]]
     ],
-    zero_features: Optional[Dict[Text, List["Features"]]] = None,
+    fake_features: Optional[Dict[Text, List["Features"]]] = None,
     consider_dialogue_dimension: bool = True,
     featurizers: Optional[List[Text]] = None,
 ) -> Tuple[Data, Optional[Dict[Text, List["Features"]]]]:
@@ -228,7 +228,7 @@
     Args:
         features: a dictionary of attributes to a list of features for all
            examples in the training data
-        zero_features: Contains default feature values for attributes
+        fake_features: Contains default feature values for attributes
         consider_dialogue_dimension: If set to false the dialogue dimension will be
             removed from the resulting sequence features.
         featurizers: the featurizers to consider
@@ -237,9 +237,9 @@
         Input in "Data" format and zero features
     """
     training = False
-    if not zero_features:
+    if not fake_features:
         training = True
-        zero_features = defaultdict(list)
+        fake_features = defaultdict(list)

     # unify format of incoming features
     if isinstance(features[0], Dict):
@@ -254,7 +254,7 @@
     if training:
         attributes = list(attribute_to_features.keys())
     else:
-        attributes = list(zero_features.keys())
+        attributes = list(fake_features.keys())

     # In case an attribute is not present during prediction, replace it with
     # None values that will then be replaced by zero features
@@ -271,14 +271,14 @@
             empty_features,
             attribute_to_features,
             training,
-            zero_features,
+            fake_features,
             consider_dialogue_dimension,
         )

     # ensure that all attributes are in the same order
     attribute_data = OrderedDict(sorted(attribute_data.items()))

-    return attribute_data, zero_features
+    return attribute_data, fake_features


 def _features_for_attribute(
@@ -286,7 +286,7 @@
     empty_features: List[Any],
     attribute_to_features: Dict[Text, List[List[List["Features"]]]],
     training: bool,
-    zero_features: Dict[Text, List["Features"]],
+    fake_features: Dict[Text, List["Features"]],
     consider_dialogue_dimension: bool,
 ) -> Dict[Text, List[FeatureArray]]:
     """Create the features for the given attribute from the all examples features.
@@ -296,9 +296,9 @@
         empty_features: empty features
         attribute_to_features: features for every example
         training: boolean indicating whether we are currently in training or not
-        zero_features: zero features
-        consider_dialogue_dimension: If set to false the dialogue dimension will be removed from the resulting sequence
-            features.
+        fake_features: fake features
+        consider_dialogue_dimension: If set to false the dialogue dimension will be
+            removed from the resulting sequence features.

     Returns:
         A dictionary of feature type to actual features for the given attribute.
@@ -312,10 +312,10 @@ def _features_for_attribute(
     # in case some features for a specific attribute are
     # missing, replace them with a feature vector of zeros
     if training:
-        zero_features[attribute] = _create_zero_features(features)
+        fake_features[attribute] = _create_fake_features(features)

     (attribute_masks, _dense_features, _sparse_features) = _extract_features(
-        features, zero_features[attribute], attribute
+        features, fake_features[attribute], attribute
     )

     sparse_features = {}
@@ -363,7 +363,7 @@ def _features_for_attribute(

 def _extract_features(
     features: List[List[List["Features"]]],
-    zero_features: List["Features"],
+    fake_features: List["Features"],
     attribute: Text,
 ) -> Tuple[
     List[np.ndarray],
@@ -375,7 +375,7 @@
     Args:
         features: all features
-        zero_features: list of zero features
+        fake_features: list of fake features

     Returns:
         - a list of attribute masks
@@ -399,7 +399,7 @@
         if list_of_features is None:
             # use zero features and set mask to zero
             attribute_mask[i] = 0
-            list_of_features = zero_features
+            list_of_features = fake_features

         for features in list_of_features:
             # in case of ENTITIES, if the attribute type matches either 'entity',
diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index 3708f4a8a99b..50e4903814c0 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -54,6 +54,7 @@
     DENSE_DIMENSION,
     CONCAT_DIMENSION,
     DROP_RATE_ATTENTION,
+    SCALE_LOSS,
 )
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.transformer import TransformerEncoder
@@ -731,14 +732,16 @@ def _prepare_ffnn_layer(
     def _prepare_transformer_layer(
         self,
         name: Text,
+        num_layers: int,
+        units: int,
         drop_rate: float,
         drop_rate_attention: float,
         prefix: Text = "transformer",
     ):
         if self.config[NUM_TRANSFORMER_LAYERS] > 0:
             self._tf_layers[f"{prefix}.{name}"] = TransformerEncoder(
-                self.config[NUM_TRANSFORMER_LAYERS],
-                self.config[TRANSFORMER_SIZE],
+                num_layers,
+                units,
                 self.config[NUM_HEADS],
                 self.config[TRANSFORMER_SIZE] * 4,
                 self.config[REGULARIZATION_CONSTANT],
@@ -800,7 +803,10 @@ def _prepare_sparse_dense_layers(
         if not dense:
             # create dense labels for the input to use in negative sampling
             self._tf_layers[f"sparse_to_dense_ids.{name}"] = layers.DenseForSparse(
-                units=2, trainable=False, name=f"sparse_to_dense_ids.{name}"
+                units=2,
+                use_bias=False,
+                trainable=False,
+                name=f"sparse_to_dense_ids.{name}",
             )

     def _prepare_input_layers(self, name: Text) -> None:
@@ -833,9 +839,29 @@ def _prepare_input_layers(self, name: Text) -> None:
     def _prepare_sequence_layers(self, name: Text) -> None:
         self._prepare_input_layers(name)
         self._prepare_transformer_layer(
-            name, self.config[DROP_RATE], self.config[DROP_RATE_ATTENTION]
+            name,
+            self.config[NUM_TRANSFORMER_LAYERS],
+            self.config[TRANSFORMER_SIZE],
+            self.config[DROP_RATE],
+            self.config[DROP_RATE_ATTENTION],
         )

+    def _prepare_entity_recognition_layers(self) -> None:
+        for tag_spec in self._entity_tag_specs:
+            name = tag_spec.tag_name
+            num_tags = tag_spec.num_tags
+            self._tf_layers[f"embed.{name}.logits"] = layers.Embed(
+                num_tags, self.config[REGULARIZATION_CONSTANT], f"logits.{name}"
+            )
+            self._tf_layers[f"crf.{name}"] = layers.CRF(
+                num_tags, self.config[REGULARIZATION_CONSTANT], self.config[SCALE_LOSS]
+            )
+            self._tf_layers[f"embed.{name}.tags"] = layers.Embed(
+                self.config[EMBEDDING_DIMENSION],
+                self.config[REGULARIZATION_CONSTANT],
+                f"tags.{name}",
+            )
+
     def _combine_sparse_dense_features(
         self,
         features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]],
@@ -948,6 +974,7 @@ def _features_as_seq_ids(
         self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], name: Text
     ) -> Optional[tf.Tensor]:
         """Creates dense labels for negative sampling."""
+        # if there are dense features, we can use them
         for f in features:
             if not isinstance(f, tf.SparseTensor):
@@ -1064,6 +1091,33 @@ def _get_batch_dim(attribute_data: Dict[Text, List[tf.Tensor]]) -> int:

         return tf.shape(attribute_data[SENTENCE][0])[0]

+    def _calculate_entity_loss(
+        self,
+        inputs: tf.Tensor,
+        tag_ids: tf.Tensor,
+        mask: tf.Tensor,
+        sequence_lengths: tf.Tensor,
+        tag_name: Text,
+        entity_tags: Optional[tf.Tensor] = None,
+    ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+
+        tag_ids = tf.cast(tag_ids[:, :, 0], tf.int32)
+
+        if entity_tags is not None:
+            _tags = self._tf_layers[f"embed.{tag_name}.tags"](entity_tags)
+            inputs = tf.concat([inputs, _tags], axis=-1)
+
+        logits = self._tf_layers[f"embed.{tag_name}.logits"](inputs)
+
+        # has to be called first in order to build the weights
+        pred_ids, _ = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths)
+        loss = self._tf_layers[f"crf.{tag_name}"].loss(
+            logits, tag_ids, sequence_lengths
+        )
+        f1 = self._tf_layers[f"crf.{tag_name}"].f1_score(tag_ids, pred_ids, mask)
+
+        return loss, f1, logits
+
     def batch_loss(
         self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]]
     ) -> tf.Tensor:
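The `_calculate_entity_loss` helper moved here is shared by DIET and TED: token-level inputs are projected to per-tag logits which a CRF layer decodes and scores. A rough, hedged sketch of that data flow in plain TensorFlow, using a dense projection and greedy decoding as stand-ins for `layers.Embed` and `layers.CRF`; all tensor shapes are made up:

    import tensorflow as tf

    num_tags = 4
    text_transformed = tf.random.uniform((2, 7, 16))  # batch x tokens x units
    tag_ids = tf.random.uniform((2, 7), maxval=num_tags, dtype=tf.int32)

    logits = tf.keras.layers.Dense(num_tags)(text_transformed)   # per-token tag logits
    pred_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)  # CRF decode stand-in
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tag_ids, logits=logits)
    )
    print(pred_ids.shape, float(loss))  # (2, 7) and a scalar loss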
diff --git a/tests/core/featurizers/test_single_state_featurizers.py b/tests/core/featurizers/test_single_state_featurizers.py
index 7ff3502b13c2..2f5819e8e659 100644
--- a/tests/core/featurizers/test_single_state_featurizers.py
+++ b/tests/core/featurizers/test_single_state_featurizers.py
@@ -15,6 +15,11 @@
     INTENT,
     FEATURE_TYPE_SEQUENCE,
     FEATURE_TYPE_SENTENCE,
+    ENTITY_ATTRIBUTE_TYPE,
+    ENTITY_ATTRIBUTE_VALUE,
+    ENTITY_ATTRIBUTE_START,
+    ENTITY_ATTRIBUTE_END,
+    ENTITY_TAGS,
 )
 from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS
 from rasa.shared.nlu.interpreter import RegexInterpreter
@@ -182,38 +187,41 @@ def test_single_state_featurizer_with_entity_roles_and_groups(
     from rasa.core.agent import Agent

     interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter
-
+    # TODO roles and groups are not supported in e2e yet
+    domain = Domain(
+        intents=[],
+        entities=["city", f"city{ENTITY_LABEL_SEPARATOR}to"],
+        slots=[],
+        templates={},
+        forms={},
+        action_names=[],
+    )
     f = SingleStateFeaturizer()
-    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
-    f._default_feature_states[ENTITIES] = {
-        "c": 0,
-        "d": 1,
-        f"d{ENTITY_LABEL_SEPARATOR}e": 2,
-    }
-    f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1, "action_listen": 2}
-    f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
-    f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}
-    encoded = f.encode_state(
+    f.prepare_from_domain(domain)
+    encoded = f.encode_entities(
         {
-            "user": {
-                "text": "a ball",
-                "intent": "b",
-                "entities": ["c", f"d{ENTITY_LABEL_SEPARATOR}e"],
-            },
-            "prev_action": {
-                "action_name": "action_listen",
-                "action_text": "throw a ball",
-            },
-            "active_loop": {"name": "k"},
-            "slots": {"e": (1.0,)},
+            TEXT: "I am flying from London to Paris",
+            ENTITIES: [
+                {
+                    ENTITY_ATTRIBUTE_TYPE: "city",
+                    ENTITY_ATTRIBUTE_VALUE: "London",
+                    ENTITY_ATTRIBUTE_START: 17,
+                    ENTITY_ATTRIBUTE_END: 23,
+                },
+                {
+                    ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to",
+                    ENTITY_ATTRIBUTE_VALUE: "Paris",
+                    ENTITY_ATTRIBUTE_START: 27,
+                    ENTITY_ATTRIBUTE_END: 32,
+                },
+            ],
         },
         interpreter=interpreter,
     )
-    # check all the features are encoded and *_text features are encoded by a densefeaturizer
-    assert sorted(list(encoded.keys())) == sorted(
-        [TEXT, ENTITIES, ACTION_NAME, SLOTS, ACTIVE_LOOP, INTENT, ACTION_TEXT]
+
+    assert sorted(list(encoded.keys())) == sorted([ENTITY_TAGS])
+    assert np.all(
+        encoded[ENTITY_TAGS][0].features == [[0], [0], [0], [0], [1], [0], [2]]
     )
-    assert np.all(encoded[ENTITIES][0].features.toarray() == [1, 0, 1])


 def test_single_state_featurizer_uses_dtype_float():
@@ -241,21 +249,39 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen(
     interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

     f = SingleStateFeaturizer()
-    f._default_feature_states[INTENT] = {"a": 0, "b": 1}
-    f._default_feature_states[ENTITIES] = {"c": 0}
-    f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1, "action_listen": 2}
-    f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
-    f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}
-
+    f._default_feature_states[INTENT] = {"greet": 0, "inform": 1}
+    f._default_feature_states[ENTITIES] = {
+        "city": 0,
+        "name": 1,
+        f"city{ENTITY_LABEL_SEPARATOR}to": 2,
+        f"city{ENTITY_LABEL_SEPARATOR}from": 3,
+    }
+    f._default_feature_states[ACTION_NAME] = {
+        "utter_ask_where_to": 0,
+        "utter_greet": 1,
+        "action_listen": 2,
+    }
+    # `_0` in slots represents the feature dimension
+    f._default_feature_states[SLOTS] = {"slot_1_0": 0, "slot_2_0": 1, "slot_3_0": 2}
+    f._default_feature_states[ACTIVE_LOOP] = {
+        "active_loop_1": 0,
+        "active_loop_2": 1,
+        "active_loop_3": 2,
+        "active_loop_4": 3,
+    }
     encoded = f.encode_state(
         {
-            "user": {"text": "a ball", "intent": "b", "entities": ["c"]},
+            "user": {
+                "text": "I am flying from London to Paris",
+                "intent": "inform",
+                "entities": ["city", f"city{ENTITY_LABEL_SEPARATOR}to"],
+            },
             "prev_action": {
                 "action_name": "action_listen",
                 "action_text": "throw a ball",
             },
-            "active_loop": {"name": "k"},
-            "slots": {"e": (1.0,)},
+            "active_loop": {"name": "active_loop_4"},
+            "slots": {"slot_1": (1.0,)},
         },
         interpreter=interpreter,
     )
@@ -271,7 +297,7 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen(
     assert (
         encoded[ACTION_NAME][0].features != scipy.sparse.coo_matrix([[0, 0, 1]])
     ).nnz == 0
-    assert encoded[ENTITIES][0].features.shape[-1] == 1
+    assert encoded[ENTITIES][0].features.shape[-1] == 4
     assert (encoded[SLOTS][0].features != scipy.sparse.coo_matrix([[1, 0, 0]])).nnz == 0
     assert (
         encoded[ACTIVE_LOOP][0].features != scipy.sparse.coo_matrix([[0, 0, 0, 1]])
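A hand-worked check of the `encode_entities` assertion in the test above, assuming whitespace tokenization of the example sentence and a tag-id mapping where the no-entity tag is 0, `"city"` is 1, and the `"city"` label combined with the `to` role is 2:

    tokens = "I am flying from London to Paris".split()
    tag_id_for_token = {"London": 1, "Paris": 2}  # city -> 1, city-with-role-to -> 2
    tags = [[tag_id_for_token.get(token, 0)] for token in tokens]
    print(tags)  # [[0], [0], [0], [0], [1], [0], [2]] -- matches the assertion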
diff --git a/tests/core/featurizers/test_tracker_featurizer.py b/tests/core/featurizers/test_tracker_featurizer.py
index 98f323bd3279..f6b904d8397b 100644
--- a/tests/core/featurizers/test_tracker_featurizer.py
+++ b/tests/core/featurizers/test_tracker_featurizer.py
@@ -67,7 +67,7 @@ def test_featurize_trackers_with_full_dialogue_tracker_featurizer(
     tracker = tracker_from_dialogue_file(
         "data/test_dialogues/moodbot.json", moodbot_domain
     )
-    state_features, labels = tracker_featurizer.featurize_trackers(
+    state_features, labels, entity_tags = tracker_featurizer.featurize_trackers(
         [tracker], moodbot_domain, RegexInterpreter()
     )
@@ -75,6 +75,8 @@
     assert len(state_features) > 0
     assert labels is not None
     assert len(labels) > 0
+    # moodbot doesn't contain e2e entities
+    assert not any([any(turn_tags) for turn_tags in entity_tags])


 def test_featurize_trackers_with_max_history_tracker_featurizer(moodbot_domain: Domain):
@@ -84,7 +86,7 @@ def test_featurize_trackers_with_max_history_tracker_featurizer(moodbot_domain:
     tracker = tracker_from_dialogue_file(
         "data/test_dialogues/moodbot.json", moodbot_domain
     )
-    state_features, labels = tracker_featurizer.featurize_trackers(
+    state_features, labels, entity_tags = tracker_featurizer.featurize_trackers(
         [tracker], moodbot_domain, RegexInterpreter()
     )
@@ -92,3 +94,5 @@
     assert len(state_features) > 0
     assert labels is not None
     assert len(labels) > 0
+    # moodbot doesn't contain e2e entities
+    assert not any([any(turn_tags) for turn_tags in entity_tags])
diff --git a/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py b/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py
index fa746263b082..4e48ea67d793 100644
--- a/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py
+++ b/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py
@@ -108,8 +108,6 @@ def test_yaml_writer_dumps_user_messages():
     - story: default
       steps:
       - intent: greet
-        user: |-
-          Hello
       - action: utter_greet
     """
@@ -139,10 +137,10 @@ def test_yaml_writer_avoids_dumping_not_existing_user_messages():

 @pytest.mark.parametrize(
-    "input_yaml_file", ["data/test_yaml_stories/rules_with_stories_sorted.yaml",],
+    "input_yaml_file", ["data/test_yaml_stories/rules_with_stories_sorted.yaml"]
 )
 def test_yaml_writer_dumps_rules(
-    input_yaml_file: Text, tmpdir: Path, default_domain: Domain,
+    input_yaml_file: Text, tmpdir: Path, default_domain: Domain
 ):
     original_yaml_reader = YAMLStoryReader(default_domain, None, False)
     original_yaml_story_steps = original_yaml_reader.read_from_file(input_yaml_file)
diff --git a/tests/test_test.py b/tests/test_test.py
index b279fee01231..8bbc45bececb 100644
--- a/tests/test_test.py
+++ b/tests/test_test.py
@@ -197,8 +197,6 @@ def test_write_classification_errors():
     - story: default
       steps:
       - intent: greet  # predicted: goodbye: Hello
-        user: |-
-          Hello
       - action: utter_greet  # predicted: utter_goodbye
     """
diff --git a/tests/utils/tensorflow/test_model_data_utils.py b/tests/utils/tensorflow/test_model_data_utils.py
index f495222958df..2dab29353f3a 100644
--- a/tests/utils/tensorflow/test_model_data_utils.py
+++ b/tests/utils/tensorflow/test_model_data_utils.py
@@ -30,7 +30,7 @@
 shape = 100


-def test_create_zero_features():
+def test_create_fake_features():
     # DENSE FEATURES
     dense_feature_sentence_features = Features(
         features=np.random.rand(shape),
@@ -40,10 +40,10 @@
     )
     features = [[None, None, [dense_feature_sentence_features]]]

-    zero_features = model_data_utils._create_zero_features(features)
-    assert len(zero_features) == 1
-    assert zero_features[0].is_dense()
-    assert zero_features[0].features.shape == (0, shape)
+    fake_features = model_data_utils._create_fake_features(features)
+    assert len(fake_features) == 1
+    assert fake_features[0].is_dense()
+    assert fake_features[0].features.shape == (0, shape)

     # SPARSE FEATURES
     sparse_feature_sentence_features = Features(
@@ -53,11 +53,11 @@
     )
     features = [[None, None, [sparse_feature_sentence_features]]]

-    zero_features = model_data_utils._create_zero_features(features)
-    assert len(zero_features) == 1
-    assert zero_features[0].is_sparse()
-    assert zero_features[0].features.shape == (0, shape)
-    assert zero_features[0].features.nnz == 0
+    fake_features = model_data_utils._create_fake_features(features)
+    assert len(fake_features) == 1
+    assert fake_features[0].is_sparse()
+    assert fake_features[0].features.shape == (0, shape)
+    assert fake_features[0].features.nnz == 0


 def test_surface_attributes():
@@ -142,18 +142,18 @@ def test_surface_attributes():


 def test_extract_features():
-    zero_features = np.zeros(shape)
-    zero_features_as_features = Features(
-        features=zero_features, attribute=INTENT, feature_type=SENTENCE, origin=[]
+    fake_features = np.zeros(shape)
+    fake_features_as_features = Features(
+        features=fake_features, attribute=INTENT, feature_type=SENTENCE, origin=[]
     )
     # create zero features
-    zero_features_list = [zero_features_as_features]
+    fake_features_list = [fake_features_as_features]

     # create tracker state features by setting a random index in the array to 1
     random_inds = np.random.randint(shape, size=6)
     list_of_features = []
     for idx in random_inds:
-        current_features = copy.deepcopy(zero_features_as_features)
+        current_features = copy.deepcopy(fake_features_as_features)
         current_features.features[idx] = 1
         list_of_features.append([current_features])
@@ -168,11 +168,11 @@
         attribute_masks,
         dense_features,
         sparse_features,
-    ) = model_data_utils._extract_features(tracker_features, zero_features_list, INTENT)
+    ) = model_data_utils._extract_features(tracker_features, fake_features_list, INTENT)

     expected_mask = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]])
     assert np.all(np.squeeze(np.array(attribute_masks), 2) == expected_mask)
-    assert np.array(dense_features[SENTENCE]).shape[-1] == zero_features.shape[-1]
+    assert np.array(dense_features[SENTENCE]).shape[-1] == fake_features.shape[-1]
     assert sparse_features == {}
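The `expected_mask` at the end of this test encodes which turns carried real features: a 1 where features were present and a 0 where a missing entry was padded with fake features. A toy illustration of that semantics; the nested list below is made up and is not the test's actual input:

    import numpy as np

    dialogues = [
        ["real", None, "real"],
        [None, None, "real"],
        ["real", "real", "real"],
    ]
    mask = np.array([[0 if turn is None else 1 for turn in d] for d in dialogues])
    print(mask)  # [[1 0 1] [0 0 1] [1 1 1]] -- same pattern as expected_mask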