From 8279a174cdd8644417db7f2caf7d3535c8712bbf Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Tue, 20 Oct 2020 14:58:51 +0200 Subject: [PATCH 01/62] add diet to ted --- examples/e2ebot/config.yml | 7 +- rasa/core/policies/rule_policy.py | 2 +- rasa/core/policies/ted_policy.py | 46 ++++++- rasa/nlu/classifiers/diet_classifier.py | 159 +---------------------- rasa/utils/tensorflow/constants.py | 1 + rasa/utils/tensorflow/model_data.py | 1 + rasa/utils/tensorflow/models.py | 165 +++++++++++++++++++++++- 7 files changed, 211 insertions(+), 170 deletions(-) diff --git a/examples/e2ebot/config.yml b/examples/e2ebot/config.yml index 0ce241f491f5..e2cbcd0cb5af 100644 --- a/examples/e2ebot/config.yml +++ b/examples/e2ebot/config.yml @@ -10,10 +10,7 @@ pipeline: min_ngram: 1 max_ngram: 4 - name: DIETClassifier - epochs: 100 + epochs: 1 policies: - name: TEDPolicy - epochs: 100 - batch_size: - - 32 - - 64 + epochs: 200 diff --git a/rasa/core/policies/rule_policy.py b/rasa/core/policies/rule_policy.py index 0da4a4a6d90f..face3ced59ec 100644 --- a/rasa/core/policies/rule_policy.py +++ b/rasa/core/policies/rule_policy.py @@ -770,7 +770,7 @@ def predict_action_probabilities( if default_action_name and not rules_action_name_from_text: return self._prediction_result(default_action_name, tracker, domain), False - # A loop has priority over any other rule. + # A loop has priority over any other rule except defaults. # The rules or any other prediction will be applied only if a loop was rejected. # If we are in a loop, and the loop didn't run previously or rejected, we can # simply force predict the loop. diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 4cb64a0dcc8f..a6f1c796fc89 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -71,9 +71,13 @@ ENCODING_DIMENSION, UNIDIRECTIONAL_ENCODER, SEQUENCE, + SEQUENCE_LENGTH, SENTENCE, DENSE_DIMENSION, E2E_CONFIDENCE_THRESHOLD, + SPARSE_INPUT_DROPOUT, + DENSE_INPUT_DROPOUT, + MASKED_LM, ) @@ -89,6 +93,7 @@ LENGTH = "length" POSSIBLE_FEATURE_TYPES = [SEQUENCE, SENTENCE] FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT] +SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT] LABEL_FEATURES_TO_ENCODE = [f"{LABEL}_{ACTION_NAME}", f"{LABEL}_{ACTION_TEXT}"] STATE_LEVEL_FEATURES = [ENTITIES, SLOTS, ACTIVE_LOOP] @@ -190,6 +195,13 @@ class TEDPolicy(Policy): DROP_RATE_ATTENTION: 0, # Sparsity of the weights in dense layers WEIGHT_SPARSITY: 0.8, + # If 'True' apply dropout to sparse input tensors + SPARSE_INPUT_DROPOUT: True, + # If 'True' apply dropout to dense input tensors + DENSE_INPUT_DROPOUT: True, + # If 'True' random tokens of the input message will be masked and the model + # should predict those tokens. + MASKED_LM: False, # ## Evaluation parameters # How often calculate validation accuracy. # Small values may hurt performance, e.g. model accuracy. @@ -320,6 +332,8 @@ def _create_model_data( model_data.add_lengths( DIALOGUE, LENGTH, next(iter(list(attribute_data.keys()))), MASK ) + model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE) + model_data.add_lengths(ACTION_TEXT, SEQUENCE_LENGTH, ACTION_TEXT, SEQUENCE) return model_data @@ -623,7 +637,10 @@ def _check_data(self) -> None: def _prepare_layers(self) -> None: for name in self.data_signature.keys(): self._prepare_sparse_dense_layer_for(name, self.data_signature) - self._prepare_encoding_layers(name) + if name in SEQUENCE_FEATURES_TO_ENCODE: + self._prepare_sequence_layers(name) + else: + self._prepare_encoding_layers(name) for name in self.label_signature.keys(): self._prepare_sparse_dense_layer_for(name, self.label_signature) @@ -756,11 +773,30 @@ def _encode_features_per_attribute( A tensor combining all features for `attribute` """ - if not tf_batch_data[attribute]: - return None - attribute_mask = tf_batch_data[attribute][MASK][0] - # TODO transformer has to be used to process sequence features + + if attribute in SEQUENCE_FEATURES_TO_ENCODE: + batch_dim = self._get_batch_dim(tf_batch_data) + mask_sequence_text = self._get_mask_for(tf_batch_data, TEXT, SEQUENCE_LENGTH) + sequence_lengths = self._get_sequence_lengths( + tf_batch_data, TEXT, SEQUENCE_LENGTH, batch_dim + ) + mask_text = self._compute_mask(sequence_lengths) + + attribute_features, _, _, _ = self._create_sequence( + tf_batch_data[TEXT][SEQUENCE], + tf_batch_data[TEXT][SENTENCE], + mask_sequence_text, + mask_text, + attribute, + sparse_dropout=self.config[SPARSE_INPUT_DROPOUT], + dense_dropout=self.config[DENSE_INPUT_DROPOUT], + masked_lm_loss=self.config[MASKED_LM], + sequence_ids=True, + ) + # TODO entities + return self._last_token(attribute_features, sequence_lengths) * attribute_mask + attribute_features = self._combine_sparse_dense_features( tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}", diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index f476d4917351..736de4d71eb0 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -7,7 +7,6 @@ import os import scipy.sparse import tensorflow as tf -import tensorflow_addons as tfa from typing import Any, Dict, List, Optional, Text, Tuple, Union, Type, NamedTuple @@ -90,6 +89,7 @@ FEATURIZERS, CHECKPOINT_MODEL, SEQUENCE, + SEQUENCE_LENGTH, SENTENCE, DENSE_DIMENSION, ) @@ -100,7 +100,6 @@ SPARSE = "sparse" DENSE = "dense" -SEQUENCE_LENGTH = f"{SEQUENCE}_lengths" LABEL_KEY = LABEL LABEL_SUB_KEY = "ids" TAG_IDS = "tag_ids" @@ -1282,39 +1281,6 @@ def _prepare_layers(self) -> None: if self.config[ENTITY_RECOGNITION]: self._prepare_entity_recognition_layers() - def _prepare_input_layers(self, name: Text) -> None: - self._prepare_ffnn_layer( - name, self.config[HIDDEN_LAYERS_SIZES][name], self.config[DROP_RATE] - ) - - for feature_type in [SENTENCE, SEQUENCE]: - if ( - name not in self.data_signature - or feature_type not in self.data_signature[name] - ): - continue - - self._prepare_sparse_dense_dropout_layers( - f"{name}_{feature_type}", self.config[DROP_RATE] - ) - self._prepare_sparse_dense_layers( - self.data_signature[name][feature_type], - f"{name}_{feature_type}", - self.config[DENSE_DIMENSION][name], - ) - self._prepare_ffnn_layer( - f"{name}_{feature_type}", - [self.config[CONCAT_DIMENSION][name]], - self.config[DROP_RATE], - prefix="concat_layer", - ) - - def _prepare_sequence_layers(self, name: Text) -> None: - self._prepare_input_layers(name) - self._prepare_transformer_layer( - name, self.config[DROP_RATE], self.config[DROP_RATE_ATTENTION] - ) - def _prepare_mask_lm_layers(self, name: Text) -> None: self._tf_layers[f"{name}_input_mask"] = layers.InputMask() @@ -1347,67 +1313,6 @@ def _prepare_entity_recognition_layers(self) -> None: f"tags.{name}", ) - def _features_as_seq_ids( - self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], name: Text - ) -> Optional[tf.Tensor]: - """Creates dense labels for negative sampling.""" - - # if there are dense features - we can use them - for f in features: - if not isinstance(f, tf.SparseTensor): - seq_ids = tf.stop_gradient(f) - # add a zero to the seq dimension for the sentence features - seq_ids = tf.pad(seq_ids, [[0, 0], [0, 1], [0, 0]]) - return seq_ids - - # use additional sparse to dense layer - for f in features: - if isinstance(f, tf.SparseTensor): - seq_ids = tf.stop_gradient( - self._tf_layers[f"sparse_to_dense_ids.{name}"](f) - ) - # add a zero to the seq dimension for the sentence features - seq_ids = tf.pad(seq_ids, [[0, 0], [0, 1], [0, 0]]) - return seq_ids - - return None - - def _combine_sequence_sentence_features( - self, - sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], - sentence_features: List[Union[tf.Tensor, tf.SparseTensor]], - mask_sequence: tf.Tensor, - mask_text: tf.Tensor, - name: Text, - sparse_dropout: bool = False, - dense_dropout: bool = False, - ) -> tf.Tensor: - sequence_x = self._combine_sparse_dense_features( - sequence_features, - f"{name}_{SEQUENCE}", - mask_sequence, - sparse_dropout, - dense_dropout, - ) - sentence_x = self._combine_sparse_dense_features( - sentence_features, f"{name}_{SENTENCE}", None, sparse_dropout, dense_dropout - ) - - if sequence_x is not None and sentence_x is None: - return sequence_x - - if sequence_x is None and sentence_x is not None: - return sentence_x - - if sequence_x is not None and sentence_x is not None: - return self._concat_sequence_sentence_features( - sequence_x, sentence_x, name, mask_text - ) - - raise ValueError( - "No features are present. Please check your configuration file." - ) - def _concat_sequence_sentence_features( self, sequence_x: tf.Tensor, @@ -1464,52 +1369,6 @@ def _create_bow( x = tf.reduce_sum(x, axis=1) # convert to bag-of-words return self._tf_layers[f"ffnn.{name}"](x, self._training) - def _create_sequence( - self, - sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], - sentence_features: List[Union[tf.Tensor, tf.SparseTensor]], - mask_sequence: tf.Tensor, - mask: tf.Tensor, - name: Text, - sparse_dropout: bool = False, - dense_dropout: bool = False, - masked_lm_loss: bool = False, - sequence_ids: bool = False, - ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]: - if sequence_ids: - seq_ids = self._features_as_seq_ids(sequence_features, f"{name}_{SEQUENCE}") - else: - seq_ids = None - - inputs = self._combine_sequence_sentence_features( - sequence_features, - sentence_features, - mask_sequence, - mask, - name, - sparse_dropout, - dense_dropout, - ) - inputs = self._tf_layers[f"ffnn.{name}"](inputs, self._training) - - if masked_lm_loss: - transformer_inputs, lm_mask_bool = self._tf_layers[f"{name}_input_mask"]( - inputs, mask, self._training - ) - else: - transformer_inputs = inputs - lm_mask_bool = None - - outputs = self._tf_layers[f"transformer.{name}"]( - transformer_inputs, 1 - mask, self._training - ) - - if self.config[NUM_TRANSFORMER_LAYERS] > 0: - # apply activation - outputs = tfa.activations.gelu(outputs) - - return outputs, inputs, seq_ids, lm_mask_bool - def _create_all_labels(self) -> Tuple[tf.Tensor, tf.Tensor]: all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0] @@ -1598,22 +1457,6 @@ def _calculate_entity_loss( return loss, f1, logits - @staticmethod - def _get_sequence_lengths( - tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], - key: Text, - sub_key: Text, - batch_dim: int = 1, - ) -> tf.Tensor: - # sentence features have a sequence lengths of 1 - # if sequence features are present we add the sequence lengths of those - - sequence_lengths = tf.ones([batch_dim], dtype=tf.int32) - if key in tf_batch_data and sub_key in tf_batch_data[key]: - sequence_lengths += tf.cast(tf_batch_data[key][sub_key][0], dtype=tf.int32) - - return sequence_lengths - @staticmethod def _get_batch_dim(tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]]) -> int: if TEXT in tf_batch_data and SEQUENCE in tf_batch_data[TEXT]: diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index f214319e7e3a..1bd4322b7c91 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -67,6 +67,7 @@ BALANCED = "balanced" SEQUENCE = "sequence" +SEQUENCE_LENGTH = f"{SEQUENCE}_lengths" SENTENCE = "sentence" POOLING = "pooling" diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index 083db97d167e..3d011eef1806 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -455,6 +455,7 @@ def add_lengths( self.data[key][sub_key] = [] for data in self.data[from_key][from_sub_key]: + print(data) if len(data) > 0: lengths = np.array([x.shape[0] for x in data]) self.data[key][sub_key].extend( diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index d26e77efe5ef..d860cfe97851 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -1,6 +1,7 @@ import datetime import tensorflow as tf +import tensorflow_addons as tfa import numpy as np import logging import os @@ -26,6 +27,7 @@ from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature from rasa.utils.tensorflow.constants import ( SEQUENCE, + SENTENCE, TENSORBOARD_LOG_LEVEL, RANDOM_SEED, TENSORBOARD_LOG_DIR, @@ -47,6 +49,11 @@ MAX_NEG_SIM, USE_MAX_NEG_SIM, NEGATIVE_MARGIN_SCALE, + HIDDEN_LAYERS_SIZES, + DROP_RATE, + DENSE_DIMENSION, + CONCAT_DIMENSION, + DROP_RATE_ATTENTION, ) from rasa.utils.tensorflow import layers from rasa.utils.tensorflow.transformer import TransformerEncoder @@ -153,7 +160,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = False, + eager: bool = True, ) -> None: """Fit model data""" @@ -767,6 +774,39 @@ def _prepare_sparse_dense_layers( units=2, trainable=False, name=f"sparse_to_dense_ids.{name}" ) + def _prepare_input_layers(self, name: Text) -> None: + self._prepare_ffnn_layer( + name, self.config[HIDDEN_LAYERS_SIZES][name], self.config[DROP_RATE] + ) + + for feature_type in [SENTENCE, SEQUENCE]: + if ( + name not in self.data_signature + or feature_type not in self.data_signature[name] + ): + continue + + self._prepare_sparse_dense_dropout_layers( + f"{name}_{feature_type}", self.config[DROP_RATE] + ) + self._prepare_sparse_dense_layers( + self.data_signature[name][feature_type], + f"{name}_{feature_type}", + self.config[DENSE_DIMENSION][name], + ) + self._prepare_ffnn_layer( + f"{name}_{feature_type}", + [self.config[CONCAT_DIMENSION][name]], + self.config[DROP_RATE], + prefix="concat_layer", + ) + + def _prepare_sequence_layers(self, name: Text) -> None: + self._prepare_input_layers(name) + self._prepare_transformer_layer( + name, self.config[DROP_RATE], self.config[DROP_RATE_ATTENTION] + ) + def _combine_sparse_dense_features( self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], @@ -806,6 +846,113 @@ def _combine_sparse_dense_features( return tf.concat(dense_features, axis=-1) * mask + def _combine_sequence_sentence_features( + self, + sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], + sentence_features: List[Union[tf.Tensor, tf.SparseTensor]], + mask_sequence: tf.Tensor, + mask_text: tf.Tensor, + name: Text, + sparse_dropout: bool = False, + dense_dropout: bool = False, + ) -> tf.Tensor: + sequence_x = self._combine_sparse_dense_features( + sequence_features, + f"{name}_{SEQUENCE}", + mask_sequence, + sparse_dropout, + dense_dropout, + ) + sentence_x = self._combine_sparse_dense_features( + sentence_features, f"{name}_{SENTENCE}", None, sparse_dropout, dense_dropout + ) + + if sequence_x is not None and sentence_x is None: + return sequence_x + + if sequence_x is None and sentence_x is not None: + return sentence_x + + if sequence_x is not None and sentence_x is not None: + return self._concat_sequence_sentence_features( + sequence_x, sentence_x, name, mask_text + ) + + raise ValueError( + "No features are present. Please check your configuration file." + ) + + def _features_as_seq_ids( + self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], name: Text + ) -> Optional[tf.Tensor]: + """Creates dense labels for negative sampling.""" + + # if there are dense features - we can use them + for f in features: + if not isinstance(f, tf.SparseTensor): + seq_ids = tf.stop_gradient(f) + # add a zero to the seq dimension for the sentence features + seq_ids = tf.pad(seq_ids, [[0, 0], [0, 1], [0, 0]]) + return seq_ids + + # use additional sparse to dense layer + for f in features: + if isinstance(f, tf.SparseTensor): + seq_ids = tf.stop_gradient( + self._tf_layers[f"sparse_to_dense_ids.{name}"](f) + ) + # add a zero to the seq dimension for the sentence features + seq_ids = tf.pad(seq_ids, [[0, 0], [0, 1], [0, 0]]) + return seq_ids + + return None + + def _create_sequence( + self, + sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], + sentence_features: List[Union[tf.Tensor, tf.SparseTensor]], + mask_sequence: tf.Tensor, + mask: tf.Tensor, + name: Text, + sparse_dropout: bool = False, + dense_dropout: bool = False, + masked_lm_loss: bool = False, + sequence_ids: bool = False, + ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]: + if sequence_ids: + seq_ids = self._features_as_seq_ids(sequence_features, f"{name}_{SEQUENCE}") + else: + seq_ids = None + + inputs = self._combine_sequence_sentence_features( + sequence_features, + sentence_features, + mask_sequence, + mask, + name, + sparse_dropout, + dense_dropout, + ) + inputs = self._tf_layers[f"ffnn.{name}"](inputs, self._training) + + if masked_lm_loss: + transformer_inputs, lm_mask_bool = self._tf_layers[f"{name}_input_mask"]( + inputs, mask, self._training + ) + else: + transformer_inputs = inputs + lm_mask_bool = None + + outputs = self._tf_layers[f"transformer.{name}"]( + transformer_inputs, 1 - mask, self._training + ) + + if self.config[NUM_TRANSFORMER_LAYERS] > 0: + # apply activation + outputs = tfa.activations.gelu(outputs) + + return outputs, inputs, seq_ids, lm_mask_bool + @staticmethod def _compute_mask(sequence_lengths: tf.Tensor) -> tf.Tensor: mask = tf.sequence_mask(sequence_lengths, dtype=tf.float32) @@ -833,6 +980,22 @@ def _get_mask_for( sequence_lengths = tf.cast(tf_batch_data[key][sub_key][0], dtype=tf.int32) return self._compute_mask(sequence_lengths) + @staticmethod + def _get_sequence_lengths( + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + key: Text, + sub_key: Text, + batch_dim: int = 1, + ) -> tf.Tensor: + # sentence features have a sequence lengths of 1 + # if sequence features are present we add the sequence lengths of those + + sequence_lengths = tf.ones([batch_dim], dtype=tf.int32) + if key in tf_batch_data and sub_key in tf_batch_data[key]: + sequence_lengths += tf.cast(tf_batch_data[key][sub_key][0], dtype=tf.int32) + + return sequence_lengths + def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: From e4f795a50ffddde961dd306f1290ff1b2a155b1b Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 22 Oct 2020 15:42:19 +0200 Subject: [PATCH 02/62] reshape 4d tensors into 3d and back --- examples/e2ebot/config.yml | 2 +- rasa/core/policies/ted_policy.py | 83 ++++++++++++++++++------- rasa/nlu/classifiers/diet_classifier.py | 49 +++------------ rasa/utils/tensorflow/model_data.py | 34 ++++++---- rasa/utils/tensorflow/models.py | 44 ++++++++++++- 5 files changed, 133 insertions(+), 79 deletions(-) diff --git a/examples/e2ebot/config.yml b/examples/e2ebot/config.yml index e2cbcd0cb5af..f38558adb0ad 100644 --- a/examples/e2ebot/config.yml +++ b/examples/e2ebot/config.yml @@ -10,7 +10,7 @@ pipeline: min_ngram: 1 max_ngram: 4 - name: DIETClassifier - epochs: 1 + epochs: 200 policies: - name: TEDPolicy epochs: 200 diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 85fa1d8cb112..6ec2591ea79a 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -74,11 +74,13 @@ SEQUENCE_LENGTH, SENTENCE, DENSE_DIMENSION, + CONCAT_DIMENSION, E2E_CONFIDENCE_THRESHOLD, SPARSE_INPUT_DROPOUT, DENSE_INPUT_DROPOUT, MASKED_LM, MASK, + HIDDEN_LAYERS_SIZES, ) @@ -125,7 +127,20 @@ class TEDPolicy(Policy): # The number of hidden layers is equal to the length of the corresponding # list. # TODO add 2 parallel NNs: transformer for text and ffnn for names - DENSE_DIMENSION: 20, + # Hidden layer sizes for layers before the embedding layers for user message + # and labels. + # The number of hidden layers is equal to the length of the corresponding + # list. + HIDDEN_LAYERS_SIZES: {TEXT: [], ACTION_TEXT: []}, + DENSE_DIMENSION: { + TEXT: 128, + ACTION_TEXT: 128, + f"{LABEL}_{ACTION_TEXT}": 20, + INTENT: 20, + ACTION_NAME: 20, + f"{LABEL}_{ACTION_NAME}": 20, + }, + CONCAT_DIMENSION: {TEXT: 128, ACTION_TEXT: 128}, ENCODING_DIMENSION: 50, # Number of units in transformer TRANSFORMER_SIZE: 128, @@ -639,8 +654,7 @@ def _prepare_layers(self) -> None: self._prepare_sparse_dense_layer_for(name, self.data_signature) if name in SEQUENCE_FEATURES_TO_ENCODE: self._prepare_sequence_layers(name) - else: - self._prepare_encoding_layers(name) + self._prepare_encoding_layers(name) for name in self.label_signature.keys(): self._prepare_sparse_dense_layer_for(name, self.label_signature) @@ -679,7 +693,7 @@ def _prepare_sparse_dense_layer_for( self._prepare_sparse_dense_layers( signature[name][feature_type], f"{name}_{feature_type}", - self.config[DENSE_DIMENSION], + self.config[DENSE_DIMENSION][name], ) def _prepare_encoding_layers(self, name: Text) -> None: @@ -704,7 +718,7 @@ def _prepare_encoding_layers(self, name: Text) -> None: return self._prepare_ffnn_layer( - f"{name}_{feature_type}", + f"{name}", [self.config[ENCODING_DIMENSION]], self.config[DROP_RATE_DIALOGUE], ) @@ -776,39 +790,56 @@ def _encode_features_per_attribute( attribute_mask = tf_batch_data[attribute][MASK][0] if attribute in SEQUENCE_FEATURES_TO_ENCODE: - batch_dim = self._get_batch_dim(tf_batch_data) - mask_sequence_text = self._get_mask_for( - tf_batch_data, TEXT, SEQUENCE_LENGTH - ) - sequence_lengths = self._get_sequence_lengths( - tf_batch_data, TEXT, SEQUENCE_LENGTH, batch_dim + sequence_shape = [tf.shape(x) for x in tf_batch_data[attribute][SEQUENCE]] + sentence_shape = [tf.shape(x) for x in tf_batch_data[attribute][SENTENCE]] + + sequence = [ + tf.sparse.reshape(x, (-1, shape[2], shape[3])) + if isinstance(x, tf.SparseTensor) + else tf.reshape(x, (-1, shape[2], shape[3])) + for x, shape in zip(tf_batch_data[attribute][SEQUENCE], sequence_shape) + ] + sentence = [ + tf.sparse.reshape(x, (-1, 1, shape[2])) + if isinstance(x, tf.SparseTensor) + else tf.reshape(x, (-1, shape[2])) + for x, shape in zip(tf_batch_data[attribute][SENTENCE], sentence_shape) + ] + + _sequence_lengths = tf.cast( + tf_batch_data[attribute][SEQUENCE_LENGTH][0], dtype=tf.int32 ) + _sequence_lengths = tf.reshape(_sequence_lengths, (-1,)) + mask_sequence_text = self._compute_mask(_sequence_lengths) + sequence_lengths = _sequence_lengths + 1 mask_text = self._compute_mask(sequence_lengths) attribute_features, _, _, _ = self._create_sequence( - tf_batch_data[TEXT][SEQUENCE], - tf_batch_data[TEXT][SENTENCE], + sequence, + sentence, mask_sequence_text, mask_text, attribute, sparse_dropout=self.config[SPARSE_INPUT_DROPOUT], dense_dropout=self.config[DENSE_INPUT_DROPOUT], masked_lm_loss=self.config[MASKED_LM], - sequence_ids=True, + sequence_ids=False, ) # TODO entities - return ( - self._last_token(attribute_features, sequence_lengths) * attribute_mask + last_token = self._last_token(attribute_features, sequence_lengths) + attribute_features = tf.reshape( + last_token, (sequence_shape[0][0], sequence_shape[0][1], -1) ) - attribute_features = self._combine_sparse_dense_features( - tf_batch_data[attribute][SENTENCE], - f"{attribute}_{SENTENCE}", - mask=attribute_mask, - ) + else: + attribute_features = self._combine_sparse_dense_features( + tf_batch_data[attribute][SENTENCE], + f"{attribute}_{SENTENCE}", + mask=attribute_mask, + ) if attribute in FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: - attribute_features = self._tf_layers[f"ffnn.{attribute}_{SENTENCE}"]( + attribute_features = self._tf_layers[f"ffnn.{attribute}"]( attribute_features ) @@ -879,7 +910,13 @@ def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) - + for k, v in tf_batch_data.items(): + print(k) + for _k, _v in v.items(): + print(" ", _k) + for __v in _v: + print(" ", __v.shape) + # exit() dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) all_label_ids, all_labels_embed = self._create_all_labels_embed() diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index c6989c3cbc02..3b3ffea3ba18 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1318,39 +1318,6 @@ def _prepare_entity_recognition_layers(self) -> None: f"tags.{name}", ) - def _concat_sequence_sentence_features( - self, - sequence_x: tf.Tensor, - sentence_x: tf.Tensor, - name: Text, - mask_text: tf.Tensor, - ): - if sequence_x.shape[-1] != sentence_x.shape[-1]: - sequence_x = self._tf_layers[f"concat_layer.{name}_{SEQUENCE}"]( - sequence_x, self._training - ) - sentence_x = self._tf_layers[f"concat_layer.{name}_{SENTENCE}"]( - sentence_x, self._training - ) - - # we need to concatenate the sequence features with the sentence features - # we cannot use tf.concat as the sequence features are padded - - # (1) get position of sentence features in mask - last = mask_text * tf.math.cumprod( - 1 - mask_text, axis=1, exclusive=True, reverse=True - ) - # (2) multiply by sentence features so that we get a matrix of - # batch-dim x seq-dim x feature-dim with zeros everywhere except for - # for the sentence features - sentence_x = last * sentence_x - - # (3) add a zero to the end of sequence matrix to match the final shape - sequence_x = tf.pad(sequence_x, [[0, 0], [0, 1], [0, 0]]) - - # (4) sum up sequence features and sentence features - return sequence_x + sentence_x - def _create_bow( self, sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], @@ -1462,19 +1429,17 @@ def _calculate_entity_loss( return loss, f1, logits - @staticmethod - def _get_batch_dim(tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]]) -> int: - if TEXT in tf_batch_data and SEQUENCE in tf_batch_data[TEXT]: - return tf.shape(tf_batch_data[TEXT][SEQUENCE][0])[0] - - return tf.shape(tf_batch_data[TEXT][SENTENCE][0])[0] - def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) - - batch_dim = self._get_batch_dim(tf_batch_data) + for k, v in tf_batch_data.items(): + print(k) + for _k, _v in v.items(): + print(" ", _k) + for __v in _v: + print(" ", __v.shape) + batch_dim = self._get_batch_dim(tf_batch_data[TEXT]) mask_sequence_text = self._get_mask_for(tf_batch_data, TEXT, SEQUENCE_LENGTH) sequence_lengths = self._get_sequence_lengths( tf_batch_data, TEXT, SEQUENCE_LENGTH, batch_dim diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index d10f8318828c..1d1d2093afa2 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -93,7 +93,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): "at": ufunc.at, "__call__": ufunc, } - # convert the inputs to np.ndarray to prevent recursion, call the function, then cast it back as FeatureArray + # convert the inputs to np.ndarray to prevent recursion, call the function, + # then cast it back as FeatureArray output = FeatureArray( f[method](*(i.view(np.ndarray) for i in inputs), **kwargs), number_of_dimensions=kwargs["number_of_dimensions"], @@ -141,20 +142,21 @@ def _validate_number_of_dimensions( dim = i break - # If the resulting sub_array is sparse, the remaining number of dimensions should be at least 2 + # If the resulting sub_array is sparse, the remaining number of dimensions + # should be at least 2 if isinstance(_sub_array, scipy.sparse.spmatrix): if dim > 2: raise ValueError( - f"Given number of dimensions '{number_of_dimensions}' does not match dimensiona of given input " - f"array: {input_array}." + f"Given number of dimensions '{number_of_dimensions}' does not " + f"match dimensiona of given input array: {input_array}." ) # If the resulting sub_array is dense, the sub_array should be a single number elif not np.issubdtype(type(_sub_array), np.integer) and not isinstance( _sub_array, (np.float32, np.float64) ): raise ValueError( - f"Given number of dimensions '{number_of_dimensions}' does not match dimensiona of given input " - f"array: {input_array}." + f"Given number of dimensions '{number_of_dimensions}' does not match " + f"dimensions of given input array: {input_array}." ) def get_shape_type_info( @@ -486,12 +488,22 @@ def add_lengths( for features in self.data[from_key][from_sub_key]: if len(features) > 0: if features.number_of_dimensions == 4: - lengths = np.array([x[0].shape[0] for x in features]) + lengths = FeatureArray( + np.array( + [ + # add one more dim so that dialogue dim + # would be a sequence + np.array([[[x.shape[0]]] for x in _features]) + for _features in features + ] + ), + number_of_dimensions=4, + ) else: - lengths = np.array([x.shape[0] for x in features]) - self.data[key][sub_key].extend( - [FeatureArray(lengths, number_of_dimensions=1)] - ) + lengths = FeatureArray( + np.array([x.shape[0] for x in features]), number_of_dimensions=1 + ) + self.data[key][sub_key].extend([lengths]) break def split( diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index d860cfe97851..17e8df4f31e7 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -160,7 +160,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = True, + eager: bool = False, ) -> None: """Fit model data""" @@ -882,6 +882,39 @@ def _combine_sequence_sentence_features( "No features are present. Please check your configuration file." ) + def _concat_sequence_sentence_features( + self, + sequence_x: tf.Tensor, + sentence_x: tf.Tensor, + name: Text, + mask_text: tf.Tensor, + ): + if sequence_x.shape[-1] != sentence_x.shape[-1]: + sequence_x = self._tf_layers[f"concat_layer.{name}_{SEQUENCE}"]( + sequence_x, self._training + ) + sentence_x = self._tf_layers[f"concat_layer.{name}_{SENTENCE}"]( + sentence_x, self._training + ) + + # we need to concatenate the sequence features with the sentence features + # we cannot use tf.concat as the sequence features are padded + + # (1) get position of sentence features in mask + last = mask_text * tf.math.cumprod( + 1 - mask_text, axis=1, exclusive=True, reverse=True + ) + # (2) multiply by sentence features so that we get a matrix of + # batch-dim x seq-dim x feature-dim with zeros everywhere except for + # for the sentence features + sentence_x = last * sentence_x + + # (3) add a zero to the end of sequence matrix to match the final shape + sequence_x = tf.pad(sequence_x, [[0, 0], [0, 1], [0, 0]]) + + # (4) sum up sequence features and sentence features + return sequence_x + sentence_x + def _features_as_seq_ids( self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], name: Text ) -> Optional[tf.Tensor]: @@ -994,7 +1027,14 @@ def _get_sequence_lengths( if key in tf_batch_data and sub_key in tf_batch_data[key]: sequence_lengths += tf.cast(tf_batch_data[key][sub_key][0], dtype=tf.int32) - return sequence_lengths + return tf.cast(tf_batch_data[key][sub_key][0], dtype=tf.int32) + 1 + + @staticmethod + def _get_batch_dim(attribute_data: Dict[Text, List[tf.Tensor]]) -> int: + if SEQUENCE in attribute_data: + return tf.shape(attribute_data[SEQUENCE][0])[0] + + return tf.shape(attribute_data[SENTENCE][0])[0] def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] From 0a97001fefb493f88eda73e0b79d601eac5df285 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 22 Oct 2020 16:07:48 +0200 Subject: [PATCH 03/62] fix shapes in non eager mode --- rasa/core/policies/ted_policy.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 6ec2591ea79a..5e8bc4186a3f 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -790,8 +790,14 @@ def _encode_features_per_attribute( attribute_mask = tf_batch_data[attribute][MASK][0] if attribute in SEQUENCE_FEATURES_TO_ENCODE: - sequence_shape = [tf.shape(x) for x in tf_batch_data[attribute][SEQUENCE]] - sentence_shape = [tf.shape(x) for x in tf_batch_data[attribute][SENTENCE]] + sequence_shape = [ + [tf.shape(x)[0], tf.shape(x)[1], tf.shape(x)[2], x.shape[-1]] + for x in tf_batch_data[attribute][SEQUENCE] + ] + sentence_shape = [ + [tf.shape(x)[0], tf.shape(x)[1], x.shape[-1]] + for x in tf_batch_data[attribute][SENTENCE] + ] sequence = [ tf.sparse.reshape(x, (-1, shape[2], shape[3])) @@ -805,6 +811,22 @@ def _encode_features_per_attribute( else tf.reshape(x, (-1, shape[2])) for x, shape in zip(tf_batch_data[attribute][SENTENCE], sentence_shape) ] + sequence = [ + tf.SparseTensor( + x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[3]) + ) + if isinstance(x, tf.SparseTensor) + else x + for x, shape in zip(sequence, sequence_shape) + ] + sentence = [ + tf.SparseTensor( + x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[2]) + ) + if isinstance(x, tf.SparseTensor) + else x + for x, shape in zip(sentence, sentence_shape) + ] _sequence_lengths = tf.cast( tf_batch_data[attribute][SEQUENCE_LENGTH][0], dtype=tf.int32 From f904e46075d06f8ba5dd1365cc624e471474fb21 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 22 Oct 2020 16:14:57 +0200 Subject: [PATCH 04/62] make shape indices more general --- rasa/core/policies/ted_policy.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 5e8bc4186a3f..4c7909ba600f 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -795,25 +795,25 @@ def _encode_features_per_attribute( for x in tf_batch_data[attribute][SEQUENCE] ] sentence_shape = [ - [tf.shape(x)[0], tf.shape(x)[1], x.shape[-1]] + [tf.shape(x)[0], tf.shape(x)[1], 1, x.shape[-1]] for x in tf_batch_data[attribute][SENTENCE] ] sequence = [ - tf.sparse.reshape(x, (-1, shape[2], shape[3])) + tf.sparse.reshape(x, (-1, shape[2], shape[-1])) if isinstance(x, tf.SparseTensor) - else tf.reshape(x, (-1, shape[2], shape[3])) + else tf.reshape(x, (-1, shape[2], shape[-1])) for x, shape in zip(tf_batch_data[attribute][SEQUENCE], sequence_shape) ] sentence = [ - tf.sparse.reshape(x, (-1, 1, shape[2])) + tf.sparse.reshape(x, (-1, shape[2], shape[-1])) if isinstance(x, tf.SparseTensor) - else tf.reshape(x, (-1, shape[2])) + else tf.reshape(x, (-1, shape[2], shape[-1])) for x, shape in zip(tf_batch_data[attribute][SENTENCE], sentence_shape) ] sequence = [ tf.SparseTensor( - x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[3]) + x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[-1]) ) if isinstance(x, tf.SparseTensor) else x @@ -821,7 +821,7 @@ def _encode_features_per_attribute( ] sentence = [ tf.SparseTensor( - x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[2]) + x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[-1]) ) if isinstance(x, tf.SparseTensor) else x From dd576e249140b7e11f58f97ba5b35bea10b5a23a Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 22 Oct 2020 17:44:49 +0200 Subject: [PATCH 05/62] fix add_length --- rasa/utils/tensorflow/model_data.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index d10f8318828c..9d7a488e608f 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -484,15 +484,27 @@ def add_lengths( self.data[key][sub_key] = [] for features in self.data[from_key][from_sub_key]: - if len(features) > 0: - if features.number_of_dimensions == 4: - lengths = np.array([x[0].shape[0] for x in features]) - else: - lengths = np.array([x.shape[0] for x in features]) - self.data[key][sub_key].extend( - [FeatureArray(lengths, number_of_dimensions=1)] + if len(features) == 0: + continue + + if features.number_of_dimensions == 4: + lengths = FeatureArray( + np.array( + [ + # add one more dim so that dialogue dim + # would be a sequence + np.array([[[x.shape[0]]] for x in _features]) + for _features in features + ] + ), + number_of_dimensions=4, ) - break + else: + lengths = FeatureArray( + np.array([x.shape[0] for x in features]), number_of_dimensions=1 + ) + self.data[key][sub_key].extend([lengths]) + break def split( self, number_of_test_examples: int, random_seed: int From 45a29828a213c13d95af6a4516e08a617a725291 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 09:25:49 +0200 Subject: [PATCH 06/62] add todo --- rasa/core/policies/ted_policy.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 55e65c36bbc7..fc09fb63fcc5 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -296,6 +296,12 @@ def _create_model_data( """ model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY) + # TODO: + # sentence features should also be 4D + # sequence length should be 4D + # pad_data should convert 4D to 3D (sum up batch and dialogue dimension) + # inside batch_loss after the transformer convert 3D back to 4D + if label_ids is not None and encoded_all_labels is not None: label_ids = np.array( From 6e64bd739f489f70df64264b3b284673d954af39 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 10:11:19 +0200 Subject: [PATCH 07/62] sentence features are now also 4D --- rasa/utils/tensorflow/model_data_utils.py | 32 +++++++---------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py index f3fb30a2813a..09b54fc3af03 100644 --- a/rasa/utils/tensorflow/model_data_utils.py +++ b/rasa/utils/tensorflow/model_data_utils.py @@ -282,36 +282,22 @@ def _features_for_attribute( sparse_features = {} dense_features = {} - # vstack serves as removing dimension in case we are not dealing with a sequence for key, values in _sparse_features.items(): - if key == SEQUENCE: - if consider_dialogue_dimension: - sparse_features[key] = FeatureArray( - np.array(values), number_of_dimensions=4 - ) - else: - sparse_features[key] = FeatureArray( - np.array([v[0] for v in values]), number_of_dimensions=3 - ) + if consider_dialogue_dimension: + sparse_features[key] = FeatureArray( + np.array(values), number_of_dimensions=4 + ) else: - features = [scipy.sparse.vstack(value) for value in values] sparse_features[key] = FeatureArray( - np.array(features), number_of_dimensions=3 + np.array([v[0] for v in values]), number_of_dimensions=3 ) + for key, values in _dense_features.items(): - if key == SEQUENCE: - if consider_dialogue_dimension: - dense_features[key] = FeatureArray( - np.array(values), number_of_dimensions=4 - ) - else: - dense_features[key] = FeatureArray( - np.array([v[0] for v in values]), number_of_dimensions=3 - ) + if consider_dialogue_dimension: + dense_features[key] = FeatureArray(np.array(values), number_of_dimensions=4) else: - features = [np.vstack(value) for value in values] dense_features[key] = FeatureArray( - np.array(features), number_of_dimensions=3 + np.array([v[0] for v in values]), number_of_dimensions=3 ) attribute_to_feature_arrays = { From 94c0fa901e7be8f3c30b4b561453af8eb485988e Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 10:26:56 +0200 Subject: [PATCH 08/62] sequence length is 4D --- rasa/core/policies/ted_policy.py | 11 +++++++++-- rasa/nlu/classifiers/diet_classifier.py | 2 +- rasa/utils/tensorflow/constants.py | 1 + rasa/utils/tensorflow/models.py | 6 +++--- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index fc09fb63fcc5..1f75792ec29a 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -72,6 +72,7 @@ UNIDIRECTIONAL_ENCODER, SEQUENCE, SENTENCE, + SEQUENCE_LENGTH, DENSE_DIMENSION, E2E_CONFIDENCE_THRESHOLD, MASK, @@ -297,8 +298,6 @@ def _create_model_data( model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY) # TODO: - # sentence features should also be 4D - # sequence length should be 4D # pad_data should convert 4D to 3D (sum up batch and dialogue dimension) # inside batch_loss after the transformer convert 3D back to 4D @@ -326,6 +325,7 @@ def _create_model_data( model_data.add_lengths( DIALOGUE, LENGTH, next(iter(list(attribute_data.keys()))), MASK ) + model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE) return model_data @@ -846,6 +846,13 @@ def batch_loss( ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) + for key, values in tf_batch_data.items(): + print(key) + for sub_key, tensors in values.items(): + print(f" {sub_key}") + for t in tensors: + print(f" {t.shape}") + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) all_label_ids, all_labels_embed = self._create_all_labels_embed() diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index f58e6d301ad7..2470c2ca0f2e 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -90,6 +90,7 @@ CHECKPOINT_MODEL, SEQUENCE, SENTENCE, + SEQUENCE_LENGTH, DENSE_DIMENSION, MASK, ) @@ -99,7 +100,6 @@ SPARSE = "sparse" DENSE = "dense" -SEQUENCE_LENGTH = f"{SEQUENCE}_lengths" LABEL_KEY = LABEL LABEL_SUB_KEY = "ids" TAG_IDS = "tag_ids" diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 3f42323260be..9244e35cedff 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -68,6 +68,7 @@ SEQUENCE = "sequence" SENTENCE = "sentence" +SEQUENCE_LENGTH = f"{SEQUENCE}_lengths" POOLING = "pooling" MAX_POOLING = "max" diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index d26e77efe5ef..b2b105b90d25 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -153,7 +153,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = False, + eager: bool = True, ) -> None: """Fit model data""" @@ -285,7 +285,7 @@ def train_on_batch( self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) def build_for_predict( - self, predict_data: RasaModelData, eager: bool = False + self, predict_data: RasaModelData, eager: bool = True ) -> None: self._training = False # needed for tf graph mode self._predict_function = self._get_tf_call_model_function( @@ -533,7 +533,7 @@ def batch_to_model_data_format( batch[idx + 2][i] for i in range(number_of_dimensions - 1) ] + [feature_dimension] batch_data[key][sub_key].append( - tf.SparseTensor(batch[idx], batch[idx + 1], shape,) + tf.SparseTensor(batch[idx], batch[idx + 1], shape) ) idx += 3 else: From 5263b6064d34e4edc7119506e035c721c1036228 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 11:39:47 +0200 Subject: [PATCH 09/62] convert 4d to 3 during padding --- rasa/utils/tensorflow/model_data.py | 72 ++++++++++++++--------- rasa/utils/tensorflow/models.py | 3 + tests/utils/tensorflow/test_model_data.py | 14 ++--- 3 files changed, 54 insertions(+), 35 deletions(-) diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index 9d7a488e608f..90bb00d6defc 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -178,13 +178,13 @@ def get_shape_type_info( A list of type tuples. """ if self.is_sparse: + # 4D tensors were converted into 3D tensors during padding + number_of_dimensions = ( + self.number_of_dimensions if self.number_of_dimensions != 4 else 3 + ) # scipy matrix is converted into indices, data, shape return ( - [ - (None, self.number_of_dimensions), - (None,), - (self.number_of_dimensions), - ], + [(None, number_of_dimensions), (None,), (number_of_dimensions)], [tf.int64, tf.float32, tf.int64], ) @@ -198,13 +198,15 @@ def get_shape_type_info( return [(None, None, self.units)], [tf.float32] if self.number_of_dimensions == 4: - return [(None, None, None, self.units)], [tf.float32] + # 4D tensors were converted into 3D tensors during padding + return [(None, None, self.units)], [tf.float32] return [], [] class FeatureSignature(NamedTuple): - """Stores the number of units, the type (sparse vs dense), and the number of dimensions of features.""" + """Stores the number of units, the type (sparse vs dense), and the number of + dimensions of features.""" is_sparse: bool units: Optional[int] @@ -1068,8 +1070,15 @@ def _pad_dense_data(array_of_dense: FeatureArray) -> np.ndarray: def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray: # in case of dialogue data we may have 4 dimensions # batch size x dialogue history length x sequence length x number of features - data_size = len(array_of_array_of_dense) - max_dialogue_len = max( + + # as transformers cannot handle 4D tensors pad and reshape the data + # so that the resulting tensor is 3D + # the shape is (sum of dialogue history length for all tensors in the + # batch x max sequence length x number of features) + # the original shape is passed on the model via the data signature, the + # original shape can be used to transform the 3D tensor back into 4D + + sum_dialogue_len = sum( len(array_of_dense) for array_of_dense in array_of_array_of_dense ) max_seq_len = max( @@ -1081,18 +1090,15 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray: ) data_padded = np.zeros( - [ - data_size, - max_dialogue_len, - max_seq_len, - array_of_array_of_dense[0][0].shape[-1], - ], + [sum_dialogue_len, max_seq_len, array_of_array_of_dense[0][0].shape[-1]], dtype=array_of_array_of_dense[0][0].dtype, ) + current_sum_dialogue_len = 0 for i, array_of_dense in enumerate(array_of_array_of_dense): for j, dense in enumerate(array_of_dense): - data_padded[i, j, : dense.shape[0], :] = dense + data_padded[current_sum_dialogue_len + j, : dense.shape[0], :] = dense + current_sum_dialogue_len += len(array_of_dense) return data_padded.astype(np.float32) @@ -1136,10 +1142,19 @@ def _scipy_matrix_to_values(array_of_sparse: FeatureArray) -> List[np.ndarray]: ] @staticmethod - def _4d_scipy_matrix_to_values(array_of_array_of_sparse: FeatureArray): + def _4d_scipy_matrix_to_values( + array_of_array_of_sparse: FeatureArray, + ) -> List[np.ndarray]: # in case of dialogue data we may have 4 dimensions # batch size x dialogue history length x sequence length x number of features + # as transformers cannot handle 4D tensors pad and reshape the data + # so that the resulting tensor is 3D + # the shape is (sum of dialogue history length for all tensors in the + # batch x max sequence length x number of features) + # the original shape is passed on the model via the data signature, the + # original shape can be used to transform the 3D tensor back into 4D + # we need to make sure that the matrices are coo_matrices otherwise the # transformation does not work (e.g. you cannot access x.row, x.col) if not isinstance(array_of_array_of_sparse[0][0], scipy.sparse.coo_matrix): @@ -1148,8 +1163,8 @@ def _4d_scipy_matrix_to_values(array_of_array_of_sparse: FeatureArray): for array_of_sparse in array_of_array_of_sparse ] - max_dialogue_len = max( - [len(array_of_sparse) for array_of_sparse in array_of_array_of_sparse] + max_dialogue_len = sum( + len(array_of_sparse) for array_of_sparse in array_of_array_of_sparse ) max_seq_len = max( [ @@ -1162,7 +1177,15 @@ def _4d_scipy_matrix_to_values(array_of_array_of_sparse: FeatureArray): indices = np.hstack( [ np.vstack( - [i * np.ones_like(x.row), j * np.ones_like(x.row), x.row, x.col] + [ + sum( + len(array_of_sparse) + for array_of_sparse in array_of_array_of_sparse[:i] + ) + + j * np.ones_like(x.row), + x.row, + x.col, + ] ) for i, array_of_sparse in enumerate(array_of_array_of_sparse) for j, x in enumerate(array_of_sparse) @@ -1178,14 +1201,7 @@ def _4d_scipy_matrix_to_values(array_of_array_of_sparse: FeatureArray): ) number_of_features = array_of_array_of_sparse[0][0].shape[-1] - shape = np.array( - ( - len(array_of_array_of_sparse), - max_dialogue_len, - max_seq_len, - number_of_features, - ) - ) + shape = np.array((max_dialogue_len, max_seq_len, number_of_features)) return [ indices.astype(np.int64), diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index b2b105b90d25..b11a38350b6e 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -526,6 +526,9 @@ def batch_to_model_data_format( for key, values in data_signature.items(): for sub_key, signature in values.items(): for is_sparse, feature_dimension, number_of_dimensions in signature: + number_of_dimensions = ( + number_of_dimensions if number_of_dimensions != 4 else 3 + ) if is_sparse: # explicitly substitute last dimension in shape with known # static value diff --git a/tests/utils/tensorflow/test_model_data.py b/tests/utils/tensorflow/test_model_data.py index d26bac06d722..304179434b13 100644 --- a/tests/utils/tensorflow/test_model_data.py +++ b/tests/utils/tensorflow/test_model_data.py @@ -96,7 +96,7 @@ async def model_data() -> RasaModelData: [ scipy.sparse.csr_matrix( np.random.randint(5, size=(3, 10)) - ), + ) ], [ scipy.sparse.csr_matrix( @@ -123,13 +123,13 @@ async def model_data() -> RasaModelData: np.random.rand(1, 14), np.random.rand(3, 14), ], - [np.random.rand(5, 14), np.random.rand(2, 14),], + [np.random.rand(5, 14), np.random.rand(2, 14)], [ np.random.rand(5, 14), np.random.rand(1, 14), np.random.rand(3, 14), ], - [np.random.rand(3, 14),], + [np.random.rand(3, 14)], [ np.random.rand(3, 14), np.random.rand(1, 14), @@ -383,12 +383,12 @@ def test_get_num_of_features(model_data: RasaModelData): np.random.rand(7, 10), ] ), - np.array([np.random.rand(2, 10),]), + np.array([np.random.rand(2, 10)]), ] ), number_of_dimensions=4, ), - (3, 4, 7, 10), + (8, 7, 10), ), ], ) @@ -466,14 +466,14 @@ def test_pad_dense_data(incoming_data: FeatureArray, expected_shape: np.ndarray) [ scipy.sparse.csr_matrix( np.random.randint(10, size=(2, 10)) - ), + ) ] ), ] ), number_of_dimensions=4, ), - (3, 4, 7, 10), + (8, 7, 10), ), ], ) From b5d479b7edb2342b5771cb1ca98b34ee41e806f5 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 14:07:00 +0200 Subject: [PATCH 10/62] mask is 4d now --- rasa/utils/tensorflow/model_data_utils.py | 24 ++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py index 09b54fc3af03..0459cee91b8d 100644 --- a/rasa/utils/tensorflow/model_data_utils.py +++ b/rasa/utils/tensorflow/model_data_utils.py @@ -300,9 +300,18 @@ def _features_for_attribute( np.array([v[0] for v in values]), number_of_dimensions=3 ) - attribute_to_feature_arrays = { - MASK: [FeatureArray(np.array(attribute_masks), number_of_dimensions=3)] - } + if consider_dialogue_dimension: + attribute_to_feature_arrays = { + MASK: [FeatureArray(np.array(attribute_masks), number_of_dimensions=4)] + } + else: + attribute_to_feature_arrays = { + MASK: [ + FeatureArray( + np.array(np.squeeze(attribute_masks, -1)), number_of_dimensions=3 + ) + ] + } feature_types = set() feature_types.update(list(dense_features.keys())) @@ -354,7 +363,7 @@ def _extract_features( # create a mask for every state # to capture which turn has which input attribute_mask = np.expand_dims( - np.ones(len(list_of_list_of_features), np.float32), -1 + np.expand_dims(np.ones(len(list_of_list_of_features), np.float32), -1), -1 ) for i, list_of_features in enumerate(list_of_list_of_features): @@ -365,9 +374,10 @@ def _extract_features( list_of_features = zero_features for features in list_of_features: - # in case of ENTITIES, if the attribute type matches either 'entity', 'role', or 'group' the - # features correspond to the tag ids of that entity type - # in order to distinguish later on between the different tag ids, we use the entity type as key + # in case of ENTITIES, if the attribute type matches either 'entity', + # 'role', or 'group' the features correspond to the tag ids of that + # entity type in order to distinguish later on between the different + # tag ids, we use the entity type as key if attribute == ENTITIES and features.attribute in [ ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_GROUP, From df2ccc3bbfc88cb08a484713d95d90603f987c36 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 14:20:42 +0200 Subject: [PATCH 11/62] bring mask in correct shape before transformer --- rasa/core/policies/ted_policy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 1f75792ec29a..46d967e4153a 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -297,10 +297,6 @@ def _create_model_data( """ model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY) - # TODO: - # pad_data should convert 4D to 3D (sum up batch and dialogue dimension) - # inside batch_loss after the transformer convert 3D back to 4D - if label_ids is not None and encoded_all_labels is not None: label_ids = np.array( @@ -733,12 +729,16 @@ def _emebed_dialogue( """Create dialogue level embedding and mask.""" mask = self._compute_mask(sequence_lengths) + # remove the additional dimensions that were added due to 4D shape + mask = tf.squeeze(tf.squeeze(mask, axis=-1), axis=-1) dialogue_transformed = self._tf_layers[f"transformer.{DIALOGUE}"]( dialogue_in, 1 - mask, self._training ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) + # TODO transform back to original 4D shape + if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( From 9813014d9a45c61a77d812a40a8d51e1eb633057 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 15:35:55 +0200 Subject: [PATCH 12/62] keep also the orginial dialogue length --- rasa/core/policies/ted_policy.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 46d967e4153a..68b8f164c457 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -321,6 +321,14 @@ def _create_model_data( model_data.add_lengths( DIALOGUE, LENGTH, next(iter(list(attribute_data.keys()))), MASK ) + model_data.data[DIALOGUE][f"3D_{LENGTH}"] = [ + FeatureArray( + np.array( + [np.squeeze(f, -1) for f in model_data.data[DIALOGUE][LENGTH][0]] + ), + number_of_dimensions=3, + ) + ] model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE) return model_data @@ -724,7 +732,10 @@ def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: return all_label_ids, all_labels_embed def _emebed_dialogue( - self, dialogue_in: tf.Tensor, sequence_lengths: tf.Tensor + self, + dialogue_in: tf.Tensor, + sequence_lengths: tf.Tensor, + dialogue_3d_lengths: tf.Tensor, ) -> Tuple[tf.Tensor, tf.Tensor]: """Create dialogue level embedding and mask.""" @@ -738,6 +749,17 @@ def _emebed_dialogue( dialogue_transformed = tfa.activations.gelu(dialogue_transformed) # TODO transform back to original 4D shape + output = tf.zeros( + ( + dialogue_3d_lengths.shape[0], + dialogue_3d_lengths.shape[1], + dialogue_transformed.shape[1], + dialogue_transformed.shape[2], + ) + ) + + # output shape 32, 29, 1, 128 + # dialogue_transformed shape 647, 1, 128 if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used @@ -854,6 +876,9 @@ def batch_loss( print(f" {t.shape}") dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + dialogue_3d_lengths = tf.cast( + tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 + ) all_label_ids, all_labels_embed = self._create_all_labels_embed() @@ -862,7 +887,7 @@ def batch_loss( dialogue_in = self._process_batch_data(tf_batch_data) dialogue_embed, dialogue_mask = self._emebed_dialogue( - dialogue_in, dialogue_lengths + dialogue_in, dialogue_lengths, dialogue_3d_lengths ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) From 71c527f927239fa35233b1357a10de5e845b6efb Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 23 Oct 2020 17:25:56 +0200 Subject: [PATCH 13/62] update doc strings --- rasa/core/policies/ted_policy.py | 39 ++++++++++++-------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 68b8f164c457..f1db91017424 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -749,17 +749,6 @@ def _emebed_dialogue( dialogue_transformed = tfa.activations.gelu(dialogue_transformed) # TODO transform back to original 4D shape - output = tf.zeros( - ( - dialogue_3d_lengths.shape[0], - dialogue_3d_lengths.shape[1], - dialogue_transformed.shape[1], - dialogue_transformed.shape[2], - ) - ) - - # output shape 32, 29, 1, 128 - # dialogue_transformed shape 647, 1, 128 if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used @@ -775,11 +764,13 @@ def _emebed_dialogue( def _encode_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text ) -> Optional[tf.Tensor]: - """ - Encodes features for a given attribute + """Encodes features for a given attribute + Args: tf_batch_data: dictionary mapping every attribute to its features and masks - attribute: the attribute we will encode features for (e.g., ACTION_NAME, INTENT) + attribute: the attribute we will encode features for + (e.g., ACTION_NAME, INTENT) + Returns: A tensor combining all features for `attribute` """ @@ -805,9 +796,12 @@ def _encode_features_per_attribute( def _process_batch_data( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] ) -> tf.Tensor: - """Encodes batch data; combines intent and text and action name and action text if both are present + """Encodes batch data; combines intent and text and action name and action + text if both are present. + Args: tf_batch_data: dictionary mapping every attribute to its features and masks + Returns: Tensor: encoding of all features in the batch, combined; """ @@ -817,7 +811,8 @@ def _process_batch_data( for key in tf_batch_data.keys() if LABEL_KEY not in key and DIALOGUE not in key } - # if both action text and action name are present, combine them; otherwise, return the one which is present + # if both action text and action name are present, combine them; otherwise, + # return the one which is present if ( batch_encoded.get(ACTION_TEXT) is not None @@ -868,13 +863,6 @@ def batch_loss( ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) - for key, values in tf_batch_data.items(): - print(key) - for sub_key, tensors in values.items(): - print(f" {sub_key}") - for t in tensors: - print(f" {t.shape}") - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) dialogue_3d_lengths = tf.cast( tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 @@ -913,13 +901,16 @@ def batch_predict( ) dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + dialogue_3d_lengths = tf.cast( + tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 + ) if self.all_labels_embed is None: _, self.all_labels_embed = self._create_all_labels_embed() dialogue_in = self._process_batch_data(tf_batch_data) dialogue_embed, dialogue_mask = self._emebed_dialogue( - dialogue_in, dialogue_lengths + dialogue_in, dialogue_lengths, dialogue_3d_lengths ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) From f4c119a33ca94ee728e6c95fd1e2e4b22f90bdc4 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Mon, 26 Oct 2020 16:39:59 +0100 Subject: [PATCH 14/62] use tf.scatter_nd to tranform 3d back to 4d --- rasa/core/policies/ted_policy.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index f1db91017424..dda229102ec2 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -748,14 +748,31 @@ def _emebed_dialogue( ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) - # TODO transform back to original 4D shape - if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( - self._last_token(dialogue_transformed, sequence_lengths), 1 + self._last_token(dialogue_transformed, tf.squeeze(sequence_lengths)), 1 + ) + mask = tf.expand_dims( + self._last_token(mask, tf.squeeze(sequence_lengths)), 1 ) - mask = tf.expand_dims(self._last_token(mask, sequence_lengths), 1) + + # transform dialogue tensor back to original 4D shape + indices = [] + for batch_dim in range(dialogue_3d_lengths.shape[0]): + for dialogue_dim in range(dialogue_3d_lengths.shape[1]): + if dialogue_3d_lengths[batch_dim][dialogue_dim] > 0: + indices.append([batch_dim, dialogue_dim]) + indices = tf.constant(indices) + shape = tf.constant( + [ + dialogue_3d_lengths.shape[0], + dialogue_3d_lengths.shape[1], + dialogue_transformed.shape[1], + dialogue_transformed.shape[2], + ] + ) + dialogue_transformed = tf.scatter_nd(indices, dialogue_transformed, shape) dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed) From 886ab01110e0c668caf8b2b003e91197fd1aecad Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 27 Oct 2020 13:54:29 +0100 Subject: [PATCH 15/62] move tensor transformation to _encode_features_per_attribute --- rasa/core/policies/ted_policy.py | 109 ++++++++++--------------------- 1 file changed, 34 insertions(+), 75 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 7828d4e67274..d66d450d2a36 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -762,10 +762,7 @@ def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: return all_label_ids, all_labels_embed def _emebed_dialogue( - self, - dialogue_in: tf.Tensor, - sequence_lengths: tf.Tensor, - dialogue_3d_lengths: tf.Tensor, + self, dialogue_in: tf.Tensor, sequence_lengths: tf.Tensor ) -> Tuple[tf.Tensor, tf.Tensor]: """Create dialogue level embedding and mask.""" @@ -781,28 +778,9 @@ def _emebed_dialogue( if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( - self._last_token(dialogue_transformed, tf.squeeze(sequence_lengths)), 1 - ) - mask = tf.expand_dims( - self._last_token(mask, tf.squeeze(sequence_lengths)), 1 + self._last_token(dialogue_transformed, sequence_lengths), 1 ) - - # transform dialogue tensor back to original 4D shape - indices = [] - for batch_dim in range(dialogue_3d_lengths.shape[0]): - for dialogue_dim in range(dialogue_3d_lengths.shape[1]): - if dialogue_3d_lengths[batch_dim][dialogue_dim] > 0: - indices.append([batch_dim, dialogue_dim]) - indices = tf.constant(indices) - shape = tf.constant( - [ - dialogue_3d_lengths.shape[0], - dialogue_3d_lengths.shape[1], - dialogue_transformed.shape[1], - dialogue_transformed.shape[2], - ] - ) - dialogue_transformed = tf.scatter_nd(indices, dialogue_transformed, shape) + mask = tf.expand_dims(self._last_token(mask, sequence_lengths), 1) dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed) @@ -825,55 +803,16 @@ def _encode_features_per_attribute( attribute_mask = tf_batch_data[attribute][MASK][0] if attribute in SEQUENCE_FEATURES_TO_ENCODE: - sequence_shape = [ - [tf.shape(x)[0], tf.shape(x)[1], tf.shape(x)[2], x.shape[-1]] - for x in tf_batch_data[attribute][SEQUENCE] - ] - sentence_shape = [ - [tf.shape(x)[0], tf.shape(x)[1], 1, x.shape[-1]] - for x in tf_batch_data[attribute][SENTENCE] - ] - - sequence = [ - tf.sparse.reshape(x, (-1, shape[2], shape[-1])) - if isinstance(x, tf.SparseTensor) - else tf.reshape(x, (-1, shape[2], shape[-1])) - for x, shape in zip(tf_batch_data[attribute][SEQUENCE], sequence_shape) - ] - sentence = [ - tf.sparse.reshape(x, (-1, shape[2], shape[-1])) - if isinstance(x, tf.SparseTensor) - else tf.reshape(x, (-1, shape[2], shape[-1])) - for x, shape in zip(tf_batch_data[attribute][SENTENCE], sentence_shape) - ] - sequence = [ - tf.SparseTensor( - x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[-1]) - ) - if isinstance(x, tf.SparseTensor) - else x - for x, shape in zip(sequence, sequence_shape) - ] - sentence = [ - tf.SparseTensor( - x.indices, x.values, (tf.shape(x)[0], tf.shape(x)[1], shape[-1]) - ) - if isinstance(x, tf.SparseTensor) - else x - for x, shape in zip(sentence, sentence_shape) - ] - _sequence_lengths = tf.cast( tf_batch_data[attribute][SEQUENCE_LENGTH][0], dtype=tf.int32 ) - _sequence_lengths = tf.reshape(_sequence_lengths, (-1,)) - mask_sequence_text = self._compute_mask(_sequence_lengths) + mask_sequence_text = self._compute_mask(tf.squeeze(_sequence_lengths)) sequence_lengths = _sequence_lengths + 1 - mask_text = self._compute_mask(sequence_lengths) + mask_text = self._compute_mask(tf.squeeze(sequence_lengths)) attribute_features, _, _, _ = self._create_sequence( - sequence, - sentence, + tf_batch_data[attribute][SEQUENCE], + tf_batch_data[attribute][SENTENCE], mask_sequence_text, mask_text, attribute, @@ -883,9 +822,32 @@ def _encode_features_per_attribute( sequence_ids=False, ) # TODO entities - last_token = self._last_token(attribute_features, sequence_lengths) - attribute_features = tf.reshape( - last_token, (sequence_shape[0][0], sequence_shape[0][1], -1) + last_token = self._last_token( + attribute_features, tf.squeeze(sequence_lengths) + ) + + # transform attribute features back to original + # batch x dialogue length x units + indices = [] + dialogue_lengths = tf.cast( + tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 + ) + for batch_dim in range(dialogue_lengths.shape[0]): + for dialogue_dim in range(dialogue_lengths.shape[1]): + if dialogue_lengths[batch_dim][dialogue_dim] > 0: + indices.append([batch_dim, dialogue_dim]) + indices = tf.constant(indices) + shape = tf.constant( + [ + dialogue_lengths.shape[0], + dialogue_lengths.shape[1], + last_token.shape[-1], + ] + ) + attribute_features = tf.scatter_nd(indices, last_token, shape) + + attribute_mask = tf.expand_dims( + tf.squeeze(self._compute_mask(tf.squeeze(dialogue_lengths))), axis=-1 ) else: @@ -979,9 +941,6 @@ def batch_loss( print(" ", __v.shape) # exit() dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - dialogue_3d_lengths = tf.cast( - tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 - ) all_label_ids, all_labels_embed = self._create_all_labels_embed() @@ -990,7 +949,7 @@ def batch_loss( dialogue_in = self._process_batch_data(tf_batch_data) dialogue_embed, dialogue_mask = self._emebed_dialogue( - dialogue_in, dialogue_lengths, dialogue_3d_lengths + dialogue_in, dialogue_lengths ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) From 098e441360f8b80e41b375569a32db245ec41dbc Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 27 Oct 2020 14:26:53 +0100 Subject: [PATCH 16/62] fix issues in _encode_features_per_attribute --- rasa/core/policies/ted_policy.py | 41 +++++++++++++++++++------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index d66d450d2a36..fe81e59862fc 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -780,7 +780,9 @@ def _emebed_dialogue( dialogue_transformed = tf.expand_dims( self._last_token(dialogue_transformed, sequence_lengths), 1 ) - mask = tf.expand_dims(self._last_token(mask, sequence_lengths), 1) + mask = tf.expand_dims( + self._last_token(mask, tf.squeeze(sequence_lengths)), 1 + ) dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed) @@ -821,11 +823,26 @@ def _encode_features_per_attribute( masked_lm_loss=self.config[MASKED_LM], sequence_ids=False, ) + # TODO entities - last_token = self._last_token( + + attribute_features = self._last_token( attribute_features, tf.squeeze(sequence_lengths) ) + else: + attribute_features = self._combine_sparse_dense_features( + tf_batch_data[attribute][SENTENCE], + f"{attribute}_{SENTENCE}", + mask=attribute_mask, + ) + + if attribute in FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: + attribute_features = self._tf_layers[f"ffnn.{attribute}"]( + attribute_features + ) + + if attribute in FEATURES_TO_ENCODE: # transform attribute features back to original # batch x dialogue length x units indices = [] @@ -841,27 +858,17 @@ def _encode_features_per_attribute( [ dialogue_lengths.shape[0], dialogue_lengths.shape[1], - last_token.shape[-1], + attribute_features.shape[-1], ] ) - attribute_features = tf.scatter_nd(indices, last_token, shape) + attribute_features = tf.scatter_nd( + indices, tf.squeeze(attribute_features), shape + ) attribute_mask = tf.expand_dims( tf.squeeze(self._compute_mask(tf.squeeze(dialogue_lengths))), axis=-1 ) - else: - attribute_features = self._combine_sparse_dense_features( - tf_batch_data[attribute][SENTENCE], - f"{attribute}_{SENTENCE}", - mask=attribute_mask, - ) - - if attribute in FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: - attribute_features = self._tf_layers[f"ffnn.{attribute}"]( - attribute_features - ) - return attribute_features * attribute_mask def _process_batch_data( @@ -940,7 +947,7 @@ def batch_loss( for __v in _v: print(" ", __v.shape) # exit() - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32) all_label_ids, all_labels_embed = self._create_all_labels_embed() From 94e0d8191c50d2683885072f64a5a912f2f22af7 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 27 Oct 2020 14:47:29 +0100 Subject: [PATCH 17/62] use correct dialogue length --- rasa/core/policies/ted_policy.py | 19 +++++++++---------- rasa/nlu/classifiers/diet_classifier.py | 12 ++++++------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index fe81e59862fc..ea019c5dbf24 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -776,13 +776,12 @@ def _emebed_dialogue( dialogue_transformed = tfa.activations.gelu(dialogue_transformed) if self.max_history_tracker_featurizer_used: + dialogue_lengths = tf.squeeze(tf.reduce_sum(sequence_lengths, axis=1)) # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( - self._last_token(dialogue_transformed, sequence_lengths), 1 - ) - mask = tf.expand_dims( - self._last_token(mask, tf.squeeze(sequence_lengths)), 1 + self._last_token(dialogue_transformed, dialogue_lengths), 1 ) + mask = tf.expand_dims(self._last_token(mask, dialogue_lengths), 1) dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed) @@ -940,12 +939,12 @@ def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) - for k, v in tf_batch_data.items(): - print(k) - for _k, _v in v.items(): - print(" ", _k) - for __v in _v: - print(" ", __v.shape) + # for k, v in tf_batch_data.items(): + # print(k) + # for _k, _v in v.items(): + # print(" ", _k) + # for __v in _v: + # print(" ", __v.shape) # exit() dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index b31717a5ae76..481bce505c38 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1434,12 +1434,12 @@ def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) - for k, v in tf_batch_data.items(): - print(k) - for _k, _v in v.items(): - print(" ", _k) - for __v in _v: - print(" ", __v.shape) + # for k, v in tf_batch_data.items(): + # print(k) + # for _k, _v in v.items(): + # print(" ", _k) + # for __v in _v: + # print(" ", __v.shape) batch_dim = self._get_batch_dim(tf_batch_data[TEXT]) mask_sequence_text = self._get_mask_for(tf_batch_data, TEXT, SEQUENCE_LENGTH) sequence_lengths = self._get_sequence_lengths( From 032666241994ea1f2cfef7f004e5fac7847e2243 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 27 Oct 2020 15:02:00 +0100 Subject: [PATCH 18/62] add comments --- rasa/core/policies/ted_policy.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index ea019c5dbf24..bb6179955f37 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -825,11 +825,15 @@ def _encode_features_per_attribute( # TODO entities + # resulting attribute features will have shape + # combined batch dimension and dialogue length x sequence length x units attribute_features = self._last_token( attribute_features, tf.squeeze(sequence_lengths) ) else: + # resulting attribute features will have shape + # combined batch dimension and dialogue length x 1 x units attribute_features = self._combine_sparse_dense_features( tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}", @@ -864,6 +868,8 @@ def _encode_features_per_attribute( indices, tf.squeeze(attribute_features), shape ) + # create a attribute mask that has the shape + # batch x dialogue length attribute_mask = tf.expand_dims( tf.squeeze(self._compute_mask(tf.squeeze(dialogue_lengths))), axis=-1 ) From 03cc881d9b09c31cfa29bc8aa192dd805f389aca Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 28 Oct 2020 15:07:33 +0100 Subject: [PATCH 19/62] clean up --- rasa/core/policies/ted_policy.py | 80 ++++++++++++++----------- rasa/nlu/classifiers/diet_classifier.py | 7 +-- rasa/utils/tensorflow/model_data.py | 8 +-- rasa/utils/tensorflow/models.py | 4 +- 4 files changed, 51 insertions(+), 48 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index bb6179955f37..1792ab30966f 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -95,7 +95,7 @@ LABEL_SUB_KEY = "ids" LENGTH = "length" POSSIBLE_FEATURE_TYPES = [SEQUENCE, SENTENCE] -FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT] +SENTENCE_FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT] SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT] LABEL_FEATURES_TO_ENCODE = [f"{LABEL}_{ACTION_NAME}", f"{LABEL}_{ACTION_TEXT}"] STATE_LEVEL_FEATURES = [ENTITIES, SLOTS, ACTIVE_LOOP] @@ -350,6 +350,10 @@ def _create_model_data( ) model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE) model_data.add_lengths(ACTION_TEXT, SEQUENCE_LENGTH, ACTION_TEXT, SEQUENCE) + # Add the dialogue in 3D, e.g. batch-size x dialogue-length x 1 to have + # the actual dialogue length inside the model + # (the 4D dialogue length will be converted into + # combined batch size and dialogue length x sequence length x 1) model_data.data[DIALOGUE][f"3D_{LENGTH}"] = [ FeatureArray( np.array( @@ -590,7 +594,7 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": for feature_name, features in model_data_example.items() if feature_name # we need to remove label features for prediction if they are present - in STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [DIALOGUE] + in STATE_LEVEL_FEATURES + SENTENCE_FEATURES_TO_ENCODE + [DIALOGUE] }, ) model.build_for_predict(predict_data_example) @@ -623,7 +627,8 @@ def __init__( self.predict_data_signature = { feature_name: features for feature_name, features in data_signature.items() - if feature_name in STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [DIALOGUE] + if feature_name + in STATE_LEVEL_FEATURES + SENTENCE_FEATURES_TO_ENCODE + [DIALOGUE] } # optimizer @@ -714,10 +719,13 @@ def _prepare_encoding_layers(self, name: Text) -> None: """ feature_type = SENTENCE # create encoding layers only for the features which should be encoded; - if name not in FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: + if name not in SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: return # check that there are SENTENCE features for the attribute name in data - if name in FEATURES_TO_ENCODE and feature_type not in self.data_signature[name]: + if ( + name in SENTENCE_FEATURES_TO_ENCODE + and feature_type not in self.data_signature[name] + ): return # same for label_data if ( @@ -776,6 +784,7 @@ def _emebed_dialogue( dialogue_transformed = tfa.activations.gelu(dialogue_transformed) if self.max_history_tracker_featurizer_used: + # get the actual dialogue length in a 1D tensor dialogue_lengths = tf.squeeze(tf.reduce_sum(sequence_lengths, axis=1)) # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( @@ -840,32 +849,18 @@ def _encode_features_per_attribute( mask=attribute_mask, ) - if attribute in FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: + if attribute in SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: attribute_features = self._tf_layers[f"ffnn.{attribute}"]( attribute_features ) - if attribute in FEATURES_TO_ENCODE: - # transform attribute features back to original - # batch x dialogue length x units - indices = [] + if attribute in SENTENCE_FEATURES_TO_ENCODE: dialogue_lengths = tf.cast( tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 ) - for batch_dim in range(dialogue_lengths.shape[0]): - for dialogue_dim in range(dialogue_lengths.shape[1]): - if dialogue_lengths[batch_dim][dialogue_dim] > 0: - indices.append([batch_dim, dialogue_dim]) - indices = tf.constant(indices) - shape = tf.constant( - [ - dialogue_lengths.shape[0], - dialogue_lengths.shape[1], - attribute_features.shape[-1], - ] - ) - attribute_features = tf.scatter_nd( - indices, tf.squeeze(attribute_features), shape + + attribute_features = self._convert_to_original_shape( + attribute_features, dialogue_lengths ) # create a attribute mask that has the shape @@ -876,6 +871,29 @@ def _encode_features_per_attribute( return attribute_features * attribute_mask + @staticmethod + def _convert_to_original_shape( + attribute_features: tf.Tensor, dialogue_lengths: tf.Tensor + ) -> tf.Tensor: + # transform attribute features back to original shape: + # batch x dialogue length x units + indices = [] + for batch_dim in range(dialogue_lengths.shape[0]): + for dialogue_dim in range(dialogue_lengths.shape[1]): + if dialogue_lengths[batch_dim][dialogue_dim] > 0: + indices.append([batch_dim, dialogue_dim]) + indices = tf.constant(indices) + + shape = tf.constant( + [ + dialogue_lengths.shape[0], + dialogue_lengths.shape[1], + attribute_features.shape[-1], + ] + ) + + return tf.scatter_nd(indices, tf.squeeze(attribute_features), shape) + def _process_batch_data( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] ) -> tf.Tensor: @@ -945,13 +963,6 @@ def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) - # for k, v in tf_batch_data.items(): - # print(k) - # for _k, _v in v.items(): - # print(" ", _k) - # for __v in _v: - # print(" ", __v.shape) - # exit() dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32) all_label_ids, all_labels_embed = self._create_all_labels_embed() @@ -986,17 +997,14 @@ def batch_predict( batch_in, self.predict_data_signature ) - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - dialogue_3d_lengths = tf.cast( - tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 - ) + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32) if self.all_labels_embed is None: _, self.all_labels_embed = self._create_all_labels_embed() dialogue_in = self._process_batch_data(tf_batch_data) dialogue_embed, dialogue_mask = self._emebed_dialogue( - dialogue_in, dialogue_lengths, dialogue_3d_lengths + dialogue_in, dialogue_lengths ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 481bce505c38..22197e83943e 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1434,12 +1434,7 @@ def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) - # for k, v in tf_batch_data.items(): - # print(k) - # for _k, _v in v.items(): - # print(" ", _k) - # for __v in _v: - # print(" ", __v.shape) + batch_dim = self._get_batch_dim(tf_batch_data[TEXT]) mask_sequence_text = self._get_mask_for(tf_batch_data, TEXT, SEQUENCE_LENGTH) sequence_lengths = self._get_sequence_lengths( diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index a72abb3831dc..dd6f11db3dfa 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -1077,8 +1077,8 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray: # so that the resulting tensor is 3D # the shape is (sum of dialogue history length for all tensors in the # batch x max sequence length x number of features) - # the original shape is passed on the model via the data signature, the - # original shape can be used to transform the 3D tensor back into 4D + # the original shape and the original dialogue length is passed on to the model + # it can be used to transform the 3D tensor back into 4D sum_dialogue_len = sum( len(array_of_dense) for array_of_dense in array_of_array_of_dense @@ -1154,8 +1154,8 @@ def _4d_scipy_matrix_to_values( # so that the resulting tensor is 3D # the shape is (sum of dialogue history length for all tensors in the # batch x max sequence length x number of features) - # the original shape is passed on the model via the data signature, the - # original shape can be used to transform the 3D tensor back into 4D + # the original shape and the original dialogue length is passed on to the model + # it can be used to transform the 3D tensor back into 4D # we need to make sure that the matrices are coo_matrices otherwise the # transformation does not work (e.g. you cannot access x.row, x.col) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 0b49b739a453..0938b8710c2f 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -160,7 +160,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = True, + eager: bool = False, ) -> None: """Fit model data""" @@ -292,7 +292,7 @@ def train_on_batch( self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) def build_for_predict( - self, predict_data: RasaModelData, eager: bool = True + self, predict_data: RasaModelData, eager: bool = False ) -> None: self._training = False # needed for tf graph mode self._predict_function = self._get_tf_call_model_function( From 2cb13f52e32cbb23d3ff65e060e2c3f218934bb0 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 28 Oct 2020 15:26:33 +0100 Subject: [PATCH 20/62] update constants --- rasa/core/policies/ted_policy.py | 19 ++++++++++++------- rasa/utils/tensorflow/constants.py | 1 - 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index bf01c050af30..e61898782d61 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -17,7 +17,15 @@ MaxHistoryTrackerFeaturizer, ) from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer -from rasa.shared.nlu.constants import ACTION_TEXT, ACTION_NAME, INTENT, TEXT, ENTITIES +from rasa.shared.nlu.constants import ( + ACTION_TEXT, + ACTION_NAME, + INTENT, + TEXT, + ENTITIES, + VALID_FEATURE_TYPES, + FEATURE_TYPE_SENTENCE, +) from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.core.policies.policy import Policy from rasa.core.constants import DEFAULT_POLICY_PRIORITY, DIALOGUE @@ -71,7 +79,6 @@ ENCODING_DIMENSION, UNIDIRECTIONAL_ENCODER, SEQUENCE, - SEQUENCE_LENGTH, SENTENCE, SEQUENCE_LENGTH, DENSE_DIMENSION, @@ -95,7 +102,6 @@ LABEL_KEY = LABEL LABEL_SUB_KEY = "ids" LENGTH = "length" -POSSIBLE_FEATURE_TYPES = [SEQUENCE, SENTENCE] SENTENCE_FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT] SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT] LABEL_FEATURES_TO_ENCODE = [f"{LABEL}_{ACTION_NAME}", f"{LABEL}_{ACTION_TEXT}"] @@ -702,7 +708,7 @@ def _prepare_sparse_dense_layer_for( name: the attribute name signature: data signature """ - for feature_type in POSSIBLE_FEATURE_TYPES: + for feature_type in VALID_FEATURE_TYPES: if name not in signature or feature_type not in signature[name]: # features for feature type are not present continue @@ -725,20 +731,19 @@ def _prepare_encoding_layers(self, name: Text) -> None: Args: name: attribute name """ - feature_type = SENTENCE # create encoding layers only for the features which should be encoded; if name not in SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: return # check that there are SENTENCE features for the attribute name in data if ( name in SENTENCE_FEATURES_TO_ENCODE - and feature_type not in self.data_signature[name] + and FEATURE_TYPE_SENTENCE not in self.data_signature[name] ): return # same for label_data if ( name in LABEL_FEATURES_TO_ENCODE - and feature_type not in self.label_signature[name] + and FEATURE_TYPE_SENTENCE not in self.label_signature[name] ): return diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 80497fa261d5..06f81775a673 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -69,7 +69,6 @@ SEQUENCE = "sequence" SEQUENCE_LENGTH = f"{SEQUENCE}_lengths" SENTENCE = "sentence" -SEQUENCE_LENGTH = f"{SEQUENCE}_lengths" POOLING = "pooling" MAX_POOLING = "max" From 4d2b5a18f92c5c4f3d54e9d38d4926a02ff6aa43 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 28 Oct 2020 16:34:51 +0100 Subject: [PATCH 21/62] review comment --- rasa/core/featurizers/single_state_featurizer.py | 2 +- rasa/core/policies/ted_policy.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 6831bca01efa..87779199ff4a 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -87,7 +87,7 @@ def _create_features( features = np.zeros(len(self._default_feature_states[attribute]), np.float32) for state_feature, value in state_features.items(): - # check that the value is in default_feature_states to be able to assigh + # check that the value is in default_feature_states to be able to assign # its value if state_feature in self._default_feature_states[attribute]: features[self._default_feature_states[attribute][state_feature]] = value diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index e61898782d61..1869abfc7a42 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -862,12 +862,16 @@ def _encode_features_per_attribute( mask=attribute_mask, ) - if attribute in SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: + if attribute in set( + SENTENCE_FEATURES_TO_ENCODE + + SEQUENCE_FEATURES_TO_ENCODE + + LABEL_FEATURES_TO_ENCODE + ): attribute_features = self._tf_layers[f"ffnn.{attribute}"]( attribute_features ) - if attribute in SENTENCE_FEATURES_TO_ENCODE: + if attribute in set(SENTENCE_FEATURES_TO_ENCODE + SEQUENCE_FEATURES_TO_ENCODE): dialogue_lengths = tf.cast( tf_batch_data[DIALOGUE][f"3D_{LENGTH}"][0], tf.int32 ) From 6525b8e7b4c6db7d01c5a4b4e8c4ee0d58476f8e Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 28 Oct 2020 17:33:27 +0100 Subject: [PATCH 22/62] keep entity dict --- .../featurizers/single_state_featurizer.py | 1 + rasa/shared/core/domain.py | 10 +++++++-- rasa/shared/core/events.py | 2 +- rasa/shared/core/trackers.py | 22 +++++++++++-------- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 87779199ff4a..ce12467e2682 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -207,6 +207,7 @@ def encode_state( self._extract_state_features(sub_state, interpreter, sparse=True) ) if sub_state.get(ENTITIES): + # TODO entities is a frozenset state_features[ENTITIES] = self._create_features( sub_state, ENTITIES, sparse=True ) diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index b14a046dda54..cb6a61367756 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -676,7 +676,9 @@ def input_states(self) -> List[Text]: + self.form_names ) - def _get_featurized_entities(self, latest_message: UserUttered) -> Set[Text]: + def _get_featurized_entities( + self, latest_message: UserUttered + ) -> List[Dict[Text, Any]]: intent_name = latest_message.intent.get( rasa.shared.nlu.constants.INTENT_NAME_KEY ) @@ -688,7 +690,11 @@ def _get_featurized_entities(self, latest_message: UserUttered) -> Set[Text]: wanted_entities = set(intent_config.get(USED_ENTITIES_KEY, entity_names)) - return entity_names.intersection(wanted_entities) + return [ + entity + for entity in latest_message.entities + if entity["entity"] in wanted_entities + ] def _get_user_sub_state( self, tracker: "DialogueStateTracker" diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py index fdc2b4fbd690..5ad058bcf124 100644 --- a/rasa/shared/core/events.py +++ b/rasa/shared/core/events.py @@ -355,7 +355,7 @@ def as_sub_state(self) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: if self.intent_name and not self.use_text_for_featurization: out[INTENT] = self.intent_name if entities: - out[ENTITIES] = entities + out[ENTITIES] = self.entities return out diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py index 296f07be5385..60dbdecc3902 100644 --- a/rasa/shared/core/trackers.py +++ b/rasa/shared/core/trackers.py @@ -229,15 +229,19 @@ def _events_for_verbosity( @staticmethod def freeze_current_state(state: State) -> FrozenState: - frozen_state = frozenset( - { - key: frozenset(values.items()) - if isinstance(values, Dict) - else frozenset(values) - for key, values in state.items() - }.items() - ) - return frozen_state + state_copy = copy.deepcopy(state) + frozen_state = {} + for key, values in state_copy.items(): + if isinstance(values, dict): + if "entities" in values and isinstance(values["entities"][0], dict): + values["entities"] = tuple( + [frozenset(e.items()) for e in values["entities"]] + ) + frozen_state[key] = frozenset(values.items()) + else: + frozen_state[key] = frozenset(values) + + return frozenset(frozen_state.items()) def past_states(self, domain: Domain) -> List[State]: """Generate the past states of this tracker based on the history. From f4aec125a11d80d92c99fd9cc61654e62ddb664d Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 28 Oct 2020 18:05:20 +0100 Subject: [PATCH 23/62] create tag_ids for TED --- .../featurizers/single_state_featurizer.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index ce12467e2682..53fb48211044 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -5,6 +5,7 @@ from collections import defaultdict import rasa.shared.utils.io +from nlu.constants import TOKENS_NAMES from rasa.shared.core.domain import SubState, State, Domain from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.shared.core.constants import PREVIOUS_ACTION, ACTIVE_LOOP, USER, SLOTS @@ -16,9 +17,12 @@ ACTION_TEXT, ACTION_NAME, INTENT, + FEATURE_TYPE_SEQUENCE, + TEXT, ) from rasa.shared.nlu.training_data.features import Features from rasa.shared.nlu.training_data.message import Message +from utils.tensorflow.model_data_utils import TAG_ID_ORIGIN logger = logging.getLogger(__name__) @@ -101,6 +105,32 @@ def _create_features( ) return [features] + def _create_entity_tag_features( + self, sub_state: SubState, interpreter: NaturalLanguageInterpreter + ) -> List["Features"]: + from rasa.nlu.test import determine_token_labels + + # TODO what about roles and groups + + parsed_text = interpreter.featurize_message(Message({TEXT: sub_state[TEXT]})) + entities = [dict(entity) for entity in sub_state[ENTITIES]] + + _tags = [] + for token in parsed_text.get(TOKENS_NAMES[TEXT]): + _tag = determine_token_labels(token, entities, attribute_key="entity") + if _tag in self._default_feature_states[ENTITIES]: + # +1 to keep the 0 for the NO ENTITY TAG + _tags.append(self._default_feature_states[ENTITIES][_tag] + 1) + else: + _tags.append(0) + + # transpose to have seq_len x 1 + return [ + Features( + np.array([_tags]).T, FEATURE_TYPE_SEQUENCE, "entity", TAG_ID_ORIGIN + ) + ] + @staticmethod def _to_sparse_sentence_features( sparse_sequence_features: List["Features"], @@ -207,10 +237,9 @@ def encode_state( self._extract_state_features(sub_state, interpreter, sparse=True) ) if sub_state.get(ENTITIES): - # TODO entities is a frozenset state_features[ENTITIES] = self._create_features( sub_state, ENTITIES, sparse=True - ) + ) + self._create_entity_tag_features(sub_state, interpreter) if state_type in {SLOTS, ACTIVE_LOOP}: state_features[state_type] = self._create_features( From 2fd1c5208afca37365998e98e05500d077ce6101 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 3 Nov 2020 15:40:44 +0100 Subject: [PATCH 24/62] clean up after merge --- rasa/core/policies/ted_policy.py | 3 --- rasa/nlu/classifiers/diet_classifier.py | 1 - 2 files changed, 4 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 169586a9306b..3eaa9399bdb5 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -787,7 +787,6 @@ def _emebed_dialogue( tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], ) -> Tuple[tf.Tensor, tf.Tensor]: """Create dialogue level embedding and mask.""" - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) mask = self._compute_mask(dialogue_lengths) @@ -797,8 +796,6 @@ def _emebed_dialogue( dialogue_transformed = tfa.activations.gelu(dialogue_transformed) if self.max_history_tracker_featurizer_used: - # get the actual dialogue length in a 1D tensor - dialogue_lengths = tf.squeeze(tf.reduce_sum(dialogue_lengths, axis=1)) # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( self._last_token(dialogue_transformed, dialogue_lengths), 1 diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index af864175dc13..fe0890c8a66c 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -89,7 +89,6 @@ FEATURIZERS, CHECKPOINT_MODEL, SEQUENCE, - SEQUENCE_LENGTH, SENTENCE, SEQUENCE_LENGTH, DENSE_DIMENSION, From 62d8bab9615372cef7199d73840b00bb2442dbe8 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 09:56:51 +0100 Subject: [PATCH 25/62] add batch_loss_entities (not working) --- rasa/core/policies/ted_policy.py | 85 ++++++++++++++++++++++- rasa/utils/tensorflow/model_data_utils.py | 2 +- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 3eaa9399bdb5..eee732fb4f76 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -27,6 +27,7 @@ ENTITIES, VALID_FEATURE_TYPES, FEATURE_TYPE_SENTENCE, + ENTITY_ATTRIBUTE_TYPE, ) from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.core.policies.policy import Policy @@ -656,6 +657,9 @@ def __init__( self._prepare_layers() + self.text_seq_transformer_output: Optional[tf.Tensor] = None + self.dialogue_transformer_output: Optional[tf.Tensor] = None + def _check_data(self) -> None: if not any(key in [INTENT, TEXT] for key in self.data_signature.keys()): raise ValueError( @@ -795,6 +799,8 @@ def _emebed_dialogue( ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) + self.dialogue_transformer_output = dialogue_transformed + if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( @@ -844,7 +850,8 @@ def _encode_features_per_attribute( sequence_ids=False, ) - # TODO entities + if attribute == TEXT: + self.text_seq_transformer_output = attribute_features # resulting attribute features will have shape # combined batch dimension and dialogue length x 1 x units @@ -890,6 +897,76 @@ def _encode_features_per_attribute( return attribute_features + def _batch_loss_entities( + self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + ) -> List[tf.Tensor]: + _sequence_lengths = tf.cast( + tf_batch_data[TEXT][SEQUENCE_LENGTH][0], dtype=tf.int32 + ) + _sequence_lengths = tf.squeeze(_sequence_lengths, axis=-1) + sequence_lengths = _sequence_lengths + 1 + mask_text = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) + + sequence_lengths -= 1 # remove sentence features + + entity_tags = None + + if ENTITY_ATTRIBUTE_TYPE not in tf_batch_data.get(ENTITIES, {}): + return [] + + # text_seq_transformer: 1260 x 5 x 128 -> 64 x 28 x 5 x 128 + # dialogue_transformer: 64 x 28 x 128 -> 64 x 28 x 1 x 128 + # tag_ids: 1260 x 5 x 1 -> 64 x 28 x 5 x 1 + # sequence_length: 1260 x 1 -> 64 x 28 x 1 + # mask: 1260 x 5 x 1 -> 64 x 28 x 5 x 1 + + text_transformed = tf.concat( + [self.text_seq_transformer_output, self.dialogue_transformer_output] + ) + + tag_ids = tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0] + # add a zero (no entity) for the sentence features to match the shape of + # inputs + tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) + + loss, f1, _logits = self._calculate_entity_loss( + text_transformed, + tag_ids, + mask_text, + sequence_lengths, + ENTITY_ATTRIBUTE_TYPE, + entity_tags, + ) + + return [loss] + + def _calculate_entity_loss( + self, + inputs: tf.Tensor, + tag_ids: tf.Tensor, + mask: tf.Tensor, + sequence_lengths: tf.Tensor, + tag_name: Text, + entity_tags: Optional[tf.Tensor] = None, + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + + tag_ids = tf.cast(tag_ids[:, :, 0], tf.int32) + + if entity_tags is not None: + _tags = self._tf_layers[f"embed.{tag_name}.tags"](entity_tags) + inputs = tf.concat([inputs, _tags], axis=-1) + + logits = self._tf_layers[f"embed.{tag_name}.logits"](inputs) + + # should call first to build weights + pred_ids, _ = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths) + loss = self._tf_layers[f"crf.{tag_name}"].loss( + logits, tag_ids, sequence_lengths + ) + f1 = self._tf_layers[f"crf.{tag_name}"].f1_score(tag_ids, pred_ids, mask) + + return loss, f1, logits + @staticmethod def _convert_to_original_shape( attribute_features: tf.Tensor, @@ -1036,6 +1113,12 @@ def batch_loss( dialogue_mask, ) + if ( + self.dialogue_transformer_output is not None + and self.text_seq_transformer_output is not None + ): + self._batch_loss_entities(tf_batch_data) + self.action_loss.update_state(loss) self.action_acc.update_state(acc) diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py index ffdf1c21d67b..0f70f5464c77 100644 --- a/rasa/utils/tensorflow/model_data_utils.py +++ b/rasa/utils/tensorflow/model_data_utils.py @@ -260,7 +260,7 @@ def convert_to_data_format( num_examples = 1 for _features in attribute_to_features.values(): num_examples = max(num_examples, len(_features)) - dialogue_length = max(dialogue_length, len(_features[0])) + dialogue_length = max(dialogue_length, max(len(f) for f in _features)) empty_features = [[None] * dialogue_length] * num_examples for attribute in attributes: From e50f4eb97d1ae22d7a47bd1e08b19039118a518c Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 12:45:53 +0100 Subject: [PATCH 26/62] concatenate text and dialogue transformer output --- .../featurizers/single_state_featurizer.py | 35 +++- rasa/core/policies/ted_policy.py | 171 ++++++++++++------ rasa/nlu/classifiers/diet_classifier.py | 43 ----- rasa/utils/tensorflow/model_data_utils.py | 4 +- rasa/utils/tensorflow/models.py | 48 ++++- 5 files changed, 192 insertions(+), 109 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 3a653fade973..693134343a69 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -5,7 +5,7 @@ from collections import defaultdict import rasa.shared.utils.io -from nlu.constants import TOKENS_NAMES +from rasa.nlu.constants import TOKENS_NAMES from rasa.shared.core.domain import SubState, State, Domain from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.shared.core.constants import PREVIOUS_ACTION, ACTIVE_LOOP, USER, SLOTS @@ -19,10 +19,11 @@ INTENT, FEATURE_TYPE_SEQUENCE, TEXT, + NO_ENTITY_TAG, ) from rasa.shared.nlu.training_data.features import Features from rasa.shared.nlu.training_data.message import Message -from utils.tensorflow.model_data_utils import TAG_ID_ORIGIN +from rasa.utils.tensorflow.model_data_utils import TAG_ID_ORIGIN logger = logging.getLogger(__name__) @@ -102,24 +103,42 @@ def _create_features( ) return [features] + def get_entity_tag_ids(self) -> Dict[Text, int]: + """Returns the tag to index mapping for entities. + + Returns: + Tag to index mapping. + """ + if ENTITIES not in self._default_feature_states: + return {} + + tag_ids = { + tag: idx + 1 # +1 to keep 0 for the NO_ENTITY_TAG + for tag, idx in self._default_feature_states[ENTITIES].items() + } + tag_ids[NO_ENTITY_TAG] = 0 + return tag_ids + def _create_entity_tag_features( self, sub_state: SubState, interpreter: NaturalLanguageInterpreter ) -> List["Features"]: from rasa.nlu.test import determine_token_labels - # TODO what about roles and groups + # TODO + # The entity states used to create the tag-idx-mapping contains the + # entities and the concatenated entity and roles/groups. We do not + # distinguish between entities and roles/groups right now. + # TODO + # Should we support BILOU tagging? parsed_text = interpreter.featurize_message(Message({TEXT: sub_state[TEXT]})) entities = [dict(entity) for entity in sub_state[ENTITIES]] + tag_id_mapping = self.get_entity_tag_ids() _tags = [] for token in parsed_text.get(TOKENS_NAMES[TEXT]): _tag = determine_token_labels(token, entities, attribute_key="entity") - if _tag in self._default_feature_states[ENTITIES]: - # +1 to keep the 0 for the NO ENTITY TAG - _tags.append(self._default_feature_states[ENTITIES][_tag] + 1) - else: - _tags.append(0) + _tags.append(tag_id_mapping[_tag]) # transpose to have seq_len x 1 return [ diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index eee732fb4f76..b1062beb7484 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -12,6 +12,7 @@ from typing import Any, List, Optional, Text, Dict, Tuple, Union, TYPE_CHECKING import rasa.utils.io as io_utils +from nlu.classifiers.diet_classifier import EntityTagSpec from rasa.shared.core.domain import Domain from rasa.core.featurizers.tracker_featurizers import ( TrackerFeaturizer, @@ -267,6 +268,7 @@ def __init__( max_history: Optional[int] = None, model: Optional[RasaModel] = None, zero_state_features: Optional[Dict[Text, List["Features"]]] = None, + entity_tag_specs: Optional[List[EntityTagSpec]] = None, **kwargs: Any, ) -> None: """Declare instance variables with default values.""" @@ -284,6 +286,8 @@ def __init__( self.model = model + self._entity_tag_specs = entity_tag_specs + self.zero_state_features = zero_state_features or defaultdict(list) self._label_data: Optional[RasaModelData] = None @@ -298,6 +302,28 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self.config = train_utils.update_similarity_type(self.config) self.config = train_utils.update_evaluation_parameters(self.config) + def _create_entity_tag_specs(self) -> List[EntityTagSpec]: + """Create entity tag specifications with their respective tag id mappings.""" + + _tag_specs = [] + + # TODO + tag_id_index_mapping = {"O": 0, "emotion": 1} + + if tag_id_index_mapping: + _tag_specs.append( + EntityTagSpec( + tag_name=ENTITY_ATTRIBUTE_TYPE, + tags_to_ids=tag_id_index_mapping, + ids_to_tags={ + value: key for key, value in tag_id_index_mapping.items() + }, + num_tags=len(tag_id_index_mapping), + ) + ) + + return _tag_specs + def _create_label_data( self, domain: Domain, interpreter: NaturalLanguageInterpreter ) -> Tuple[RasaModelData, List[Dict[Text, List["Features"]]]]: @@ -418,6 +444,8 @@ def train( ) return + self._entity_tag_specs = self._create_entity_tag_specs() + # keep one example for persisting and loading self.data_example = model_data.first_data_example() @@ -426,6 +454,7 @@ def train( self.config, isinstance(self.featurizer, MaxHistoryTrackerFeaturizer), self._label_data, + self._entity_tag_specs, ) self.model.fit( @@ -551,6 +580,16 @@ def persist(self, path: Union[Text, Path]) -> None: dict(self._label_data.data), ) + entity_tag_specs = ( + [tag_spec._asdict() for tag_spec in self._entity_tag_specs] + if self._entity_tag_specs + else [] + ) + rasa.shared.utils.io.dump_obj_as_json_to_file( + model_path / f"{SAVE_MODEL_FILE_NAME}.entity_tag_specs.json", + entity_tag_specs, + ) + @classmethod def load(cls, path: Union[Text, Path]) -> "TEDPolicy": """Loads a policy from the storage. @@ -585,6 +624,22 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": priority = io_utils.json_unpickle( model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl" ) + entity_tag_specs = rasa.shared.utils.io.read_json_file( + model_path / f"{SAVE_MODEL_FILE_NAME}.entity_tag_specs.json" + ) + entity_tag_specs = [ + EntityTagSpec( + tag_name=tag_spec["tag_name"], + ids_to_tags={ + int(key): value for key, value in tag_spec["ids_to_tags"].items() + }, + tags_to_ids={ + key: int(value) for key, value in tag_spec["tags_to_ids"].items() + }, + num_tags=tag_spec["num_tags"], + ) + for tag_spec in entity_tag_specs + ] model_data_example = RasaModelData( label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data @@ -600,6 +655,7 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": featurizer, MaxHistoryTrackerFeaturizer ), label_data=label_data, + entity_tag_specs=entity_tag_specs, ) # build the graph for prediction @@ -621,6 +677,7 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": priority=priority, model=model, zero_state_features=zero_state_features, + entity_tag_specs=entity_tag_specs, **meta, ) @@ -632,6 +689,7 @@ def __init__( config: Dict[Text, Any], max_history_tracker_featurizer_used: bool, label_data: RasaModelData, + entity_tag_specs: Optional[List[EntityTagSpec]], ) -> None: super().__init__("TED", config, data_signature, label_data) @@ -644,6 +702,8 @@ def __init__( in STATE_LEVEL_FEATURES + SENTENCE_FEATURES_TO_ENCODE + [DIALOGUE] } + self._entity_tag_specs = entity_tag_specs + # optimizer self.optimizer = tf.keras.optimizers.Adam() @@ -699,6 +759,7 @@ def _prepare_layers(self) -> None: self._prepare_embed_layers(LABEL) self._prepare_dot_product_loss(LABEL, self.config[SCALE_LOSS]) + self._prepare_entity_recognition_layers() def _prepare_sparse_dense_layer_for( self, name: Text, signature: Dict[Text, Dict[Text, List[FeatureSignature]]] @@ -900,77 +961,71 @@ def _encode_features_per_attribute( def _batch_loss_entities( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] ) -> List[tf.Tensor]: - _sequence_lengths = tf.cast( - tf_batch_data[TEXT][SEQUENCE_LENGTH][0], dtype=tf.int32 - ) - _sequence_lengths = tf.squeeze(_sequence_lengths, axis=-1) - sequence_lengths = _sequence_lengths + 1 - mask_text = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) - - sequence_lengths -= 1 # remove sentence features - - entity_tags = None - if ENTITY_ATTRIBUTE_TYPE not in tf_batch_data.get(ENTITIES, {}): return [] - # text_seq_transformer: 1260 x 5 x 128 -> 64 x 28 x 5 x 128 - # dialogue_transformer: 64 x 28 x 128 -> 64 x 28 x 1 x 128 - # tag_ids: 1260 x 5 x 1 -> 64 x 28 x 5 x 1 - # sequence_length: 1260 x 1 -> 64 x 28 x 1 - # mask: 1260 x 5 x 1 -> 64 x 28 x 5 x 1 + sequence_lengths = tf.cast( + tf_batch_data[TEXT][SEQUENCE_LENGTH][0], dtype=tf.int32 + ) + sequence_lengths = tf.squeeze(sequence_lengths, axis=-1) + sequence_lengths += 1 # add sentence features + mask = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) + sequence_lengths -= 1 # remove sentence features - text_transformed = tf.concat( - [self.text_seq_transformer_output, self.dialogue_transformer_output] + # convert from (combined batch and dialogue dimension x 1) to + # (batch-dim x dialogue length x 1) + sequence_lengths = tf.squeeze( + self._convert_to_original_shape( + tf.expand_dims(sequence_lengths, axis=-1), tf_batch_data, False + ), + axis=-1, ) + # convert from (combined batch and dialogue dimension x sequence length x 1) to + # (batch-dim x dialogue length x sequence length x 1) + mask = self._convert_to_original_shape(mask, tf_batch_data, False) tag_ids = tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0] # add a zero (no entity) for the sentence features to match the shape of # inputs tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) + # convert from (combined batch and dialogue dimension x sequence length x 1) to + # (batch-dim x dialogue length x sequence length x 1) + tag_ids = self._convert_to_original_shape(tag_ids, tf_batch_data, False) + + # convert from (combined batch and dialogue dimension x sequence length x units) + # to (batch-dim x dialogue length x sequence length x units) + text_seq_transformer_output = self._convert_to_original_shape( + self.text_seq_transformer_output, tf_batch_data, False + ) - loss, f1, _logits = self._calculate_entity_loss( - text_transformed, - tag_ids, - mask_text, - sequence_lengths, - ENTITY_ATTRIBUTE_TYPE, - entity_tags, + # repeat the dialogue transformer output sequence-length-times to get the + # same shape as the text sequence transformer output + dialogue_transformer_output = tf.repeat( + tf.expand_dims(self.dialogue_transformer_output, axis=2), + text_seq_transformer_output.shape[2], + axis=2, + ) + # add the output of the dialogue transformer to the output of the text + # sequence transformer (adding context) + text_transformed = tf.add( + text_seq_transformer_output, dialogue_transformer_output ) - return [loss] + # TODO get last dialogue if max history + # check if this should happen before concat due to performance + # TODO CRF is currently failing, is it not compatible with 4D? - def _calculate_entity_loss( - self, - inputs: tf.Tensor, - tag_ids: tf.Tensor, - mask: tf.Tensor, - sequence_lengths: tf.Tensor, - tag_name: Text, - entity_tags: Optional[tf.Tensor] = None, - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: - - tag_ids = tf.cast(tag_ids[:, :, 0], tf.int32) - - if entity_tags is not None: - _tags = self._tf_layers[f"embed.{tag_name}.tags"](entity_tags) - inputs = tf.concat([inputs, _tags], axis=-1) - - logits = self._tf_layers[f"embed.{tag_name}.logits"](inputs) - - # should call first to build weights - pred_ids, _ = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths) - loss = self._tf_layers[f"crf.{tag_name}"].loss( - logits, tag_ids, sequence_lengths + loss, f1, _logits = self._calculate_entity_loss( + text_transformed, tag_ids, mask, sequence_lengths, ENTITY_ATTRIBUTE_TYPE ) - f1 = self._tf_layers[f"crf.{tag_name}"].f1_score(tag_ids, pred_ids, mask) - return loss, f1, logits + return [loss] @staticmethod def _convert_to_original_shape( attribute_features: tf.Tensor, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + squeeze_sequence_dimension: bool = True, ) -> tf.Tensor: """Transform attribute features back to original shape. @@ -998,6 +1053,7 @@ def _convert_to_original_shape( batch_dim = tf.size(dialogue_lengths) dialogue_dim = tf.reduce_max(dialogue_lengths) + sequence_dim = attribute_features.shape[-2] units = attribute_features.shape[-1] batch_indices = tf.repeat(tf.range(batch_dim), dialogue_lengths) @@ -1010,9 +1066,13 @@ def _convert_to_original_shape( ).values indices = tf.stack([batch_indices, dialogue_indices], axis=1) - shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) + if squeeze_sequence_dimension: + attribute_features = tf.squeeze(attribute_features, axis=1) + shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) + else: + shape = tf.convert_to_tensor([batch_dim, dialogue_dim, sequence_dim, units]) - return tf.scatter_nd(indices, tf.squeeze(attribute_features, axis=1), shape) + return tf.scatter_nd(indices, attribute_features, shape) def _process_batch_data( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] @@ -1104,6 +1164,8 @@ def batch_loss( ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) + losses = [] + loss, acc = self._tf_layers[f"loss.{LABEL}"]( dialogue_embed, labels_embed, @@ -1112,17 +1174,18 @@ def batch_loss( all_label_ids, dialogue_mask, ) + losses.append(loss) if ( self.dialogue_transformer_output is not None and self.text_seq_transformer_output is not None ): - self._batch_loss_entities(tf_batch_data) + losses.extend(self._batch_loss_entities(tf_batch_data)) self.action_loss.update_state(loss) self.action_acc.update_state(acc) - return loss + return tf.math.add_n(losses) def batch_predict( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index fe0890c8a66c..9cbb4e36a21b 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1309,22 +1309,6 @@ def _prepare_label_classification_layers(self) -> None: self._prepare_dot_product_loss(LABEL, self.config[SCALE_LOSS]) - def _prepare_entity_recognition_layers(self) -> None: - for tag_spec in self._entity_tag_specs: - name = tag_spec.tag_name - num_tags = tag_spec.num_tags - self._tf_layers[f"embed.{name}.logits"] = layers.Embed( - num_tags, self.config[REGULARIZATION_CONSTANT], f"logits.{name}" - ) - self._tf_layers[f"crf.{name}"] = layers.CRF( - num_tags, self.config[REGULARIZATION_CONSTANT], self.config[SCALE_LOSS] - ) - self._tf_layers[f"embed.{name}.tags"] = layers.Embed( - self.config[EMBEDDING_DIMENSION], - self.config[REGULARIZATION_CONSTANT], - f"tags.{name}", - ) - def _create_bow( self, sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], @@ -1406,33 +1390,6 @@ def _calculate_label_loss( text_embed, label_embed, label_ids, all_labels_embed, all_label_ids ) - def _calculate_entity_loss( - self, - inputs: tf.Tensor, - tag_ids: tf.Tensor, - mask: tf.Tensor, - sequence_lengths: tf.Tensor, - tag_name: Text, - entity_tags: Optional[tf.Tensor] = None, - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: - - tag_ids = tf.cast(tag_ids[:, :, 0], tf.int32) - - if entity_tags is not None: - _tags = self._tf_layers[f"embed.{tag_name}.tags"](entity_tags) - inputs = tf.concat([inputs, _tags], axis=-1) - - logits = self._tf_layers[f"embed.{tag_name}.logits"](inputs) - - # should call first to build weights - pred_ids, _ = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths) - loss = self._tf_layers[f"crf.{tag_name}"].loss( - logits, tag_ids, sequence_lengths - ) - f1 = self._tf_layers[f"crf.{tag_name}"].f1_score(tag_ids, pred_ids, mask) - - return loss, f1, logits - def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py index 0f70f5464c77..ad6af067814e 100644 --- a/rasa/utils/tensorflow/model_data_utils.py +++ b/rasa/utils/tensorflow/model_data_utils.py @@ -295,8 +295,8 @@ def _features_for_attribute( attribute_to_features: features for every example training: boolean indicating whether we are currently in training or not zero_features: zero features - consider_dialogue_dimension: If set to false the dialogue dimension will be removed from the resulting sequence - features. + consider_dialogue_dimension: If set to false the dialogue dimension will be + removed from the resulting sequence features. Returns: A dictionary of feature type to actual features for the given attribute. diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 0b492058db8b..4d446e16e226 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -54,6 +54,7 @@ DENSE_DIMENSION, CONCAT_DIMENSION, DROP_RATE_ATTENTION, + SCALE_LOSS, ) from rasa.utils.tensorflow import layers from rasa.utils.tensorflow.transformer import TransformerEncoder @@ -176,7 +177,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = False, + eager: bool = True, ) -> None: """Fit model data""" @@ -308,7 +309,7 @@ def train_on_batch( self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) def build_for_predict( - self, predict_data: RasaModelData, eager: bool = False + self, predict_data: RasaModelData, eager: bool = True ) -> None: self._training = False # needed for tf graph mode self._predict_function = self._get_tf_call_model_function( @@ -826,6 +827,22 @@ def _prepare_sequence_layers(self, name: Text) -> None: name, self.config[DROP_RATE], self.config[DROP_RATE_ATTENTION] ) + def _prepare_entity_recognition_layers(self) -> None: + for tag_spec in self._entity_tag_specs: + name = tag_spec.tag_name + num_tags = tag_spec.num_tags + self._tf_layers[f"embed.{name}.logits"] = layers.Embed( + num_tags, self.config[REGULARIZATION_CONSTANT], f"logits.{name}" + ) + self._tf_layers[f"crf.{name}"] = layers.CRF( + num_tags, self.config[REGULARIZATION_CONSTANT], self.config[SCALE_LOSS] + ) + self._tf_layers[f"embed.{name}.tags"] = layers.Embed( + self.config[EMBEDDING_DIMENSION], + self.config[REGULARIZATION_CONSTANT], + f"tags.{name}", + ) + def _combine_sparse_dense_features( self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], @@ -1055,6 +1072,33 @@ def _get_batch_dim(attribute_data: Dict[Text, List[tf.Tensor]]) -> int: return tf.shape(attribute_data[SENTENCE][0])[0] + def _calculate_entity_loss( + self, + inputs: tf.Tensor, + tag_ids: tf.Tensor, + mask: tf.Tensor, + sequence_lengths: tf.Tensor, + tag_name: Text, + entity_tags: Optional[tf.Tensor] = None, + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + + tag_ids = tf.cast(tag_ids[:, :, 0], tf.int32) + + if entity_tags is not None: + _tags = self._tf_layers[f"embed.{tag_name}.tags"](entity_tags) + inputs = tf.concat([inputs, _tags], axis=-1) + + logits = self._tf_layers[f"embed.{tag_name}.logits"](inputs) + + # should call first to build weights + pred_ids, _ = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths) + loss = self._tf_layers[f"crf.{tag_name}"].loss( + logits, tag_ids, sequence_lengths + ) + f1 = self._tf_layers[f"crf.{tag_name}"].f1_score(tag_ids, pred_ids, mask) + + return loss, f1, logits + def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: From 2833ef5206f9c345b536f06c8aab675e3dc693db Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 14:22:37 +0100 Subject: [PATCH 27/62] get last dialogue before CRF --- rasa/core/policies/ted_policy.py | 35 ++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index b1062beb7484..e3392dfd362a 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -4,7 +4,6 @@ from collections import defaultdict import numpy as np -from tensorflow import RaggedTensorSpec import rasa.shared.utils.io import tensorflow as tf @@ -12,7 +11,7 @@ from typing import Any, List, Optional, Text, Dict, Tuple, Union, TYPE_CHECKING import rasa.utils.io as io_utils -from nlu.classifiers.diet_classifier import EntityTagSpec +from rasa.nlu.classifiers.diet_classifier import EntityTagSpec from rasa.shared.core.domain import Domain from rasa.core.featurizers.tracker_featurizers import ( TrackerFeaturizer, @@ -308,7 +307,7 @@ def _create_entity_tag_specs(self) -> List[EntityTagSpec]: _tag_specs = [] # TODO - tag_id_index_mapping = {"O": 0, "emotion": 1} + tag_id_index_mapping = {"O": 0, "emotion": 1, "account_number": 2, "item": 3} if tag_id_index_mapping: _tag_specs.append( @@ -1011,9 +1010,33 @@ def _batch_loss_entities( text_seq_transformer_output, dialogue_transformer_output ) - # TODO get last dialogue if max history - # check if this should happen before concat due to performance - # TODO CRF is currently failing, is it not compatible with 4D? + if self.max_history_tracker_featurizer_used: + # get last dialogue turn for every batch example + # resulting shapes are + # text_transformed (batch-dim x sequence length x units) + # mask (batch-dim x sequence length x 1) + # tag_ids (batch-dim x sequence length x 1) + # sequence_lengths (batch-dim) + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + text_transformed = tf.squeeze( + tf.expand_dims(self._last_token(text_transformed, dialogue_lengths), 1), + axis=1, + ) + mask = tf.squeeze( + tf.expand_dims(self._last_token(mask, dialogue_lengths), 1), axis=1 + ) + tag_ids = tf.squeeze( + tf.expand_dims(self._last_token(tag_ids, dialogue_lengths), 1), axis=1 + ) + sequence_lengths = tf.squeeze( + tf.expand_dims(self._last_token(sequence_lengths, dialogue_lengths), 1) + ) + + else: + # TODO + # CRF cannot handle 4D tensors, convert text_transformed back to + # combined batch and dialogue dimenstion x sequence length x untis + return [] loss, f1, _logits = self._calculate_entity_loss( text_transformed, tag_ids, mask, sequence_lengths, ENTITY_ATTRIBUTE_TYPE From ff6f002e24ff6adca38e94322ab50052f06a035b Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 15:15:25 +0100 Subject: [PATCH 28/62] add predicting entities --- rasa/core/policies/ted_policy.py | 105 +++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 5 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index e3392dfd362a..51177651502c 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -93,6 +93,7 @@ MASK, HIDDEN_LAYERS_SIZES, FEATURIZERS, + ENTITY_RECOGNITION, ) @@ -252,6 +253,10 @@ class TEDPolicy(Policy): # Specify what features to use as sequence and sentence features. # By default all features in the pipeline are used. FEATURIZERS: [], + # If set to true, entities are predicted in user utterances. + # TODO Do not communicate this option to users yet as we have to run some + # experiments first. + ENTITY_RECOGNITION: True, } @staticmethod @@ -443,7 +448,8 @@ def train( ) return - self._entity_tag_specs = self._create_entity_tag_specs() + if self.config[ENTITY_RECOGNITION]: + self._entity_tag_specs = self._create_entity_tag_specs() # keep one example for persisting and loading self.data_example = model_data.first_data_example() @@ -709,7 +715,11 @@ def __init__( # metrics self.action_loss = tf.keras.metrics.Mean(name="loss") self.action_acc = tf.keras.metrics.Mean(name="acc") + self.entity_loss = tf.keras.metrics.Mean(name="e_loss") + self.entity_f1 = tf.keras.metrics.Mean(name="e_f1") self.metrics_to_log += ["loss", "acc"] + if self.config[ENTITY_RECOGNITION]: + self.metrics_to_log += ["e_loss", "e_f1"] # needed for efficient prediction self.all_labels_embed: Optional[tf.Tensor] = None @@ -758,7 +768,9 @@ def _prepare_layers(self) -> None: self._prepare_embed_layers(LABEL) self._prepare_dot_product_loss(LABEL, self.config[SCALE_LOSS]) - self._prepare_entity_recognition_layers() + + if self.config[ENTITY_RECOGNITION]: + self._prepare_entity_recognition_layers() def _prepare_sparse_dense_layer_for( self, name: Text, signature: Dict[Text, Dict[Text, List[FeatureSignature]]] @@ -963,6 +975,11 @@ def _batch_loss_entities( if ENTITY_ATTRIBUTE_TYPE not in tf_batch_data.get(ENTITIES, {}): return [] + # if no tags are present at all, we can skip training + # TODO is there a better solution? + if tf.reduce_max(tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0]) == 0.0: + return [] + sequence_lengths = tf.cast( tf_batch_data[TEXT][SEQUENCE_LENGTH][0], dtype=tf.int32 ) @@ -1038,10 +1055,13 @@ def _batch_loss_entities( # combined batch and dialogue dimenstion x sequence length x untis return [] - loss, f1, _logits = self._calculate_entity_loss( + loss, f1, _ = self._calculate_entity_loss( text_transformed, tag_ids, mask, sequence_lengths, ENTITY_ATTRIBUTE_TYPE ) + self.entity_loss.update_state(loss) + self.entity_f1.update_state(f1) + return [loss] @staticmethod @@ -1200,7 +1220,8 @@ def batch_loss( losses.append(loss) if ( - self.dialogue_transformer_output is not None + self.config[ENTITY_RECOGNITION] + and self.dialogue_transformer_output is not None and self.text_seq_transformer_output is not None ): losses.extend(self._batch_loss_entities(tf_batch_data)) @@ -1234,6 +1255,15 @@ def batch_predict( ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) + predictions = {} + + if ( + self.config[ENTITY_RECOGNITION] + and self.dialogue_transformer_output is not None + and self.text_seq_transformer_output is not None + ): + predictions.update(self._batch_predict_entities(tf_batch_data)) + sim_all = self._tf_layers[f"loss.{LABEL}"].sim( dialogue_embed[:, :, tf.newaxis, :], self.all_labels_embed[tf.newaxis, tf.newaxis, :, :], @@ -1244,7 +1274,72 @@ def batch_predict( sim_all, self.config[SIMILARITY_TYPE] ) - return {"action_scores": scores, "similarities": sim_all} + predictions.update({"action_scores": scores, "similarities": sim_all}) + + return predictions + + def _batch_predict_entities( + self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + ) -> Dict[Text, tf.Tensor]: + predictions: Dict[Text, tf.Tensor] = {} + + sequence_lengths = tf.cast( + tf_batch_data[TEXT][SEQUENCE_LENGTH][0], dtype=tf.int32 + ) + sequence_lengths = tf.squeeze(sequence_lengths, axis=-1) + + # convert from (combined batch and dialogue dimension x sequence length x units) + # to (batch-dim x dialogue length x sequence length x units) + text_seq_transformer_output = self._convert_to_original_shape( + self.text_seq_transformer_output, tf_batch_data, False + ) + # repeat the dialogue transformer output sequence-length-times to get the + # same shape as the text sequence transformer output + dialogue_transformer_output = tf.repeat( + tf.expand_dims(self.dialogue_transformer_output, axis=2), + text_seq_transformer_output.shape[2], + axis=2, + ) + # add the output of the dialogue transformer to the output of the text + # sequence transformer (adding context) + text_transformed = tf.add( + text_seq_transformer_output, dialogue_transformer_output + ) + + if self.max_history_tracker_featurizer_used: + # get last dialogue turn for every batch example + # resulting shapes are + # text_transformed (batch-dim x sequence length x units) + # mask (batch-dim x sequence length x 1) + # tag_ids (batch-dim x sequence length x 1) + # sequence_lengths (batch-dim) + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + text_transformed = tf.squeeze( + tf.expand_dims(self._last_token(text_transformed, dialogue_lengths), 1), + axis=1, + ) + sequence_lengths = tf.squeeze( + tf.expand_dims(self._last_token(sequence_lengths, dialogue_lengths), 1) + ) + + else: + # TODO + # CRF cannot handle 4D tensors, convert text_transformed back to + # combined batch and dialogue dimenstion x sequence length x untis + return {} + + name = ENTITY_ATTRIBUTE_TYPE + _input = text_transformed + + _logits = self._tf_layers[f"embed.{name}.logits"](_input) + pred_ids, confidences = self._tf_layers[f"crf.{name}"]( + _logits, sequence_lengths - 1 + ) + + predictions[f"e_{name}_ids"] = pred_ids + predictions[f"e_{name}_scores"] = confidences + + return predictions # pytype: enable=key-error From 5b46f404d466d9bb28d58e295742f1fc03c098a0 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 15:25:10 +0100 Subject: [PATCH 29/62] clean up --- rasa/core/featurizers/single_state_featurizer.py | 10 ++++++++-- rasa/core/policies/ted_policy.py | 5 ++++- rasa/shared/core/trackers.py | 11 +++++++---- rasa/utils/tensorflow/model_data_utils.py | 2 +- rasa/utils/tensorflow/models.py | 4 ++-- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 693134343a69..68a6f9a72ffd 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -20,6 +20,7 @@ FEATURE_TYPE_SEQUENCE, TEXT, NO_ENTITY_TAG, + ENTITY_ATTRIBUTE_TYPE, ) from rasa.shared.nlu.training_data.features import Features from rasa.shared.nlu.training_data.message import Message @@ -137,13 +138,18 @@ def _create_entity_tag_features( _tags = [] for token in parsed_text.get(TOKENS_NAMES[TEXT]): - _tag = determine_token_labels(token, entities, attribute_key="entity") + _tag = determine_token_labels( + token, entities, attribute_key=ENTITY_ATTRIBUTE_TYPE + ) _tags.append(tag_id_mapping[_tag]) # transpose to have seq_len x 1 return [ Features( - np.array([_tags]).T, FEATURE_TYPE_SEQUENCE, "entity", TAG_ID_ORIGIN + np.array([_tags]).T, + FEATURE_TYPE_SEQUENCE, + ENTITY_ATTRIBUTE_TYPE, + TAG_ID_ORIGIN, ) ] diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 51177651502c..332f417fc538 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -976,7 +976,10 @@ def _batch_loss_entities( return [] # if no tags are present at all, we can skip training - # TODO is there a better solution? + # check if there is any tag other than 0, which maps to NO_ENTITY_TAG + # TODO + # If we remove this check the CRF layer is throwing an error. + # Is there a better solution? if tf.reduce_max(tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0]) == 0.0: return [] diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py index 657ee9366e67..3d2ef8f1a8d9 100644 --- a/rasa/shared/core/trackers.py +++ b/rasa/shared/core/trackers.py @@ -29,6 +29,7 @@ ENTITY_ATTRIBUTE_ROLE, ACTION_TEXT, ACTION_NAME, + ENTITIES, ) from rasa.shared.core import events from rasa.shared.core.constants import ( @@ -68,7 +69,9 @@ logger = logging.getLogger(__name__) # same as State but with Dict[...] substituted with FrozenSet[Tuple[...]] -FrozenState = FrozenSet[Tuple[Text, FrozenSet[Tuple[Text, Tuple[Union[float, Text]]]]]] +FrozenState = FrozenSet[ + Tuple[Text, FrozenSet[Tuple[Text, Tuple[Union[float, Text, FrozenSet]]]]] +] class EventVerbosity(Enum): @@ -235,9 +238,9 @@ def freeze_current_state(state: State) -> FrozenState: frozen_state = {} for key, values in state_copy.items(): if isinstance(values, dict): - if "entities" in values and isinstance(values["entities"][0], dict): - values["entities"] = tuple( - [frozenset(e.items()) for e in values["entities"]] + if ENTITIES in values and isinstance(values[ENTITIES][0], dict): + values[ENTITIES] = tuple( + [frozenset(e.items()) for e in values[ENTITIES]] ) frozen_state[key] = frozenset(values.items()) else: diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py index ad6af067814e..887bbb4ed44d 100644 --- a/rasa/utils/tensorflow/model_data_utils.py +++ b/rasa/utils/tensorflow/model_data_utils.py @@ -260,7 +260,7 @@ def convert_to_data_format( num_examples = 1 for _features in attribute_to_features.values(): num_examples = max(num_examples, len(_features)) - dialogue_length = max(dialogue_length, max(len(f) for f in _features)) + dialogue_length = max(dialogue_length, len(_features[0])) empty_features = [[None] * dialogue_length] * num_examples for attribute in attributes: diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 4d446e16e226..9e8f67748ea0 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -177,7 +177,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = True, + eager: bool = False, ) -> None: """Fit model data""" @@ -309,7 +309,7 @@ def train_on_batch( self.optimizer.apply_gradients(zip(gradients, self.trainable_variables)) def build_for_predict( - self, predict_data: RasaModelData, eager: bool = True + self, predict_data: RasaModelData, eager: bool = False ) -> None: self._training = False # needed for tf graph mode self._predict_function = self._get_tf_call_model_function( From 906ff977d6fcae47fa4436002cbfa79bea6a7a2a Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 17:07:06 +0100 Subject: [PATCH 30/62] differentiate between max history tracker featurizer used or not --- rasa/core/policies/ted_policy.py | 180 ++++++++++++++++++------------- 1 file changed, 107 insertions(+), 73 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 332f417fc538..98e66d679292 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -991,38 +991,25 @@ def _batch_loss_entities( mask = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) sequence_lengths -= 1 # remove sentence features - # convert from (combined batch and dialogue dimension x 1) to - # (batch-dim x dialogue length x 1) - sequence_lengths = tf.squeeze( - self._convert_to_original_shape( - tf.expand_dims(sequence_lengths, axis=-1), tf_batch_data, False - ), - axis=-1, - ) - # convert from (combined batch and dialogue dimension x sequence length x 1) to - # (batch-dim x dialogue length x sequence length x 1) - mask = self._convert_to_original_shape(mask, tf_batch_data, False) + # +1 for sentence features + sequence_dimension = tf.reduce_max(sequence_lengths) + 1 tag_ids = tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0] # add a zero (no entity) for the sentence features to match the shape of # inputs tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) - # convert from (combined batch and dialogue dimension x sequence length x 1) to - # (batch-dim x dialogue length x sequence length x 1) - tag_ids = self._convert_to_original_shape(tag_ids, tf_batch_data, False) - - # convert from (combined batch and dialogue dimension x sequence length x units) - # to (batch-dim x dialogue length x sequence length x units) - text_seq_transformer_output = self._convert_to_original_shape( - self.text_seq_transformer_output, tf_batch_data, False + + text_seq_transformer_output = self.text_seq_transformer_output + dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( + self.dialogue_transformer_output, tf_batch_data ) # repeat the dialogue transformer output sequence-length-times to get the # same shape as the text sequence transformer output dialogue_transformer_output = tf.repeat( - tf.expand_dims(self.dialogue_transformer_output, axis=2), - text_seq_transformer_output.shape[2], - axis=2, + tf.expand_dims(dialogue_transformer_output, axis=1), + sequence_dimension, + axis=1, ) # add the output of the dialogue transformer to the output of the text # sequence transformer (adding context) @@ -1031,35 +1018,44 @@ def _batch_loss_entities( ) if self.max_history_tracker_featurizer_used: - # get last dialogue turn for every batch example - # resulting shapes are - # text_transformed (batch-dim x sequence length x units) - # mask (batch-dim x sequence length x 1) - # tag_ids (batch-dim x sequence length x 1) - # sequence_lengths (batch-dim) dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - text_transformed = tf.squeeze( - tf.expand_dims(self._last_token(text_transformed, dialogue_lengths), 1), + + batch_dim = tf.size(dialogue_lengths) + + # the first dimension of text transformed is the combined batch and dialogue + # dimension, which corresponds to the sum of all dialogue lengths + # if the max history tracker featurizer is used we just want the last + # dialogues of every batch example + + # get the indices of all last dialogues + last_dialogue_indices = tf.cumsum(dialogue_lengths) - 1 + + # build up indices to get the last dialogues from text_transformed + dialogue_indices = tf.repeat( + tf.expand_dims(last_dialogue_indices, axis=1), + sequence_dimension, axis=1, ) - mask = tf.squeeze( - tf.expand_dims(self._last_token(mask, dialogue_lengths), 1), axis=1 - ) - tag_ids = tf.squeeze( - tf.expand_dims(self._last_token(tag_ids, dialogue_lengths), 1), axis=1 + sequence_indices = tf.repeat( + tf.expand_dims(tf.range(sequence_dimension), axis=0), batch_dim, axis=0 ) - sequence_lengths = tf.squeeze( - tf.expand_dims(self._last_token(sequence_lengths, dialogue_lengths), 1) + indices = tf.stack([dialogue_indices, sequence_indices], axis=2) + + # get all last dialogues from text_transformed using the above indices + text_transformed = tf.gather_nd(text_transformed, indices) + # do the same for the other tensors + tag_ids = tf.gather_nd(tag_ids, indices) + mask = tf.gather_nd(mask, indices) + sequence_lengths = tf.gather( + tf.squeeze(sequence_lengths), last_dialogue_indices ) - else: - # TODO - # CRF cannot handle 4D tensors, convert text_transformed back to - # combined batch and dialogue dimenstion x sequence length x untis - return [] - loss, f1, _ = self._calculate_entity_loss( - text_transformed, tag_ids, mask, sequence_lengths, ENTITY_ATTRIBUTE_TYPE + text_transformed, + tag_ids, + mask, + tf.squeeze(sequence_lengths), + ENTITY_ATTRIBUTE_TYPE, ) self.entity_loss.update_state(loss) @@ -1067,11 +1063,42 @@ def _batch_loss_entities( return [loss] + @staticmethod + def _combine_batch_and_dialogue_dimension( + tensor: tf.Tensor, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + ): + """Combines the batch and dialogue dimension of the given tensor. + + Before the tensor has shape (batch-size x dialogue-length x ...). + Afterwards the tensor will have shape + (combined batch and dialogue dimension x ...). + + Args: + tensor: The tensor + tf_batch_data: the batch data + + Returns: + The converted tensor + """ + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + + batch_dim = tf.size(dialogue_lengths) + batch_indices = tf.repeat(tf.range(batch_dim), dialogue_lengths) + dialogue_indices = ( + tf.map_fn( + tf.range, + dialogue_lengths, + fn_output_signature=tf.RaggedTensorSpec(shape=[None], dtype=tf.int32), + ) + ).values + indices = tf.stack([batch_indices, dialogue_indices], axis=1) + + return tf.gather_nd(tensor, indices) + @staticmethod def _convert_to_original_shape( attribute_features: tf.Tensor, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], - squeeze_sequence_dimension: bool = True, ) -> tf.Tensor: """Transform attribute features back to original shape. @@ -1099,7 +1126,6 @@ def _convert_to_original_shape( batch_dim = tf.size(dialogue_lengths) dialogue_dim = tf.reduce_max(dialogue_lengths) - sequence_dim = attribute_features.shape[-2] units = attribute_features.shape[-1] batch_indices = tf.repeat(tf.range(batch_dim), dialogue_lengths) @@ -1112,12 +1138,9 @@ def _convert_to_original_shape( ).values indices = tf.stack([batch_indices, dialogue_indices], axis=1) - if squeeze_sequence_dimension: - attribute_features = tf.squeeze(attribute_features, axis=1) - shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) - else: - shape = tf.convert_to_tensor([batch_dim, dialogue_dim, sequence_dim, units]) + shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) + attribute_features = tf.squeeze(attribute_features, axis=1) return tf.scatter_nd(indices, attribute_features, shape) def _process_batch_data( @@ -1291,17 +1314,17 @@ def _batch_predict_entities( ) sequence_lengths = tf.squeeze(sequence_lengths, axis=-1) - # convert from (combined batch and dialogue dimension x sequence length x units) - # to (batch-dim x dialogue length x sequence length x units) - text_seq_transformer_output = self._convert_to_original_shape( - self.text_seq_transformer_output, tf_batch_data, False + text_seq_transformer_output = self.text_seq_transformer_output + dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( + self.dialogue_transformer_output, tf_batch_data ) + # repeat the dialogue transformer output sequence-length-times to get the # same shape as the text sequence transformer output dialogue_transformer_output = tf.repeat( - tf.expand_dims(self.dialogue_transformer_output, axis=2), - text_seq_transformer_output.shape[2], - axis=2, + tf.expand_dims(dialogue_transformer_output, axis=1), + text_seq_transformer_output.shape[1], + axis=1, ) # add the output of the dialogue transformer to the output of the text # sequence transformer (adding context) @@ -1310,26 +1333,37 @@ def _batch_predict_entities( ) if self.max_history_tracker_featurizer_used: - # get last dialogue turn for every batch example - # resulting shapes are - # text_transformed (batch-dim x sequence length x units) - # mask (batch-dim x sequence length x 1) - # tag_ids (batch-dim x sequence length x 1) - # sequence_lengths (batch-dim) dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - text_transformed = tf.squeeze( - tf.expand_dims(self._last_token(text_transformed, dialogue_lengths), 1), + + batch_dim = tf.size(dialogue_lengths) + # +1 for sentence features + sequence_dimension = tf.reduce_max(sequence_lengths) + 1 + + # the first dimension of text transformed is the combined batch and dialogue + # dimension, which corresponds to the sum of all dialogue lengths + # if the max history tracker featurizer is used we just want the last + # dialogues of every batch example + + # get the indices of all last dialogues + last_dialogue_indices = tf.cumsum(dialogue_lengths) - 1 + + # build up indices to get the last dialogues from text_transformed + dialogue_indices = tf.repeat( + tf.expand_dims(last_dialogue_indices, axis=1), + sequence_dimension, axis=1, ) - sequence_lengths = tf.squeeze( - tf.expand_dims(self._last_token(sequence_lengths, dialogue_lengths), 1) + sequence_indices = tf.repeat( + tf.expand_dims(tf.range(sequence_dimension), axis=0), batch_dim, axis=0 ) + indices = tf.stack([dialogue_indices, sequence_indices], axis=2) - else: - # TODO - # CRF cannot handle 4D tensors, convert text_transformed back to - # combined batch and dialogue dimenstion x sequence length x untis - return {} + # get all last dialogues from text_transformed using the above indices + text_transformed = tf.gather_nd(text_transformed, indices) + # do the same for the other tensors + sequence_lengths = tf.gather( + tf.squeeze(sequence_lengths), last_dialogue_indices + ) name = ENTITY_ATTRIBUTE_TYPE _input = text_transformed From c0eaa70e808d7c8b67a042581b76470231df375b Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 17:32:10 +0100 Subject: [PATCH 31/62] add todo --- rasa/core/policies/ted_policy.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 98e66d679292..719d2aff975a 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -1013,6 +1013,8 @@ def _batch_loss_entities( ) # add the output of the dialogue transformer to the output of the text # sequence transformer (adding context) + # resulting shape + # (combined batch and dialogue dimension x sequence length x units) text_transformed = tf.add( text_seq_transformer_output, dialogue_transformer_output ) @@ -1042,13 +1044,17 @@ def _batch_loss_entities( indices = tf.stack([dialogue_indices, sequence_indices], axis=2) # get all last dialogues from text_transformed using the above indices + # resulting shape (batch size x sequence length x units) text_transformed = tf.gather_nd(text_transformed, indices) # do the same for the other tensors tag_ids = tf.gather_nd(tag_ids, indices) mask = tf.gather_nd(mask, indices) - sequence_lengths = tf.gather( - tf.squeeze(sequence_lengths), last_dialogue_indices + sequence_lengths = tf.gather_nd( + sequence_lengths, tf.expand_dims(last_dialogue_indices, axis=1) ) + # TODO + # inside the LSTM of the CRF layer the check len(mask.shape) == 2 + # fails. mask is created from the sequence length. loss, f1, _ = self._calculate_entity_loss( text_transformed, From 9239bfa5c9b5bb5edee0ce2bb9ab87245c71b1ae Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 17:36:49 +0100 Subject: [PATCH 32/62] add comments --- rasa/core/policies/ted_policy.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 719d2aff975a..cdc03f66fcd6 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -999,6 +999,9 @@ def _batch_loss_entities( # inputs tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) + # shape of the following two tensors + # (combined batch and dialogue dimension x sequence length x units) + # in case of dialogue_transformer_output sequence length is 1 text_seq_transformer_output = self.text_seq_transformer_output dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( self.dialogue_transformer_output, tf_batch_data @@ -1049,8 +1052,8 @@ def _batch_loss_entities( # do the same for the other tensors tag_ids = tf.gather_nd(tag_ids, indices) mask = tf.gather_nd(mask, indices) - sequence_lengths = tf.gather_nd( - sequence_lengths, tf.expand_dims(last_dialogue_indices, axis=1) + sequence_lengths = tf.gather( + tf.squeeze(sequence_lengths), last_dialogue_indices ) # TODO # inside the LSTM of the CRF layer the check len(mask.shape) == 2 From 76b41ee1f83cbda63d3dfea86d3c35483cac1261 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Thu, 5 Nov 2020 17:42:44 +0100 Subject: [PATCH 33/62] use correct tag id mapping --- rasa/core/policies/ted_policy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index cdc03f66fcd6..5e3c1bc26de9 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -311,8 +311,7 @@ def _create_entity_tag_specs(self) -> List[EntityTagSpec]: _tag_specs = [] - # TODO - tag_id_index_mapping = {"O": 0, "emotion": 1, "account_number": 2, "item": 3} + tag_id_index_mapping = self.featurizer.state_featurizer.get_entity_tag_ids() if tag_id_index_mapping: _tag_specs.append( From 58fc4ad951948a9dc2cf54c01a8bc9f7f8000b4f Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 6 Nov 2020 08:45:07 +0100 Subject: [PATCH 34/62] check if text exists --- rasa/core/featurizers/single_state_featurizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 68a6f9a72ffd..671549c7658c 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -132,6 +132,9 @@ def _create_entity_tag_features( # TODO # Should we support BILOU tagging? + if TEXT not in sub_state: + return [] + parsed_text = interpreter.featurize_message(Message({TEXT: sub_state[TEXT]})) entities = [dict(entity) for entity in sub_state[ENTITIES]] tag_id_mapping = self.get_entity_tag_ids() From 74be41076a0a4834db1f6250ce0cef969a4119ff Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 6 Nov 2020 13:55:06 +0100 Subject: [PATCH 35/62] fix frozenset issues --- .../core/featurizers/single_state_featurizer.py | 2 +- rasa/shared/core/domain.py | 2 +- rasa/shared/core/generator.py | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 671549c7658c..7607b37427a9 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -68,7 +68,7 @@ def _state_features_for_attribute( if attribute in {INTENT, ACTION_NAME}: return {sub_state[attribute]: 1} elif attribute == ENTITIES: - return {entity: 1 for entity in sub_state.get(ENTITIES, [])} + return {entity["entity"]: 1 for entity in sub_state.get(ENTITIES, [])} elif attribute == ACTIVE_LOOP: return {sub_state["name"]: 1} elif attribute == SLOTS: diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index 3aa818fdadc0..0d37ec7b643e 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -67,7 +67,7 @@ # State is a dictionary with keys (USER, PREVIOUS_ACTION, SLOTS, ACTIVE_LOOP) # representing the origin of a SubState; # the values are SubStates, that contain the information needed for featurization -SubState = Dict[Text, Union[Text, Tuple[Union[float, Text]]]] +SubState = Dict[Text, Union[Text, Tuple[Union[float, Text, Dict]]]] State = Dict[Text, SubState] logger = logging.getLogger(__name__) diff --git a/rasa/shared/core/generator.py b/rasa/shared/core/generator.py index 994ee52fedaf..11ddfd9fb146 100644 --- a/rasa/shared/core/generator.py +++ b/rasa/shared/core/generator.py @@ -31,6 +31,7 @@ ) from rasa.shared.utils.io import is_logging_disabled import rasa.shared.utils.io +from shared.nlu.constants import ENTITIES logger = logging.getLogger(__name__) @@ -102,10 +103,18 @@ def past_states_for_hashing(self, domain: Domain) -> Deque[FrozenState]: @staticmethod def _unfreeze_states(frozen_states: Deque[FrozenState]) -> List[State]: - return [ - {key: dict(value) for key, value in dict(frozen_state).items()} - for frozen_state in frozen_states - ] + states = [] + for frozen_state in frozen_states: + state_dict = {} + for key, value in dict(frozen_state).items(): + _value = dict(value) + if ENTITIES in _value: + _value[ENTITIES] = [ + dict(frozen_entity) for frozen_entity in _value[ENTITIES] + ] + state_dict[key] = _value + states.append(state_dict) + return states def past_states(self, domain: Domain) -> List[State]: states_for_hashing = self.past_states_for_hashing(domain) From 90feabeb095585427fcedfd0414b782b231bff19 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 6 Nov 2020 14:38:12 +0100 Subject: [PATCH 36/62] ignore actual entity value in MemoizationPolicy --- rasa/core/policies/memoization.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index e511f35563ac..33b39a8864a4 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -1,3 +1,4 @@ +import copy import zlib import base64 @@ -22,6 +23,8 @@ from rasa.shared.core.generator import TrackerWithCachedStates from rasa.shared.utils.io import is_logging_disabled from rasa.core.constants import MEMOIZATION_POLICY_PRIORITY +from shared.core.constants import USER +from shared.nlu.constants import ENTITIES, ENTITY_ATTRIBUTE_TYPE logger = logging.getLogger(__name__) @@ -158,7 +161,22 @@ def _create_feature_key(self, states: List[State]) -> Text: # we sort keys to make sure that the same states # represented as dictionaries have the same json strings # quotes are removed for aesthetic reasons - feature_str = json.dumps(states, sort_keys=True).replace('"', "") + + # Ignore the actual values of entities + # We are just interested whether an entity of a certain type was detected or not + _states = [] + for state in states: + _state = {} + for key, value in state.items(): + _state[key] = copy.deepcopy(value) + if USER == key and ENTITIES in _state[USER]: + _state[USER][ENTITIES] = [ + entity[ENTITY_ATTRIBUTE_TYPE] + for entity in _state[USER][ENTITIES] + ] + _states.append(_state) + + feature_str = json.dumps(_states, sort_keys=True).replace('"', "") if self.ENABLE_FEATURE_STRING_COMPRESSION: compressed = zlib.compress( bytes(feature_str, rasa.shared.utils.io.DEFAULT_ENCODING) From 6a5efc3321dce58c189f6fb7423484f9df21192e Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Mon, 9 Nov 2020 08:42:33 +0100 Subject: [PATCH 37/62] fix import --- rasa/core/policies/memoization.py | 4 ++-- rasa/shared/core/generator.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index 33b39a8864a4..7170071d17ad 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -23,8 +23,8 @@ from rasa.shared.core.generator import TrackerWithCachedStates from rasa.shared.utils.io import is_logging_disabled from rasa.core.constants import MEMOIZATION_POLICY_PRIORITY -from shared.core.constants import USER -from shared.nlu.constants import ENTITIES, ENTITY_ATTRIBUTE_TYPE +from rasa.shared.core.constants import USER +from rasa.shared.nlu.constants import ENTITIES, ENTITY_ATTRIBUTE_TYPE logger = logging.getLogger(__name__) diff --git a/rasa/shared/core/generator.py b/rasa/shared/core/generator.py index 11ddfd9fb146..dcb69f20cea4 100644 --- a/rasa/shared/core/generator.py +++ b/rasa/shared/core/generator.py @@ -31,7 +31,7 @@ ) from rasa.shared.utils.io import is_logging_disabled import rasa.shared.utils.io -from shared.nlu.constants import ENTITIES +from rasa.shared.nlu.constants import ENTITIES logger = logging.getLogger(__name__) From ccd93d14a13ff5550904229a779ba6d0a7470d80 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Mon, 9 Nov 2020 11:25:08 +0100 Subject: [PATCH 38/62] fix some tests --- .../test_single_state_featurizers.py | 101 ++++++++++++++---- tests/shared/core/test_domain.py | 22 ++-- .../story_writer/test_yaml_story_writer.py | 6 +- tests/test_test.py | 2 - 4 files changed, 93 insertions(+), 38 deletions(-) diff --git a/tests/core/featurizers/test_single_state_featurizers.py b/tests/core/featurizers/test_single_state_featurizers.py index 85e689d23a34..4ea08b9c773e 100644 --- a/tests/core/featurizers/test_single_state_featurizers.py +++ b/tests/core/featurizers/test_single_state_featurizers.py @@ -15,6 +15,10 @@ INTENT, FEATURE_TYPE_SEQUENCE, FEATURE_TYPE_SENTENCE, + ENTITY_ATTRIBUTE_TYPE, + ENTITY_ATTRIBUTE_VALUE, + ENTITY_ATTRIBUTE_START, + ENTITY_ATTRIBUTE_END, ) from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS from rasa.shared.nlu.interpreter import RegexInterpreter @@ -184,28 +188,51 @@ def test_single_state_featurizer_with_entity_roles_and_groups( interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter f = SingleStateFeaturizer() - f._default_feature_states[INTENT] = {"a": 0, "b": 1} + f._default_feature_states[INTENT] = {"inform": 0, "greet": 1} f._default_feature_states[ENTITIES] = { - "c": 0, - "d": 1, - f"d{ENTITY_LABEL_SEPARATOR}e": 2, + "city": 0, + "name": 1, + f"city{ENTITY_LABEL_SEPARATOR}to": 2, + f"city{ENTITY_LABEL_SEPARATOR}from": 3, + } + f._default_feature_states[ACTION_NAME] = { + "utter_ask_where_to": 0, + "utter_greet": 1, + "action_listen": 2, + } + f._default_feature_states[SLOTS] = {"slot_1": 0, "slot_2": 1, "slot_3": 2} + f._default_feature_states[ACTIVE_LOOP] = { + "active_loop_1": 0, + "active_loop_2": 1, + "active_loop_3": 2, + "active_loop_4": 3, } - f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1, "action_listen": 2} - f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2} - f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3} encoded = f.encode_state( { "user": { - "text": "a ball", - "intent": "b", - "entities": ["c", f"d{ENTITY_LABEL_SEPARATOR}e"], + "text": "I am flying from London to Paris", + "intent": "inform", + "entities": [ + { + ENTITY_ATTRIBUTE_TYPE: "city", + ENTITY_ATTRIBUTE_VALUE: "London", + ENTITY_ATTRIBUTE_START: 17, + ENTITY_ATTRIBUTE_END: 23, + }, + { + ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to", + ENTITY_ATTRIBUTE_VALUE: "Paris", + ENTITY_ATTRIBUTE_START: 27, + ENTITY_ATTRIBUTE_END: 32, + }, + ], }, "prev_action": { "action_name": "action_listen", "action_text": "throw a ball", }, - "active_loop": {"name": "k"}, - "slots": {"e": (1.0,)}, + "active_loop": {"name": "active_loop_4"}, + "slots": {"slot_1": (1.0,)}, }, interpreter=interpreter, ) @@ -213,7 +240,7 @@ def test_single_state_featurizer_with_entity_roles_and_groups( assert sorted(list(encoded.keys())) == sorted( [TEXT, ENTITIES, ACTION_NAME, SLOTS, ACTIVE_LOOP, INTENT, ACTION_TEXT] ) - assert np.all(encoded[ENTITIES][0].features.toarray() == [1, 0, 1]) + assert np.all(encoded[ENTITIES][0].features.toarray() == [1, 0, 1, 0]) def test_single_state_featurizer_uses_dtype_float(): @@ -241,21 +268,51 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen( interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter f = SingleStateFeaturizer() - f._default_feature_states[INTENT] = {"a": 0, "b": 1} - f._default_feature_states[ENTITIES] = {"c": 0} - f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1, "action_listen": 2} - f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2} - f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3} - + f._default_feature_states[INTENT] = {"inform": 0, "greet": 1} + f._default_feature_states[ENTITIES] = { + "city": 0, + "name": 1, + f"city{ENTITY_LABEL_SEPARATOR}to": 2, + f"city{ENTITY_LABEL_SEPARATOR}from": 3, + } + f._default_feature_states[ACTION_NAME] = { + "utter_ask_where_to": 0, + "utter_greet": 1, + "action_listen": 2, + } + f._default_feature_states[SLOTS] = {"slot_1": 0, "slot_2": 1, "slot_3": 2} + f._default_feature_states[ACTIVE_LOOP] = { + "active_loop_1": 0, + "active_loop_2": 1, + "active_loop_3": 2, + "active_loop_4": 3, + } encoded = f.encode_state( { - "user": {"text": "a ball", "intent": "b", "entities": ["c"]}, + "user": { + "text": "I am flying from London to Paris", + "intent": "inform", + "entities": [ + { + ENTITY_ATTRIBUTE_TYPE: "city", + ENTITY_ATTRIBUTE_VALUE: "London", + ENTITY_ATTRIBUTE_START: 17, + ENTITY_ATTRIBUTE_END: 23, + }, + { + ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to", + ENTITY_ATTRIBUTE_VALUE: "Paris", + ENTITY_ATTRIBUTE_START: 27, + ENTITY_ATTRIBUTE_END: 32, + }, + ], + }, "prev_action": { "action_name": "action_listen", "action_text": "throw a ball", }, - "active_loop": {"name": "k"}, - "slots": {"e": (1.0,)}, + "active_loop": {"name": "active_loop_4"}, + "slots": {"slot_1": (1.0,)}, }, interpreter=interpreter, ) diff --git a/tests/shared/core/test_domain.py b/tests/shared/core/test_domain.py index 32e6a83cc1f2..c94b20da97ee 100644 --- a/tests/shared/core/test_domain.py +++ b/tests/shared/core/test_domain.py @@ -75,7 +75,7 @@ async def test_create_train_data_no_history(default_domain): assert hashed == [ "[{}]", '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]', - '[{"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', + '[{"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', '[{"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', @@ -83,7 +83,7 @@ async def test_create_train_data_no_history(default_domain): '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', - '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', + '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', ] @@ -104,13 +104,13 @@ async def test_create_train_data_with_history(default_domain): hashed = sorted(hashed) assert hashed == [ - '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', + '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}, {"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]', - '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', - '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', - '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', + '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', + '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', + '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}]', '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]', '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}]', @@ -158,7 +158,7 @@ async def test_create_train_data_unfeaturized_entities(): assert hashed == [ "[{}]", '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]', - '[{"prev_action": {"action_name": "utter_greet"}, "user": {"entities": ["name"], "intent": "greet"}}]', + '[{"prev_action": {"action_name": "utter_greet"}, "user": {"entities": [{"end": 81, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', '[{"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "why"}}]', '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "thank"}}]', @@ -168,9 +168,9 @@ async def test_create_train_data_unfeaturized_entities(): '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "thank"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]', + '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [{"end": 81, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [], "intent": "default"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [], "intent": "ask"}}]', - '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": ["name"], "intent": "greet"}}]', ] @@ -1057,7 +1057,7 @@ def test_get_featurized_entities(): featurized_entities = domain._get_featurized_entities(user_uttered) - assert featurized_entities == set() + assert featurized_entities == [] user_uttered = UserUttered( text="I am going to London", @@ -1067,4 +1067,6 @@ def test_get_featurized_entities(): featurized_entities = domain._get_featurized_entities(user_uttered) - assert featurized_entities == {"GPE", f"GPE{ENTITY_LABEL_SEPARATOR}destination"} + assert featurized_entities == [ + {"entity": "GPE", "role": "destination", "value": "London"} + ] diff --git a/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py b/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py index fa746263b082..4e48ea67d793 100644 --- a/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py +++ b/tests/shared/core/training_data/story_writer/test_yaml_story_writer.py @@ -108,8 +108,6 @@ def test_yaml_writer_dumps_user_messages(): - story: default steps: - intent: greet - user: |- - Hello - action: utter_greet """ @@ -139,10 +137,10 @@ def test_yaml_writer_avoids_dumping_not_existing_user_messages(): @pytest.mark.parametrize( - "input_yaml_file", ["data/test_yaml_stories/rules_with_stories_sorted.yaml",], + "input_yaml_file", ["data/test_yaml_stories/rules_with_stories_sorted.yaml"] ) def test_yaml_writer_dumps_rules( - input_yaml_file: Text, tmpdir: Path, default_domain: Domain, + input_yaml_file: Text, tmpdir: Path, default_domain: Domain ): original_yaml_reader = YAMLStoryReader(default_domain, None, False) original_yaml_story_steps = original_yaml_reader.read_from_file(input_yaml_file) diff --git a/tests/test_test.py b/tests/test_test.py index b279fee01231..8bbc45bececb 100644 --- a/tests/test_test.py +++ b/tests/test_test.py @@ -197,8 +197,6 @@ def test_write_classification_errors(): - story: default steps: - intent: greet # predicted: goodbye: Hello - user: |- - Hello - action: utter_greet # predicted: utter_goodbye """ From f3e2b8918ffc1b622b8e2021a13eb74db652892c Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 10 Nov 2020 09:40:58 +0100 Subject: [PATCH 39/62] update after merge --- rasa/core/policies/ted_policy.py | 176 +++++++++++++++++++++------- rasa/utils/tensorflow/model_data.py | 7 +- 2 files changed, 138 insertions(+), 45 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index b02634b746bd..685af9d3b6b3 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -401,6 +401,9 @@ def _create_model_data( model_data.add_data(attribute_data) model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE) + model_data.add_lengths( + ENTITIES, SEQUENCE_LENGTH, ENTITIES, ENTITY_ATTRIBUTE_TYPE + ) model_data.add_lengths(ACTION_TEXT, SEQUENCE_LENGTH, ACTION_TEXT, SEQUENCE) # add the dialogue lengths @@ -996,7 +999,7 @@ def _encode_real_features_per_attribute( # resulting attribute features will have shape # combined batch dimension and dialogue length x 1 x units attribute_features = self._combine_sparse_dense_features( - tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}", + tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}" ) if attribute in set(SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE): @@ -1029,40 +1032,56 @@ def _batch_loss_entities( if ENTITY_ATTRIBUTE_TYPE not in tf_batch_data.get(ENTITIES, {}): return [] - # if no tags are present at all, we can skip training - # check if there is any tag other than 0, which maps to NO_ENTITY_TAG - # TODO - # If we remove this check the CRF layer is throwing an error. - # Is there a better solution? - if tf.reduce_max(tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0]) == 0.0: - return [] - - sequence_lengths = tf.cast( - tf_batch_data[TEXT][SEQUENCE_LENGTH][0], dtype=tf.int32 - ) - sequence_lengths = tf.squeeze(sequence_lengths, axis=-1) - sequence_lengths += 1 # add sentence features - mask = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) - sequence_lengths -= 1 # remove sentence features + # To calculate the loss for entities we need the output of the text + # sequence transformer (shape: combined batch dialogue dimension x + # sequence length x units), the output of the dialogue transformer + # (shape: batch size x dialogue length x units) and the tag ids for the + # entities (shape: combined batch dialogue dimension x sequence length x units) + # As the combined batch dialogue dimension for the output of the text sequence + # transformer and the tag ids differ, all tensors have different shapes. + # In order to process the tensors, they need to have the same shape. + # Convert all tensors to the same + # combined batch dialogue dimension x sequence length x units + # shape. + # Note: The CRF layer cannot handle 4D tensors. E.g. we cannot use the shape + # batch size x dialogue length x sequence length x units - # +1 for sentence features - sequence_dimension = tf.reduce_max(sequence_lengths) + 1 + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) tag_ids = tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0] # add a zero (no entity) for the sentence features to match the shape of # inputs tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) + # convert tag ids to shape batch-size x dialogue length x sequence length x 1 + tag_ids = self._convert_to_original_shape( + tag_ids, tf_batch_data[ENTITIES][MASK][0], dialogue_lengths + ) + # convert tag ids to shape + # combined batch dialogue dimension x sequence length x 1 + tag_ids = self._combine_batch_and_dialogue_dimension(tag_ids, tf_batch_data) + + # convert the output of the text sequence transformer to shape + # batch-size x dialogue length x sequence length x 1 + text_seq_transformer_output = self._convert_to_original_shape( + self.text_seq_transformer_output, + tf_batch_data[TEXT][MASK][0], + dialogue_lengths, + ) + # convert the output of the text sequence transformer to shape + # combined batch dialogue dimension x sequence length x units + text_seq_transformer_output = self._combine_batch_and_dialogue_dimension( + text_seq_transformer_output, tf_batch_data + ) - # shape of the following two tensors - # (combined batch and dialogue dimension x sequence length x units) - # in case of dialogue_transformer_output sequence length is 1 - text_seq_transformer_output = self.text_seq_transformer_output + # convert the output of the dialogue transformer to shape + # combined batch dialogue dimension x sequence length x units dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( self.dialogue_transformer_output, tf_batch_data ) # repeat the dialogue transformer output sequence-length-times to get the # same shape as the text sequence transformer output + sequence_dimension = tf.shape(tag_ids)[1] dialogue_transformer_output = tf.repeat( tf.expand_dims(dialogue_transformer_output, axis=1), sequence_dimension, @@ -1076,9 +1095,33 @@ def _batch_loss_entities( text_seq_transformer_output, dialogue_transformer_output ) - if self.max_history_tracker_featurizer_used: - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + # we need the sequence length and the mask for the CRF layer + _sequence_lengths = tf_batch_data[TEXT][SEQUENCE_LENGTH][0] + # extract only nonzero lengths and cast to int + _sequence_lengths = tf.cast( + tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32 + ) + # boolean mask returns flat tensor + _sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1) + # + 1 for sentence features + sequence_lengths = _sequence_lengths + 1 + mask = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) + # convert mask and sequence length to correct shape + mask = self._convert_to_original_shape( + mask, tf_batch_data[TEXT][MASK][0], dialogue_lengths + ) + mask = self._combine_batch_and_dialogue_dimension(mask, tf_batch_data) + sequence_lengths = self._convert_to_original_shape( + tf.expand_dims(sequence_lengths, axis=-1), + tf_batch_data[TEXT][MASK][0], + dialogue_lengths, + ) + sequence_lengths = self._combine_batch_and_dialogue_dimension( + sequence_lengths, tf_batch_data + ) + + if self.max_history_tracker_featurizer_used: batch_dim = tf.size(dialogue_lengths) # the first dimension of text transformed is the combined batch and dialogue @@ -1086,6 +1129,8 @@ def _batch_loss_entities( # if the max history tracker featurizer is used we just want the last # dialogues of every batch example + # TODO the last dialogue turn might not contain any entities + # get the indices of all last dialogues last_dialogue_indices = tf.cumsum(dialogue_lengths) - 1 @@ -1109,6 +1154,7 @@ def _batch_loss_entities( sequence_lengths = tf.gather( tf.squeeze(sequence_lengths), last_dialogue_indices ) + # TODO # inside the LSTM of the CRF layer the check len(mask.shape) == 2 # fails. mask is created from the sequence length. @@ -1166,8 +1212,8 @@ def _convert_to_original_shape( ) -> tf.Tensor: """Transform attribute features back to original shape. - Given shape: combined batch and dialogue dimension x 1 x units - Original shape: batch x dialogue length x units + Given shape: combined batch and dialogue dimension x sequence length x units + Original shape: batch x dialogue length x sequence length x units Args: attribute_features: the "real" features to convert @@ -1181,21 +1227,22 @@ def _convert_to_original_shape( """ # in order to convert the attribute features with shape - # combined batch-size and dialogue length x 1 x units - # to a shape of batch-size x dialogue length x units - # we use tf.scatter_nd. Therefore, we need to the target shape and the indices + # combined batch-size and dialogue length x sequence length x units + # to a shape of batch-size x dialogue length x sequence length x units + # we use tf.scatter_nd. Therefore, we need the target shape and the indices # mapping the values of attribute features to the position in the resulting # tensor. batch_dim = tf.shape(attribute_mask)[0] dialogue_dim = tf.shape(attribute_mask)[1] + sequence_length = tf.shape(attribute_features)[1] units = attribute_features.shape[-1] # attribute_mask has shape (batch x dialogue_len x 1), remove last dimension attribute_mask = tf.cast(tf.squeeze(attribute_mask, axis=-1), dtype=tf.int32) # sum of attribute mask contains number of dialogue turns with "real" features non_fake_dialogue_lengths = tf.reduce_sum(attribute_mask, axis=-1) - + # create the batch indices batch_indices = tf.repeat(tf.range(batch_dim), non_fake_dialogue_lengths) dialogue_indices = ( @@ -1218,9 +1265,19 @@ def _convert_to_original_shape( indices = tf.stack([batch_indices, dialogue_indices], axis=1) - shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) + shape = tf.cond( + sequence_length == 1, + lambda: tf.convert_to_tensor([batch_dim, dialogue_dim, units]), + lambda: tf.convert_to_tensor( + [batch_dim, dialogue_dim, sequence_length, units] + ), + ) + attribute_features = tf.cond( + sequence_length == 1, + lambda: tf.squeeze(attribute_features, axis=1), + lambda: attribute_features, + ) - attribute_features = tf.squeeze(attribute_features, axis=1) return tf.scatter_nd(indices, attribute_features, shape) def _process_batch_data( @@ -1395,41 +1452,74 @@ def _batch_predict_entities( ) -> Dict[Text, tf.Tensor]: predictions: Dict[Text, tf.Tensor] = {} - sequence_lengths = tf.cast( - tf_batch_data[TEXT][SEQUENCE_LENGTH][0], dtype=tf.int32 + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + + # convert the output of the text sequence transformer to shape + # batch-size x dialogue length x sequence length x 1 + text_seq_transformer_output = self._convert_to_original_shape( + self.text_seq_transformer_output, + tf_batch_data[TEXT][MASK][0], + dialogue_lengths, + ) + # convert the output of the text sequence transformer to shape + # combined batch dialogue dimension x sequence length x units + text_seq_transformer_output = self._combine_batch_and_dialogue_dimension( + text_seq_transformer_output, tf_batch_data ) - sequence_lengths = tf.squeeze(sequence_lengths, axis=-1) - text_seq_transformer_output = self.text_seq_transformer_output + # convert the output of the dialogue transformer to shape + # combined batch dialogue dimension x sequence length x units dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( self.dialogue_transformer_output, tf_batch_data ) # repeat the dialogue transformer output sequence-length-times to get the # same shape as the text sequence transformer output + sequence_dimension = tf.shape(text_seq_transformer_output)[1] dialogue_transformer_output = tf.repeat( tf.expand_dims(dialogue_transformer_output, axis=1), - text_seq_transformer_output.shape[1], + sequence_dimension, axis=1, ) # add the output of the dialogue transformer to the output of the text # sequence transformer (adding context) + # resulting shape + # (combined batch and dialogue dimension x sequence length x units) text_transformed = tf.add( text_seq_transformer_output, dialogue_transformer_output ) - if self.max_history_tracker_featurizer_used: - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + # we need the sequence length and the mask for the CRF layer + _sequence_lengths = tf_batch_data[TEXT][SEQUENCE_LENGTH][0] + # extract only nonzero lengths and cast to int + _sequence_lengths = tf.cast( + tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32 + ) + # boolean mask returns flat tensor + _sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1) + # + 1 for sentence features + sequence_lengths = _sequence_lengths + 1 + + # convert mask and sequence length to correct shape + sequence_lengths = self._convert_to_original_shape( + tf.expand_dims(sequence_lengths, axis=-1), + tf_batch_data[TEXT][MASK][0], + dialogue_lengths, + ) + sequence_lengths = self._combine_batch_and_dialogue_dimension( + sequence_lengths, tf_batch_data + ) + if self.max_history_tracker_featurizer_used: batch_dim = tf.size(dialogue_lengths) - # +1 for sentence features - sequence_dimension = tf.reduce_max(sequence_lengths) + 1 # the first dimension of text transformed is the combined batch and dialogue # dimension, which corresponds to the sum of all dialogue lengths # if the max history tracker featurizer is used we just want the last # dialogues of every batch example + # TODO the last dialogue turn might not contain any entities + # get the indices of all last dialogues last_dialogue_indices = tf.cumsum(dialogue_lengths) - 1 @@ -1445,6 +1535,7 @@ def _batch_predict_entities( indices = tf.stack([dialogue_indices, sequence_indices], axis=2) # get all last dialogues from text_transformed using the above indices + # resulting shape (batch size x sequence length x units) text_transformed = tf.gather_nd(text_transformed, indices) # do the same for the other tensors sequence_lengths = tf.gather( @@ -1452,9 +1543,8 @@ def _batch_predict_entities( ) name = ENTITY_ATTRIBUTE_TYPE - _input = text_transformed - _logits = self._tf_layers[f"embed.{name}.logits"](_input) + _logits = self._tf_layers[f"embed.{name}.logits"](text_transformed) pred_ids, confidences = self._tf_layers[f"crf.{name}"]( _logits, sequence_lengths - 1 ) diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index 46cf8fd5bd66..3c8012f024ba 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -140,6 +140,9 @@ def _validate_number_of_dimensions( if isinstance(_sub_array, scipy.sparse.spmatrix): dim = i break + if isinstance(_sub_array, np.ndarray) and _sub_array.shape[0] == 0: + # sequence dimension is 0, we are dealing with "fake" features + return # If the resulting sub_array is sparse, the remaining number of dimensions # should be at least 2 @@ -1122,7 +1125,7 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray: ) data_padded = np.zeros( - [combined_dialogue_len, max_seq_len, number_of_features,], + [combined_dialogue_len, max_seq_len, number_of_features], dtype=array_of_array_of_dense[0][0].dtype, ) @@ -1225,7 +1228,7 @@ def _4d_scipy_matrix_to_values( indices = np.hstack( [ np.vstack( - [sum(dialogue_len[:i]) + j * np.ones_like(x.row), x.row, x.col,] + [sum(dialogue_len[:i]) + j * np.ones_like(x.row), x.row, x.col] ) for i, array_of_sparse in enumerate(array_of_array_of_sparse) for j, x in enumerate(array_of_sparse) From adea49e8ad50532c6634acae681a44a70e93d8a9 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 10 Nov 2020 10:14:52 +0100 Subject: [PATCH 40/62] use python if instead of tf.cond --- rasa/core/policies/ted_policy.py | 137 +++++++++++++++++++------------ 1 file changed, 83 insertions(+), 54 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 685af9d3b6b3..41c29231242b 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -734,9 +734,6 @@ def __init__( self._prepare_layers() - self.text_seq_transformer_output: Optional[tf.Tensor] = None - self.dialogue_transformer_output: Optional[tf.Tensor] = None - def _check_data(self) -> None: if not any(key in [INTENT, TEXT] for key in self.data_signature.keys()): raise ValueError( @@ -841,11 +838,13 @@ def _prepare_encoding_layers(self, name: Text) -> None: def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0] # labels cannot have all features "fake" - all_labels_encoded = { - key: self._encode_real_features_per_attribute(self.tf_label_data, key) - for key in self.tf_label_data.keys() - if key != LABEL_KEY - } + all_labels_encoded = {} + for key in self.tf_label_data.keys(): + if key != LABEL_KEY: + attribute_features, _ = self._encode_real_features_per_attribute( + self.tf_label_data, key + ) + all_labels_encoded[key] = attribute_features if ( all_labels_encoded.get(f"{LABEL_KEY}_{ACTION_TEXT}") is not None @@ -871,7 +870,7 @@ def _emebed_dialogue( self, dialogue_in: tf.Tensor, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], - ) -> Tuple[tf.Tensor, tf.Tensor]: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Create dialogue level embedding and mask.""" dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) mask = self._compute_mask(dialogue_lengths) @@ -881,7 +880,7 @@ def _emebed_dialogue( ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) - self.dialogue_transformer_output = dialogue_transformed + dialogue_transformer_output = dialogue_transformed if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used @@ -892,11 +891,11 @@ def _emebed_dialogue( dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed) - return dialogue_embed, mask + return dialogue_embed, mask, dialogue_transformer_output def _encode_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: # The input is a representation of 4d tensor of # shape (batch-size x dialogue-len x sequence-len x units) in 3d of shape # (sum of dialogue history length for all tensors in the batch x @@ -922,7 +921,7 @@ def _encode_features_per_attribute( def _encode_fake_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: attribute_features_list = tf_batch_data[attribute][SENTENCE] attribute_mask = tf_batch_data[attribute][MASK][0] @@ -939,11 +938,14 @@ def _encode_fake_features_per_attribute( else: units += f.shape[-1] - return tf.zeros((batch_dim, dialogue_dim, units), dtype=tf.float32) + attribute_features = tf.zeros( + (batch_dim, dialogue_dim, units), dtype=tf.float32 + ) + return attribute_features, None def _encode_real_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: """Encodes features for a given attribute. Args: @@ -954,6 +956,8 @@ def _encode_real_features_per_attribute( Returns: A tensor combining all features for `attribute` """ + text_transformer_output = None + if attribute in SEQUENCE_FEATURES_TO_ENCODE: # sequence_lengths contain `0` for "fake" features, while # tf_batch_data[attribute] contain only "real" features @@ -984,7 +988,7 @@ def _encode_real_features_per_attribute( ) if attribute == TEXT: - self.text_seq_transformer_output = attribute_features + text_transformer_output = attribute_features # resulting attribute features will have shape # combined batch dimension and dialogue length x 1 x units @@ -1022,12 +1026,17 @@ def _encode_real_features_per_attribute( # (combined batch dimension and dialogue length x 1 x units) # convert them back to their original shape of # batch size x dialogue length x units - return self._convert_to_original_shape( - attribute_features, attribute_mask, dialogue_lengths + attribute_features = self._convert_to_original_shape( + attribute_features, attribute_mask, dialogue_lengths, False ) + return attribute_features, text_transformer_output + def _batch_loss_entities( - self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, ) -> List[tf.Tensor]: if ENTITY_ATTRIBUTE_TYPE not in tf_batch_data.get(ENTITIES, {}): return [] @@ -1054,7 +1063,7 @@ def _batch_loss_entities( tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) # convert tag ids to shape batch-size x dialogue length x sequence length x 1 tag_ids = self._convert_to_original_shape( - tag_ids, tf_batch_data[ENTITIES][MASK][0], dialogue_lengths + tag_ids, tf_batch_data[ENTITIES][MASK][0], dialogue_lengths, True ) # convert tag ids to shape # combined batch dialogue dimension x sequence length x 1 @@ -1063,9 +1072,10 @@ def _batch_loss_entities( # convert the output of the text sequence transformer to shape # batch-size x dialogue length x sequence length x 1 text_seq_transformer_output = self._convert_to_original_shape( - self.text_seq_transformer_output, + text_transformer_output, tf_batch_data[TEXT][MASK][0], dialogue_lengths, + True, ) # convert the output of the text sequence transformer to shape # combined batch dialogue dimension x sequence length x units @@ -1076,7 +1086,7 @@ def _batch_loss_entities( # convert the output of the dialogue transformer to shape # combined batch dialogue dimension x sequence length x units dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( - self.dialogue_transformer_output, tf_batch_data + dialogue_transformer_output, tf_batch_data ) # repeat the dialogue transformer output sequence-length-times to get the @@ -1109,13 +1119,14 @@ def _batch_loss_entities( # convert mask and sequence length to correct shape mask = self._convert_to_original_shape( - mask, tf_batch_data[TEXT][MASK][0], dialogue_lengths + mask, tf_batch_data[TEXT][MASK][0], dialogue_lengths, True ) mask = self._combine_batch_and_dialogue_dimension(mask, tf_batch_data) sequence_lengths = self._convert_to_original_shape( tf.expand_dims(sequence_lengths, axis=-1), tf_batch_data[TEXT][MASK][0], dialogue_lengths, + True, ) sequence_lengths = self._combine_batch_and_dialogue_dimension( sequence_lengths, tf_batch_data @@ -1209,6 +1220,7 @@ def _convert_to_original_shape( attribute_features: tf.Tensor, attribute_mask: tf.Tensor, dialogue_lengths: tf.Tensor, + consider_sequence_dimension: bool, ) -> tf.Tensor: """Transform attribute features back to original shape. @@ -1265,24 +1277,20 @@ def _convert_to_original_shape( indices = tf.stack([batch_indices, dialogue_indices], axis=1) - shape = tf.cond( - sequence_length == 1, - lambda: tf.convert_to_tensor([batch_dim, dialogue_dim, units]), - lambda: tf.convert_to_tensor( + if consider_sequence_dimension: + shape = tf.convert_to_tensor( [batch_dim, dialogue_dim, sequence_length, units] - ), - ) - attribute_features = tf.cond( - sequence_length == 1, - lambda: tf.squeeze(attribute_features, axis=1), - lambda: attribute_features, - ) + ) + return tf.scatter_nd(indices, attribute_features, shape) + + shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) + attribute_features = tf.squeeze(attribute_features, axis=1) return tf.scatter_nd(indices, attribute_features, shape) def _process_batch_data( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: """Encodes batch data. Combines intent and text and action name and action text if both are present. @@ -1294,11 +1302,19 @@ def _process_batch_data( Tensor: encoding of all features in the batch, combined; """ # encode each attribute present in tf_batch_data - batch_encoded = { - key: self._encode_features_per_attribute(tf_batch_data, key) - for key in tf_batch_data.keys() - if LABEL_KEY not in key and DIALOGUE not in key - } + text_transformer_output = None + + batch_encoded = {} + for key in tf_batch_data.keys(): + if LABEL_KEY not in key and DIALOGUE not in key: + attribute_features, _text_transformer_output = self._encode_features_per_attribute( + tf_batch_data, key + ) + + batch_encoded[key] = attribute_features + if _text_transformer_output is not None: + text_transformer_output = _text_transformer_output + # if both action text and action name are present, combine them; otherwise, # return the one which is present @@ -1332,7 +1348,7 @@ def _process_batch_data( batch_features = tf.concat(batch_features, axis=-1) - return batch_features + return batch_features, text_transformer_output @staticmethod def _get_labels_embed( @@ -1364,8 +1380,8 @@ def batch_loss( label_ids = tf_batch_data[LABEL_KEY][LABEL_SUB_KEY][0] labels_embed = self._get_labels_embed(label_ids, all_labels_embed) - dialogue_in = self._process_batch_data(tf_batch_data) - dialogue_embed, dialogue_mask = self._emebed_dialogue( + dialogue_in, text_transformer_output = self._process_batch_data(tf_batch_data) + dialogue_embed, dialogue_mask, dialogue_transformer_output = self._emebed_dialogue( dialogue_in, tf_batch_data ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) @@ -1384,10 +1400,14 @@ def batch_loss( if ( self.config[ENTITY_RECOGNITION] - and self.dialogue_transformer_output is not None - and self.text_seq_transformer_output is not None + and dialogue_transformer_output is not None + and text_transformer_output is not None ): - losses.extend(self._batch_loss_entities(tf_batch_data)) + losses.extend( + self._batch_loss_entities( + tf_batch_data, dialogue_transformer_output, text_transformer_output + ) + ) self.action_loss.update_state(loss) self.action_acc.update_state(acc) @@ -1418,8 +1438,8 @@ def batch_predict( batch_in, self.predict_data_signature ) - dialogue_in = self._process_batch_data(tf_batch_data) - dialogue_embed, dialogue_mask = self._emebed_dialogue( + dialogue_in, text_transformer_output = self._process_batch_data(tf_batch_data) + dialogue_embed, dialogue_mask, dialogue_transformer_output = self._emebed_dialogue( dialogue_in, tf_batch_data ) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) @@ -1428,10 +1448,14 @@ def batch_predict( if ( self.config[ENTITY_RECOGNITION] - and self.dialogue_transformer_output is not None - and self.text_seq_transformer_output is not None + and dialogue_transformer_output is not None + and text_transformer_output is not None ): - predictions.update(self._batch_predict_entities(tf_batch_data)) + predictions.update( + self._batch_predict_entities( + tf_batch_data, dialogue_transformer_output, text_transformer_output + ) + ) sim_all = self._tf_layers[f"loss.{LABEL}"].sim( dialogue_embed[:, :, tf.newaxis, :], @@ -1448,7 +1472,10 @@ def batch_predict( return predictions def _batch_predict_entities( - self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, ) -> Dict[Text, tf.Tensor]: predictions: Dict[Text, tf.Tensor] = {} @@ -1457,9 +1484,10 @@ def _batch_predict_entities( # convert the output of the text sequence transformer to shape # batch-size x dialogue length x sequence length x 1 text_seq_transformer_output = self._convert_to_original_shape( - self.text_seq_transformer_output, + text_transformer_output, tf_batch_data[TEXT][MASK][0], dialogue_lengths, + True, ) # convert the output of the text sequence transformer to shape # combined batch dialogue dimension x sequence length x units @@ -1470,7 +1498,7 @@ def _batch_predict_entities( # convert the output of the dialogue transformer to shape # combined batch dialogue dimension x sequence length x units dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( - self.dialogue_transformer_output, tf_batch_data + dialogue_transformer_output, tf_batch_data ) # repeat the dialogue transformer output sequence-length-times to get the @@ -1505,6 +1533,7 @@ def _batch_predict_entities( tf.expand_dims(sequence_lengths, axis=-1), tf_batch_data[TEXT][MASK][0], dialogue_lengths, + True, ) sequence_lengths = self._combine_batch_and_dialogue_dimension( sequence_lengths, tf_batch_data From d3bd22db898131825ac9f2fec7e3b8c907779a0b Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 10 Nov 2020 10:39:45 +0100 Subject: [PATCH 41/62] we need to return a tensor in tf.cond instead of None --- rasa/core/policies/ted_policy.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 41c29231242b..698114e06251 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -941,7 +941,7 @@ def _encode_fake_features_per_attribute( attribute_features = tf.zeros( (batch_dim, dialogue_dim, units), dtype=tf.float32 ) - return attribute_features, None + return attribute_features, tf.zeros(([1])) def _encode_real_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text @@ -956,7 +956,7 @@ def _encode_real_features_per_attribute( Returns: A tensor combining all features for `attribute` """ - text_transformer_output = None + text_transformer_output = tf.zeros([1]) if attribute in SEQUENCE_FEATURES_TO_ENCODE: # sequence_lengths contain `0` for "fake" features, while @@ -1071,6 +1071,7 @@ def _batch_loss_entities( # convert the output of the text sequence transformer to shape # batch-size x dialogue length x sequence length x 1 + # TODO text_transformer_output shape is unknown in non-eager mode text_seq_transformer_output = self._convert_to_original_shape( text_transformer_output, tf_batch_data[TEXT][MASK][0], @@ -1247,7 +1248,6 @@ def _convert_to_original_shape( batch_dim = tf.shape(attribute_mask)[0] dialogue_dim = tf.shape(attribute_mask)[1] - sequence_length = tf.shape(attribute_features)[1] units = attribute_features.shape[-1] # attribute_mask has shape (batch x dialogue_len x 1), remove last dimension @@ -1278,6 +1278,7 @@ def _convert_to_original_shape( indices = tf.stack([batch_indices, dialogue_indices], axis=1) if consider_sequence_dimension: + sequence_length = tf.shape(attribute_features)[1] shape = tf.convert_to_tensor( [batch_dim, dialogue_dim, sequence_length, units] ) @@ -1302,7 +1303,7 @@ def _process_batch_data( Tensor: encoding of all features in the batch, combined; """ # encode each attribute present in tf_batch_data - text_transformer_output = None + text_transformer_output = tf.zeros([1]) batch_encoded = {} for key in tf_batch_data.keys(): @@ -1312,7 +1313,7 @@ def _process_batch_data( ) batch_encoded[key] = attribute_features - if _text_transformer_output is not None: + if tf.reduce_max(_text_transformer_output) > 0: text_transformer_output = _text_transformer_output # if both action text and action name are present, combine them; otherwise, From cd69de9a3eec71191801c9901275d9fa92507236 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 10 Nov 2020 12:34:06 +0100 Subject: [PATCH 42/62] create entity tags for all texts --- rasa/core/featurizers/single_state_featurizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 7607b37427a9..a39f161faba6 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -136,7 +136,7 @@ def _create_entity_tag_features( return [] parsed_text = interpreter.featurize_message(Message({TEXT: sub_state[TEXT]})) - entities = [dict(entity) for entity in sub_state[ENTITIES]] + entities = sub_state.get(ENTITIES, []) tag_id_mapping = self.get_entity_tag_ids() _tags = [] @@ -261,7 +261,7 @@ def encode_state( state_features.update( self._extract_state_features(sub_state, interpreter, sparse=True) ) - if sub_state.get(ENTITIES): + if sub_state.get(TEXT): state_features[ENTITIES] = self._create_features( sub_state, ENTITIES, sparse=True ) + self._create_entity_tag_features(sub_state, interpreter) From 8e8af875e990115f87ad1f48515ce99cadd7d2bd Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 10 Nov 2020 17:22:07 +0100 Subject: [PATCH 43/62] update batch loss entities (not yet working) --- rasa/core/policies/ted_policy.py | 179 ++++++++++++++----------------- 1 file changed, 83 insertions(+), 96 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 698114e06251..a7cac2b10880 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -941,7 +941,7 @@ def _encode_fake_features_per_attribute( attribute_features = tf.zeros( (batch_dim, dialogue_dim, units), dtype=tf.float32 ) - return attribute_features, tf.zeros(([1])) + return attribute_features, tf.zeros(([0, 0, units])) def _encode_real_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text @@ -956,7 +956,7 @@ def _encode_real_features_per_attribute( Returns: A tensor combining all features for `attribute` """ - text_transformer_output = tf.zeros([1]) + text_transformer_output = tf.zeros([0, 0, 0]) if attribute in SEQUENCE_FEATURES_TO_ENCODE: # sequence_lengths contain `0` for "fake" features, while @@ -1037,62 +1037,47 @@ def _batch_loss_entities( tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], dialogue_transformer_output: tf.Tensor, text_transformer_output: tf.Tensor, - ) -> List[tf.Tensor]: + ) -> tf.Tensor: if ENTITY_ATTRIBUTE_TYPE not in tf_batch_data.get(ENTITIES, {}): - return [] + return tf.constant(0) + + # TODO tf.cond + if tf.shape(text_transformer_output)[0] == 0: + return tf.constant(0) # To calculate the loss for entities we need the output of the text # sequence transformer (shape: combined batch dialogue dimension x # sequence length x units), the output of the dialogue transformer # (shape: batch size x dialogue length x units) and the tag ids for the # entities (shape: combined batch dialogue dimension x sequence length x units) - # As the combined batch dialogue dimension for the output of the text sequence - # transformer and the tag ids differ, all tensors have different shapes. + # The combined batch dialogue dimension for the text sequence transformer + # and the tag ids matches. # In order to process the tensors, they need to have the same shape. - # Convert all tensors to the same - # combined batch dialogue dimension x sequence length x units - # shape. + # Convert the output of the dialogue transformer to shape + # (combined batch dialogue dimension x sequence length x units). # Note: The CRF layer cannot handle 4D tensors. E.g. we cannot use the shape # batch size x dialogue length x sequence length x units - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - tag_ids = tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0] # add a zero (no entity) for the sentence features to match the shape of # inputs tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) - # convert tag ids to shape batch-size x dialogue length x sequence length x 1 - tag_ids = self._convert_to_original_shape( - tag_ids, tf_batch_data[ENTITIES][MASK][0], dialogue_lengths, True - ) - # convert tag ids to shape - # combined batch dialogue dimension x sequence length x 1 - tag_ids = self._combine_batch_and_dialogue_dimension(tag_ids, tf_batch_data) - - # convert the output of the text sequence transformer to shape - # batch-size x dialogue length x sequence length x 1 - # TODO text_transformer_output shape is unknown in non-eager mode - text_seq_transformer_output = self._convert_to_original_shape( - text_transformer_output, - tf_batch_data[TEXT][MASK][0], - dialogue_lengths, - True, - ) - # convert the output of the text sequence transformer to shape - # combined batch dialogue dimension x sequence length x units - text_seq_transformer_output = self._combine_batch_and_dialogue_dimension( - text_seq_transformer_output, tf_batch_data - ) # convert the output of the dialogue transformer to shape # combined batch dialogue dimension x sequence length x units + batch_dim = tf.shape(dialogue_transformer_output)[0] dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( dialogue_transformer_output, tf_batch_data ) + # get only the dialogues that contain a user utterance + dialogue_transformer_output = tf.boolean_mask( + dialogue_transformer_output, + tf.squeeze(tf_batch_data[TEXT][SEQUENCE_LENGTH][0], axis=-1), + ) # repeat the dialogue transformer output sequence-length-times to get the # same shape as the text sequence transformer output - sequence_dimension = tf.shape(tag_ids)[1] + sequence_dimension = tf.shape(text_transformer_output)[1] dialogue_transformer_output = tf.repeat( tf.expand_dims(dialogue_transformer_output, axis=1), sequence_dimension, @@ -1102,9 +1087,7 @@ def _batch_loss_entities( # sequence transformer (adding context) # resulting shape # (combined batch and dialogue dimension x sequence length x units) - text_transformed = tf.add( - text_seq_transformer_output, dialogue_transformer_output - ) + text_transformed = tf.add(text_transformer_output, dialogue_transformer_output) # we need the sequence length and the mask for the CRF layer _sequence_lengths = tf_batch_data[TEXT][SEQUENCE_LENGTH][0] @@ -1118,35 +1101,48 @@ def _batch_loss_entities( sequence_lengths = _sequence_lengths + 1 mask = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) - # convert mask and sequence length to correct shape - mask = self._convert_to_original_shape( - mask, tf_batch_data[TEXT][MASK][0], dialogue_lengths, True - ) - mask = self._combine_batch_and_dialogue_dimension(mask, tf_batch_data) - sequence_lengths = self._convert_to_original_shape( - tf.expand_dims(sequence_lengths, axis=-1), - tf_batch_data[TEXT][MASK][0], - dialogue_lengths, - True, - ) - sequence_lengths = self._combine_batch_and_dialogue_dimension( - sequence_lengths, tf_batch_data - ) - if self.max_history_tracker_featurizer_used: - batch_dim = tf.size(dialogue_lengths) - - # the first dimension of text transformed is the combined batch and dialogue - # dimension, which corresponds to the sum of all dialogue lengths # if the max history tracker featurizer is used we just want the last - # dialogues of every batch example - - # TODO the last dialogue turn might not contain any entities + # dialogues that contain a user utterance for every batch example + + # the attribute mask indicates which dialogue contains a user utterance + attribute_mask = tf_batch_data[TEXT][MASK][0] + # get indices of all dialogues that contain a user utterance + # shape: (combined batch dialogue dimension x 2) + # TODO it seems like there are sometimes dialogues that do not have any + # text features, but that should not be + indices_of_text_dialogues = tf.where( + tf.not_equal(tf.squeeze(attribute_mask), 0) + ) + # get the index of the last dialogues indices for every batch example + indices_of_last_text_dialogue_indices = ( + tf.cumsum( + tf.squeeze( + tf.cast(tf.reduce_sum(attribute_mask, axis=1), dtype=tf.int32) + ) + ) + - 1 + ) + # get only those the indices_of_text_dialogues of the last dialogues + # resulting shape of indices (batch size x 2) + indices_of_text_dialogues = tf.gather( + indices_of_text_dialogues, indices_of_last_text_dialogue_indices + ) - # get the indices of all last dialogues - last_dialogue_indices = tf.cumsum(dialogue_lengths) - 1 + # We now hove the indices of the relevant dialogues. However, + # text_transformed has a different shape (first dimension is the combined + # batch dialogue dimension). Thus we need to map the + # indices_of_text_dialogues into this shape. + cumsum_sequence_length = tf.squeeze( + tf.cast(tf.cumsum(sequence_lengths, axis=0), dtype=tf.int32) + ) + last_dialogue_indices = tf.map_fn( + lambda x: cumsum_sequence_length[x[0]] + x[1], + tf.cast(indices_of_text_dialogues, dtype=tf.int32), + ) - # build up indices to get the last dialogues from text_transformed + # build up indices to get the last dialogues from text_transformed and the + # other tensors dialogue_indices = tf.repeat( tf.expand_dims(last_dialogue_indices, axis=1), sequence_dimension, @@ -1163,13 +1159,8 @@ def _batch_loss_entities( # do the same for the other tensors tag_ids = tf.gather_nd(tag_ids, indices) mask = tf.gather_nd(mask, indices) - sequence_lengths = tf.gather( - tf.squeeze(sequence_lengths), last_dialogue_indices - ) - - # TODO - # inside the LSTM of the CRF layer the check len(mask.shape) == 2 - # fails. mask is created from the sequence length. + # as sequence_lengths is a 1D tensor use tf.gather instead of tf.gather_nd + sequence_lengths = tf.gather(sequence_lengths, last_dialogue_indices) loss, f1, _ = self._calculate_entity_loss( text_transformed, @@ -1182,7 +1173,7 @@ def _batch_loss_entities( self.entity_loss.update_state(loss) self.entity_f1.update_state(f1) - return [loss] + return loss @staticmethod def _combine_batch_and_dialogue_dimension( @@ -1203,7 +1194,7 @@ def _combine_batch_and_dialogue_dimension( """ dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - batch_dim = tf.size(dialogue_lengths) + batch_dim = tf.shape(dialogue_lengths)[0] batch_indices = tf.repeat(tf.range(batch_dim), dialogue_lengths) dialogue_indices = ( tf.map_fn( @@ -1303,17 +1294,17 @@ def _process_batch_data( Tensor: encoding of all features in the batch, combined; """ # encode each attribute present in tf_batch_data - text_transformer_output = tf.zeros([1]) + text_transformer_output = tf.zeros([0, 0, 0]) batch_encoded = {} for key in tf_batch_data.keys(): if LABEL_KEY not in key and DIALOGUE not in key: - attribute_features, _text_transformer_output = self._encode_features_per_attribute( - tf_batch_data, key - ) - + ( + attribute_features, + _text_transformer_output, + ) = self._encode_features_per_attribute(tf_batch_data, key) batch_encoded[key] = attribute_features - if tf.reduce_max(_text_transformer_output) > 0: + if tf.shape(_text_transformer_output)[0] > 0: text_transformer_output = _text_transformer_output # if both action text and action name are present, combine them; otherwise, @@ -1382,9 +1373,11 @@ def batch_loss( labels_embed = self._get_labels_embed(label_ids, all_labels_embed) dialogue_in, text_transformer_output = self._process_batch_data(tf_batch_data) - dialogue_embed, dialogue_mask, dialogue_transformer_output = self._emebed_dialogue( - dialogue_in, tf_batch_data - ) + ( + dialogue_embed, + dialogue_mask, + dialogue_transformer_output, + ) = self._emebed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) losses = [] @@ -1399,12 +1392,8 @@ def batch_loss( ) losses.append(loss) - if ( - self.config[ENTITY_RECOGNITION] - and dialogue_transformer_output is not None - and text_transformer_output is not None - ): - losses.extend( + if self.config[ENTITY_RECOGNITION]: + losses.append( self._batch_loss_entities( tf_batch_data, dialogue_transformer_output, text_transformer_output ) @@ -1440,18 +1429,16 @@ def batch_predict( ) dialogue_in, text_transformer_output = self._process_batch_data(tf_batch_data) - dialogue_embed, dialogue_mask, dialogue_transformer_output = self._emebed_dialogue( - dialogue_in, tf_batch_data - ) + ( + dialogue_embed, + dialogue_mask, + dialogue_transformer_output, + ) = self._emebed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) predictions = {} - if ( - self.config[ENTITY_RECOGNITION] - and dialogue_transformer_output is not None - and text_transformer_output is not None - ): + if self.config[ENTITY_RECOGNITION]: predictions.update( self._batch_predict_entities( tf_batch_data, dialogue_transformer_output, text_transformer_output @@ -1480,6 +1467,8 @@ def _batch_predict_entities( ) -> Dict[Text, tf.Tensor]: predictions: Dict[Text, tf.Tensor] = {} + # TODO Update according to batch loss entities + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) # convert the output of the text sequence transformer to shape @@ -1548,8 +1537,6 @@ def _batch_predict_entities( # if the max history tracker featurizer is used we just want the last # dialogues of every batch example - # TODO the last dialogue turn might not contain any entities - # get the indices of all last dialogues last_dialogue_indices = tf.cumsum(dialogue_lengths) - 1 From d1f7e978fb76d420f1cd0493ce167b62be890a80 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Wed, 11 Nov 2020 21:54:09 +0100 Subject: [PATCH 44/62] input to entity loss --- .../featurizers/single_state_featurizer.py | 115 ++--- rasa/core/featurizers/tracker_featurizers.py | 92 +++- rasa/core/policies/memoization.py | 19 +- rasa/core/policies/policy.py | 11 +- rasa/core/policies/sklearn_policy.py | 2 +- rasa/core/policies/ted_policy.py | 466 +++++++++--------- rasa/nlu/classifiers/diet_classifier.py | 4 +- rasa/shared/core/domain.py | 12 +- rasa/shared/core/events.py | 2 +- rasa/shared/core/generator.py | 17 +- rasa/shared/core/trackers.py | 26 +- rasa/shared/nlu/constants.py | 1 + rasa/shared/nlu/training_data/features.py | 10 - rasa/utils/tensorflow/constants.py | 1 + rasa/utils/tensorflow/models.py | 12 +- 15 files changed, 411 insertions(+), 379 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index a39f161faba6..e3ed3712ad76 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -1,7 +1,7 @@ import logging import numpy as np import scipy.sparse -from typing import List, Optional, Dict, Text, Set +from typing import List, Optional, Dict, Text, Set, Any from collections import defaultdict import rasa.shared.utils.io @@ -17,14 +17,15 @@ ACTION_TEXT, ACTION_NAME, INTENT, - FEATURE_TYPE_SEQUENCE, TEXT, NO_ENTITY_TAG, ENTITY_ATTRIBUTE_TYPE, + ENTITY_TAGS, ) from rasa.shared.nlu.training_data.features import Features from rasa.shared.nlu.training_data.message import Message from rasa.utils.tensorflow.model_data_utils import TAG_ID_ORIGIN +from rasa.utils.tensorflow.constants import IDS logger = logging.getLogger(__name__) @@ -42,6 +43,23 @@ class SingleStateFeaturizer: def __init__(self) -> None: self._default_feature_states = {} self.action_texts = [] + self.tag_id_mapping = {} + + def get_entity_tag_ids(self) -> Dict[Text, int]: + """Returns the tag to index mapping for entities. + + Returns: + Tag to index mapping. + """ + if ENTITIES not in self._default_feature_states: + return {} + + tag_ids = { + tag: idx + 1 # +1 to keep 0 for the NO_ENTITY_TAG + for tag, idx in self._default_feature_states[ENTITIES].items() + } + tag_ids[NO_ENTITY_TAG] = 0 + return tag_ids def prepare_from_domain(self, domain: Domain) -> None: """Gets necessary information for featurization from domain. @@ -61,6 +79,7 @@ def convert_to_dict(feature_states: List[Text]) -> Dict[Text, int]: self._default_feature_states[SLOTS] = convert_to_dict(domain.slot_states) self._default_feature_states[ACTIVE_LOOP] = convert_to_dict(domain.form_names) self.action_texts = domain.action_texts + self.tag_id_mapping = self.get_entity_tag_ids() def _state_features_for_attribute( self, sub_state: SubState, attribute: Text @@ -68,7 +87,7 @@ def _state_features_for_attribute( if attribute in {INTENT, ACTION_NAME}: return {sub_state[attribute]: 1} elif attribute == ENTITIES: - return {entity["entity"]: 1 for entity in sub_state.get(ENTITIES, [])} + return {entity: 1 for entity in sub_state.get(ENTITIES, [])} elif attribute == ACTIVE_LOOP: return {sub_state["name"]: 1} elif attribute == SLOTS: @@ -104,58 +123,6 @@ def _create_features( ) return [features] - def get_entity_tag_ids(self) -> Dict[Text, int]: - """Returns the tag to index mapping for entities. - - Returns: - Tag to index mapping. - """ - if ENTITIES not in self._default_feature_states: - return {} - - tag_ids = { - tag: idx + 1 # +1 to keep 0 for the NO_ENTITY_TAG - for tag, idx in self._default_feature_states[ENTITIES].items() - } - tag_ids[NO_ENTITY_TAG] = 0 - return tag_ids - - def _create_entity_tag_features( - self, sub_state: SubState, interpreter: NaturalLanguageInterpreter - ) -> List["Features"]: - from rasa.nlu.test import determine_token_labels - - # TODO - # The entity states used to create the tag-idx-mapping contains the - # entities and the concatenated entity and roles/groups. We do not - # distinguish between entities and roles/groups right now. - # TODO - # Should we support BILOU tagging? - - if TEXT not in sub_state: - return [] - - parsed_text = interpreter.featurize_message(Message({TEXT: sub_state[TEXT]})) - entities = sub_state.get(ENTITIES, []) - tag_id_mapping = self.get_entity_tag_ids() - - _tags = [] - for token in parsed_text.get(TOKENS_NAMES[TEXT]): - _tag = determine_token_labels( - token, entities, attribute_key=ENTITY_ATTRIBUTE_TYPE - ) - _tags.append(tag_id_mapping[_tag]) - - # transpose to have seq_len x 1 - return [ - Features( - np.array([_tags]).T, - FEATURE_TYPE_SEQUENCE, - ENTITY_ATTRIBUTE_TYPE, - TAG_ID_ORIGIN, - ) - ] - @staticmethod def _to_sparse_sentence_features( sparse_sequence_features: List["Features"], @@ -261,10 +228,10 @@ def encode_state( state_features.update( self._extract_state_features(sub_state, interpreter, sparse=True) ) - if sub_state.get(TEXT): + if sub_state.get(ENTITIES): state_features[ENTITIES] = self._create_features( sub_state, ENTITIES, sparse=True - ) + self._create_entity_tag_features(sub_state, interpreter) + ) if state_type in {SLOTS, ACTIVE_LOOP}: state_features[state_type] = self._create_features( @@ -273,6 +240,40 @@ def encode_state( return state_features + def encode_entity( + self, entity_data: Dict[Text, Any], interpreter: NaturalLanguageInterpreter + ) -> Dict[Text, List["Features"]]: + from rasa.nlu.test import determine_token_labels + + # TODO + # The entity states used to create the tag-idx-mapping contains the + # entities and the concatenated entity and roles/groups. We do not + # distinguish between entities and roles/groups right now. + # TODO + # Should we support BILOU tagging? + + if TEXT not in entity_data or len(self.tag_id_mapping) < 2: + # we cannot build a classifier if there are less than 2 class + return {} + + parsed_text = interpreter.featurize_message(Message({TEXT: entity_data[TEXT]})) + entities = entity_data.get(ENTITIES, []) + + _tags = [] + for token in parsed_text.get(TOKENS_NAMES[TEXT]): + _tag = determine_token_labels( + token, entities, attribute_key=ENTITY_ATTRIBUTE_TYPE + ) + # TODO handle if tag is not in mapping + _tags.append(self.tag_id_mapping[_tag]) + + # transpose to have seq_len x 1 + return { + ENTITY_TAGS: [ + Features(np.array([_tags]).T, IDS, ENTITY_TAGS, TAG_ID_ORIGIN,) + ] + } + def _encode_action( self, action: Text, interpreter: NaturalLanguageInterpreter ) -> Dict[Text, List["Features"]]: diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py index 4f250c535ea8..4973b1da98a7 100644 --- a/rasa/core/featurizers/tracker_featurizers.py +++ b/rasa/core/featurizers/tracker_featurizers.py @@ -3,15 +3,15 @@ import jsonpickle import logging -from rasa.shared.nlu.constants import TEXT, INTENT +from rasa.shared.nlu.constants import TEXT, INTENT, ENTITIES from rasa.shared.exceptions import RasaException from tqdm import tqdm -from typing import Tuple, List, Optional, Dict, Text, Union +from typing import Tuple, List, Optional, Dict, Text, Union, Any import numpy as np from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer from rasa.shared.core.domain import State, Domain -from rasa.shared.core.events import ActionExecuted +from rasa.shared.core.events import ActionExecuted, UserUttered from rasa.shared.core.trackers import ( DialogueStateTracker, is_prev_action_listen_in_state, @@ -91,6 +91,34 @@ def _convert_labels_to_ids( ] ) + def _create_entity_tags( + self, + trackers_as_entities: List[List[Dict[Text, Any]]], + interpreter: NaturalLanguageInterpreter, + ) -> List[List[Dict[Text, List["Features"]]]]: + return [ + [ + self.state_featurizer.encode_entity(entity_data, interpreter) + for entity_data in trackers_entities + ] + for trackers_entities in trackers_as_entities + ] + + @staticmethod + def _entity_data(event: UserUttered) -> Dict[Text, Any]: + if event.text: + return {TEXT: event.text, ENTITIES: event.entities} + + # input is not textual, so add empty dict + return {} + + def training_states_actions_and_entities( + self, trackers: List[DialogueStateTracker], domain: Domain + ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: + raise NotImplementedError( + "Featurizer must have the capacity to encode trackers to feature vectors" + ) + def training_states_and_actions( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[State]], List[List[Text]]]: @@ -103,16 +131,23 @@ def training_states_and_actions( Returns: A tuple of list of states and list of actions. """ - raise NotImplementedError( - "Featurizer must have the capacity to encode trackers to feature vectors" - ) + ( + trackers_as_states, + trackers_as_actions, + _, + ) = self.training_states_actions_and_entities(trackers, domain) + return trackers_as_states, trackers_as_actions def featurize_trackers( self, trackers: List[DialogueStateTracker], domain: Domain, interpreter: NaturalLanguageInterpreter, - ) -> Tuple[List[List[Dict[Text, List["Features"]]]], np.ndarray]: + ) -> Tuple[ + List[List[Dict[Text, List["Features"]]]], + np.ndarray, + List[List[Dict[Text, List["Features"]]]], + ]: """Featurize the training trackers. Args: @@ -137,14 +172,17 @@ def featurize_trackers( self.state_featurizer.prepare_from_domain(domain) - trackers_as_states, trackers_as_actions = self.training_states_and_actions( - trackers, domain - ) + ( + trackers_as_states, + trackers_as_actions, + trackers_as_entities, + ) = self.training_states_actions_and_entities(trackers, domain) tracker_state_features = self._featurize_states(trackers_as_states, interpreter) label_ids = self._convert_labels_to_ids(trackers_as_actions, domain) + entity_tags = self._create_entity_tags(trackers_as_entities, interpreter) - return tracker_state_features, label_ids + return tracker_state_features, label_ids, entity_tags @staticmethod def _choose_last_user_input( @@ -252,9 +290,9 @@ class FullDialogueTrackerFeaturizer(TrackerFeaturizer): Training data is padded up to the length of the longest dialogue with -1. """ - def training_states_and_actions( + def training_states_actions_and_entities( self, trackers: List[DialogueStateTracker], domain: Domain - ) -> Tuple[List[List[State]], List[List[Text]]]: + ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: """Transforms list of trackers to lists of states and actions. Training data is padded up to the length of the longest dialogue with -1. @@ -269,6 +307,7 @@ def training_states_and_actions( trackers_as_states = [] trackers_as_actions = [] + trackers_as_entities = [] logger.debug( "Creating states and action examples from " @@ -285,7 +324,12 @@ def training_states_and_actions( delete_first_state = False actions = [] + entities = [] + entity_data = {} for event in tracker.applied_events(): + if isinstance(event, UserUttered): + entity_data = self._entity_data(event) + if not isinstance(event, ActionExecuted): continue @@ -293,6 +337,7 @@ def training_states_and_actions( # only actions which can be # predicted at a stories start actions.append(event.action_name or event.action_text) + entities.append(entity_data) else: # unpredictable actions can be # only the first in the story @@ -303,13 +348,17 @@ def training_states_and_actions( ) delete_first_state = True + # reset entity_data for the the next turn + entity_data = {} + if delete_first_state: states = states[1:] trackers_as_states.append(states[:-1]) trackers_as_actions.append(actions) + trackers_as_entities.append(entities) - return trackers_as_states, trackers_as_actions + return trackers_as_states, trackers_as_actions, trackers_as_entities def prediction_states( self, @@ -386,9 +435,9 @@ def _hash_example( frozen_actions = (action,) return hash((frozen_states, frozen_actions)) - def training_states_and_actions( + def training_states_actions_and_entities( self, trackers: List[DialogueStateTracker], domain: Domain - ) -> Tuple[List[List[State]], List[List[Text]]]: + ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: """Transforms list of trackers to lists of states and actions. Training data is padded up to the length of the longest dialogue with -1. @@ -403,6 +452,7 @@ def training_states_and_actions( trackers_as_states = [] trackers_as_actions = [] + trackers_as_entities = [] # from multiple states that create equal featurizations # we only need to keep one. @@ -422,7 +472,11 @@ def training_states_and_actions( states = self._create_states(tracker, domain) states_length_for_action = 0 + entity_data = {} for event in tracker.applied_events(): + if isinstance(event, UserUttered): + entity_data = self._entity_data(event) + if not isinstance(event, ActionExecuted): continue @@ -448,15 +502,19 @@ def training_states_and_actions( trackers_as_actions.append( [event.action_name or event.action_text] ) + trackers_as_entities.append([entity_data]) else: trackers_as_states.append(sliced_states) trackers_as_actions.append([event.action_name or event.action_text]) + trackers_as_entities.append([entity_data]) + # reset entity_data for the the next turn + entity_data = {} pbar.set_postfix({"# actions": "{:d}".format(len(trackers_as_actions))}) logger.debug("Created {} action examples.".format(len(trackers_as_actions))) - return trackers_as_states, trackers_as_actions + return trackers_as_states, trackers_as_actions, trackers_as_entities def prediction_states( self, diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index 7170071d17ad..8510ab9c6852 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -23,8 +23,6 @@ from rasa.shared.core.generator import TrackerWithCachedStates from rasa.shared.utils.io import is_logging_disabled from rasa.core.constants import MEMOIZATION_POLICY_PRIORITY -from rasa.shared.core.constants import USER -from rasa.shared.nlu.constants import ENTITIES, ENTITY_ATTRIBUTE_TYPE logger = logging.getLogger(__name__) @@ -161,22 +159,7 @@ def _create_feature_key(self, states: List[State]) -> Text: # we sort keys to make sure that the same states # represented as dictionaries have the same json strings # quotes are removed for aesthetic reasons - - # Ignore the actual values of entities - # We are just interested whether an entity of a certain type was detected or not - _states = [] - for state in states: - _state = {} - for key, value in state.items(): - _state[key] = copy.deepcopy(value) - if USER == key and ENTITIES in _state[USER]: - _state[USER][ENTITIES] = [ - entity[ENTITY_ATTRIBUTE_TYPE] - for entity in _state[USER][ENTITIES] - ] - _states.append(_state) - - feature_str = json.dumps(_states, sort_keys=True).replace('"', "") + feature_str = json.dumps(states, sort_keys=True).replace('"', "") if self.ENABLE_FEATURE_STRING_COMPRESSION: compressed = zlib.compress( bytes(feature_str, rasa.shared.utils.io.DEFAULT_ENCODING) diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py index 0d5db5a6c01b..6517b8e3362b 100644 --- a/rasa/core/policies/policy.py +++ b/rasa/core/policies/policy.py @@ -143,7 +143,11 @@ def featurize_for_training( domain: Domain, interpreter: NaturalLanguageInterpreter, **kwargs: Any, - ) -> Tuple[List[List[Dict[Text, List["Features"]]]], np.ndarray]: + ) -> Tuple[ + List[List[Dict[Text, List["Features"]]]], + np.ndarray, + List[List[Dict[Text, List["Features"]]]], + ]: """Transform training trackers into a vector representation. The trackers, consisting of multiple turns, will be transformed @@ -163,7 +167,7 @@ def featurize_for_training( trackers """ - state_features, label_ids = self.featurizer.featurize_trackers( + state_features, label_ids, entity_tags = self.featurizer.featurize_trackers( training_trackers, domain, interpreter ) @@ -175,8 +179,9 @@ def featurize_for_training( ) state_features = state_features[:max_training_samples] label_ids = label_ids[:max_training_samples] + entity_tags = entity_tags[:max_training_samples] - return state_features, label_ids + return state_features, label_ids, entity_tags def train( self, diff --git a/rasa/core/policies/sklearn_policy.py b/rasa/core/policies/sklearn_policy.py index 93abf244d931..0126e60e15f7 100644 --- a/rasa/core/policies/sklearn_policy.py +++ b/rasa/core/policies/sklearn_policy.py @@ -233,7 +233,7 @@ def train( interpreter: NaturalLanguageInterpreter, **kwargs: Any, ) -> None: - tracker_state_features, label_ids = self.featurize_for_training( + tracker_state_features, label_ids, _ = self.featurize_for_training( training_trackers, domain, interpreter, **kwargs ) training_data, zero_state_features = model_data_utils.convert_to_data_format( diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index a7cac2b10880..65a6c376825e 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -28,6 +28,7 @@ VALID_FEATURE_TYPES, FEATURE_TYPE_SENTENCE, ENTITY_ATTRIBUTE_TYPE, + ENTITY_TAGS, ) from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.core.policies.policy import Policy @@ -45,6 +46,7 @@ from rasa.utils.tensorflow.model_data_utils import convert_to_data_format from rasa.utils.tensorflow.constants import ( LABEL, + IDS, TRANSFORMER_SIZE, NUM_TRANSFORMER_LAYERS, NUM_HEADS, @@ -104,7 +106,7 @@ logger = logging.getLogger(__name__) LABEL_KEY = LABEL -LABEL_SUB_KEY = "ids" +LABEL_SUB_KEY = IDS LENGTH = "length" SENTENCE_FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT] SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT, f"{LABEL}_{ACTION_TEXT}"] @@ -138,7 +140,7 @@ class TEDPolicy(Policy): # Hidden layer sizes for layers before the dialogue and label embedding layers. # The number of hidden layers is equal to the length of the corresponding # list. - # TODO add 2 parallel NNs: transformer for text and ffnn for names + # Hidden layer sizes for layers before the embedding layers for user message # and labels. # The number of hidden layers is equal to the length of the corresponding @@ -157,10 +159,14 @@ class TEDPolicy(Policy): }, CONCAT_DIMENSION: {TEXT: 128, ACTION_TEXT: 128, f"{LABEL}_{ACTION_TEXT}": 128}, ENCODING_DIMENSION: 50, - # Number of units in transformer + # Number of units in sequence transformer TRANSFORMER_SIZE: 128, - # Number of transformer layers + # Number of sequence transformer layers NUM_TRANSFORMER_LAYERS: 1, + # Number of units in dialogue transformer + f"{DIALOGUE}_{TRANSFORMER_SIZE}": 128, + # Number of dialogue transformer layers + f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}": 1, # Number of attention heads in transformer NUM_HEADS: 4, # If 'True' use key relative embeddings in attention @@ -360,6 +366,7 @@ def _create_model_data( self, tracker_state_features: List[List[Dict[Text, List["Features"]]]], label_ids: Optional[np.ndarray] = None, + entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]] = None, encoded_all_labels: Optional[List[Dict[Text, List["Features"]]]] = None, ) -> RasaModelData: """Combine all model related data into RasaModelData. @@ -377,7 +384,11 @@ def _create_model_data( """ model_data = RasaModelData(label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY) - if label_ids is not None and encoded_all_labels is not None: + if ( + label_ids is not None + and entity_tags is not None + and encoded_all_labels is not None + ): label_ids = np.array( [np.expand_dims(seq_label_ids, -1) for seq_label_ids in label_ids] @@ -391,6 +402,19 @@ def _create_model_data( attribute_data, self.zero_state_features = convert_to_data_format( tracker_state_features, featurizers=self.config[FEATURIZERS] ) + if self.config[ENTITY_RECOGNITION]: + # check that there are real entity tags + if any([any(turn_tags) for turn_tags in entity_tags]): + entity_tags_data, _ = convert_to_data_format(entity_tags) + model_data.add_data(entity_tags_data) + else: + # there are no "real" entity tags + logger.debug( + f"Entity recognition cannot be performed," + f"set {ENTITY_RECOGNITION} to False" + ) + self.config[ENTITY_RECOGNITION] = False + else: # method is called during prediction attribute_data, _ = convert_to_data_format( @@ -401,9 +425,6 @@ def _create_model_data( model_data.add_data(attribute_data) model_data.add_lengths(TEXT, SEQUENCE_LENGTH, TEXT, SEQUENCE) - model_data.add_lengths( - ENTITIES, SEQUENCE_LENGTH, ENTITIES, ENTITY_ATTRIBUTE_TYPE - ) model_data.add_lengths(ACTION_TEXT, SEQUENCE_LENGTH, ACTION_TEXT, SEQUENCE) # add the dialogue lengths @@ -437,7 +458,7 @@ def train( return # dealing with training data - tracker_state_features, label_ids = self.featurize_for_training( + tracker_state_features, label_ids, entity_tags = self.featurize_for_training( training_trackers, domain, interpreter, **kwargs ) @@ -447,7 +468,7 @@ def train( # extract actual training data to feed to model model_data = self._create_model_data( - tracker_state_features, label_ids, encoded_all_labels + tracker_state_features, label_ids, entity_tags, encoded_all_labels ) if model_data.is_empty(): logger.error( @@ -768,7 +789,11 @@ def _prepare_layers(self) -> None: self._prepare_encoding_layers(name) self._prepare_transformer_layer( - DIALOGUE, self.config[DROP_RATE_DIALOGUE], self.config[DROP_RATE_ATTENTION] + DIALOGUE, + self.config[f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}"], + self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"], + self.config[DROP_RATE_DIALOGUE], + self.config[DROP_RATE_ATTENTION], ) self._prepare_embed_layers(DIALOGUE) @@ -841,7 +866,7 @@ def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: all_labels_encoded = {} for key in self.tf_label_data.keys(): if key != LABEL_KEY: - attribute_features, _ = self._encode_real_features_per_attribute( + attribute_features, _, _ = self._encode_real_features_per_attribute( self.tf_label_data, key ) all_labels_encoded[key] = attribute_features @@ -880,8 +905,6 @@ def _emebed_dialogue( ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) - dialogue_transformer_output = dialogue_transformed - if self.max_history_tracker_featurizer_used: # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( @@ -891,11 +914,11 @@ def _emebed_dialogue( dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed) - return dialogue_embed, mask, dialogue_transformer_output + return dialogue_embed, mask, dialogue_transformed def _encode_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: # The input is a representation of 4d tensor of # shape (batch-size x dialogue-len x sequence-len x units) in 3d of shape # (sum of dialogue history length for all tensors in the batch x @@ -921,7 +944,7 @@ def _encode_features_per_attribute( def _encode_fake_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: attribute_features_list = tf_batch_data[attribute][SENTENCE] attribute_mask = tf_batch_data[attribute][MASK][0] @@ -941,11 +964,33 @@ def _encode_fake_features_per_attribute( attribute_features = tf.zeros( (batch_dim, dialogue_dim, units), dtype=tf.float32 ) - return attribute_features, tf.zeros(([0, 0, units])) + if attribute == TEXT: + # TODO handle the case if transformer is not created + # if self.config[f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}"] > 0: + # units = self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"] + # elif self.config[HIDDEN_LAYERS_SIZES][TEXT]: + # units = self.config[HIDDEN_LAYERS_SIZES][TEXT] + # else: + # for f in attribute_features_list: + # if isinstance(f, tf.SparseTensor): + # units += self.config[DENSE_DIMENSION][attribute] + # else: + # units += f.shape[-1] + + text_transformer_output = tf.zeros( + (0, 0, self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"]), dtype=tf.float32 + ) + text_sequence_lengths = tf.zeros((0, 1), dtype=tf.int32) + else: + # simulate None with empty tensor of zeros + text_transformer_output = tf.zeros((0,)) + text_sequence_lengths = tf.zeros((0,)) + + return attribute_features, text_transformer_output, text_sequence_lengths def _encode_real_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Encodes features for a given attribute. Args: @@ -956,23 +1001,25 @@ def _encode_real_features_per_attribute( Returns: A tensor combining all features for `attribute` """ - text_transformer_output = tf.zeros([0, 0, 0]) + # simulate None with empty tensor of zeros + text_transformer_output = tf.zeros((0,)) + text_sequence_lengths = tf.zeros((0,)) if attribute in SEQUENCE_FEATURES_TO_ENCODE: # sequence_lengths contain `0` for "fake" features, while # tf_batch_data[attribute] contain only "real" features _sequence_lengths = tf_batch_data[attribute][SEQUENCE_LENGTH][0] # extract only nonzero lengths and cast to int - _sequence_lengths = tf.cast( + sequence_lengths = tf.cast( tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32 ) # boolean mask returns flat tensor - _sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1) + sequence_lengths = tf.expand_dims(sequence_lengths, axis=-1) mask_sequence_text = tf.squeeze( - self._compute_mask(_sequence_lengths), axis=1 + self._compute_mask(sequence_lengths), axis=1 ) - sequence_lengths = _sequence_lengths + 1 + sequence_lengths = sequence_lengths + 1 mask_text = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) attribute_features, _, _, _ = self._create_sequence( @@ -989,6 +1036,44 @@ def _encode_real_features_per_attribute( if attribute == TEXT: text_transformer_output = attribute_features + text_sequence_lengths = sequence_lengths + + if self.max_history_tracker_featurizer_used: + # get the location of all last dialogue inputs + dialogue_lengths = tf.cast( + tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 + ) + # TODO precompute dialogue_indices after creation of tf_batch_data + dialogue_indices = ( + tf.map_fn( + tf.range, + dialogue_lengths, + fn_output_signature=tf.RaggedTensorSpec( + shape=[None], dtype=tf.int32 + ), + ) + ).values + last_dialogue_mask = tf.math.logical_not( + tf.cast( + tf.concat( + [dialogue_indices, tf.zeros((1,), dtype=tf.int32)], + axis=0, + )[1:], + dtype=tf.bool, + ) + ) + + # get only the indices of real text inputs + last_dialogue_mask = tf.boolean_mask( + last_dialogue_mask, tf.reshape(_sequence_lengths, (-1,)) + ) + # pick last vector if max history featurizer is used + text_transformer_output = tf.boolean_mask( + text_transformer_output, last_dialogue_mask + ) + text_sequence_lengths = tf.boolean_mask( + text_sequence_lengths, last_dialogue_mask + ) # resulting attribute features will have shape # combined batch dimension and dialogue length x 1 x units @@ -1006,7 +1091,7 @@ def _encode_real_features_per_attribute( tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}" ) - if attribute in set(SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE): + if attribute in SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: attribute_features = self._tf_layers[f"ffnn.{attribute}"]( attribute_features ) @@ -1014,7 +1099,7 @@ def _encode_real_features_per_attribute( # attribute_mask has shape batch x dialogue_len x 1 attribute_mask = tf_batch_data[attribute][MASK][0] - if attribute in set(SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES): + if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: dialogue_lengths = tf.cast( tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 ) @@ -1027,197 +1112,21 @@ def _encode_real_features_per_attribute( # convert them back to their original shape of # batch size x dialogue length x units attribute_features = self._convert_to_original_shape( - attribute_features, attribute_mask, dialogue_lengths, False - ) - - return attribute_features, text_transformer_output - - def _batch_loss_entities( - self, - tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], - dialogue_transformer_output: tf.Tensor, - text_transformer_output: tf.Tensor, - ) -> tf.Tensor: - if ENTITY_ATTRIBUTE_TYPE not in tf_batch_data.get(ENTITIES, {}): - return tf.constant(0) - - # TODO tf.cond - if tf.shape(text_transformer_output)[0] == 0: - return tf.constant(0) - - # To calculate the loss for entities we need the output of the text - # sequence transformer (shape: combined batch dialogue dimension x - # sequence length x units), the output of the dialogue transformer - # (shape: batch size x dialogue length x units) and the tag ids for the - # entities (shape: combined batch dialogue dimension x sequence length x units) - # The combined batch dialogue dimension for the text sequence transformer - # and the tag ids matches. - # In order to process the tensors, they need to have the same shape. - # Convert the output of the dialogue transformer to shape - # (combined batch dialogue dimension x sequence length x units). - # Note: The CRF layer cannot handle 4D tensors. E.g. we cannot use the shape - # batch size x dialogue length x sequence length x units - - tag_ids = tf_batch_data[ENTITIES][ENTITY_ATTRIBUTE_TYPE][0] - # add a zero (no entity) for the sentence features to match the shape of - # inputs - tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) - - # convert the output of the dialogue transformer to shape - # combined batch dialogue dimension x sequence length x units - batch_dim = tf.shape(dialogue_transformer_output)[0] - dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( - dialogue_transformer_output, tf_batch_data - ) - # get only the dialogues that contain a user utterance - dialogue_transformer_output = tf.boolean_mask( - dialogue_transformer_output, - tf.squeeze(tf_batch_data[TEXT][SEQUENCE_LENGTH][0], axis=-1), - ) - - # repeat the dialogue transformer output sequence-length-times to get the - # same shape as the text sequence transformer output - sequence_dimension = tf.shape(text_transformer_output)[1] - dialogue_transformer_output = tf.repeat( - tf.expand_dims(dialogue_transformer_output, axis=1), - sequence_dimension, - axis=1, - ) - # add the output of the dialogue transformer to the output of the text - # sequence transformer (adding context) - # resulting shape - # (combined batch and dialogue dimension x sequence length x units) - text_transformed = tf.add(text_transformer_output, dialogue_transformer_output) - - # we need the sequence length and the mask for the CRF layer - _sequence_lengths = tf_batch_data[TEXT][SEQUENCE_LENGTH][0] - # extract only nonzero lengths and cast to int - _sequence_lengths = tf.cast( - tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32 - ) - # boolean mask returns flat tensor - _sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1) - # + 1 for sentence features - sequence_lengths = _sequence_lengths + 1 - mask = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) - - if self.max_history_tracker_featurizer_used: - # if the max history tracker featurizer is used we just want the last - # dialogues that contain a user utterance for every batch example - - # the attribute mask indicates which dialogue contains a user utterance - attribute_mask = tf_batch_data[TEXT][MASK][0] - # get indices of all dialogues that contain a user utterance - # shape: (combined batch dialogue dimension x 2) - # TODO it seems like there are sometimes dialogues that do not have any - # text features, but that should not be - indices_of_text_dialogues = tf.where( - tf.not_equal(tf.squeeze(attribute_mask), 0) - ) - # get the index of the last dialogues indices for every batch example - indices_of_last_text_dialogue_indices = ( - tf.cumsum( - tf.squeeze( - tf.cast(tf.reduce_sum(attribute_mask, axis=1), dtype=tf.int32) - ) - ) - - 1 - ) - # get only those the indices_of_text_dialogues of the last dialogues - # resulting shape of indices (batch size x 2) - indices_of_text_dialogues = tf.gather( - indices_of_text_dialogues, indices_of_last_text_dialogue_indices - ) - - # We now hove the indices of the relevant dialogues. However, - # text_transformed has a different shape (first dimension is the combined - # batch dialogue dimension). Thus we need to map the - # indices_of_text_dialogues into this shape. - cumsum_sequence_length = tf.squeeze( - tf.cast(tf.cumsum(sequence_lengths, axis=0), dtype=tf.int32) - ) - last_dialogue_indices = tf.map_fn( - lambda x: cumsum_sequence_length[x[0]] + x[1], - tf.cast(indices_of_text_dialogues, dtype=tf.int32), - ) - - # build up indices to get the last dialogues from text_transformed and the - # other tensors - dialogue_indices = tf.repeat( - tf.expand_dims(last_dialogue_indices, axis=1), - sequence_dimension, - axis=1, - ) - sequence_indices = tf.repeat( - tf.expand_dims(tf.range(sequence_dimension), axis=0), batch_dim, axis=0 - ) - indices = tf.stack([dialogue_indices, sequence_indices], axis=2) - - # get all last dialogues from text_transformed using the above indices - # resulting shape (batch size x sequence length x units) - text_transformed = tf.gather_nd(text_transformed, indices) - # do the same for the other tensors - tag_ids = tf.gather_nd(tag_ids, indices) - mask = tf.gather_nd(mask, indices) - # as sequence_lengths is a 1D tensor use tf.gather instead of tf.gather_nd - sequence_lengths = tf.gather(sequence_lengths, last_dialogue_indices) - - loss, f1, _ = self._calculate_entity_loss( - text_transformed, - tag_ids, - mask, - tf.squeeze(sequence_lengths), - ENTITY_ATTRIBUTE_TYPE, + attribute_features, attribute_mask, dialogue_lengths ) - self.entity_loss.update_state(loss) - self.entity_f1.update_state(f1) - - return loss - - @staticmethod - def _combine_batch_and_dialogue_dimension( - tensor: tf.Tensor, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] - ): - """Combines the batch and dialogue dimension of the given tensor. - - Before the tensor has shape (batch-size x dialogue-length x ...). - Afterwards the tensor will have shape - (combined batch and dialogue dimension x ...). - - Args: - tensor: The tensor - tf_batch_data: the batch data - - Returns: - The converted tensor - """ - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - - batch_dim = tf.shape(dialogue_lengths)[0] - batch_indices = tf.repeat(tf.range(batch_dim), dialogue_lengths) - dialogue_indices = ( - tf.map_fn( - tf.range, - dialogue_lengths, - fn_output_signature=tf.RaggedTensorSpec(shape=[None], dtype=tf.int32), - ) - ).values - indices = tf.stack([batch_indices, dialogue_indices], axis=1) - - return tf.gather_nd(tensor, indices) + return attribute_features, text_transformer_output, text_sequence_lengths @staticmethod def _convert_to_original_shape( attribute_features: tf.Tensor, attribute_mask: tf.Tensor, dialogue_lengths: tf.Tensor, - consider_sequence_dimension: bool, ) -> tf.Tensor: """Transform attribute features back to original shape. - Given shape: combined batch and dialogue dimension x sequence length x units - Original shape: batch x dialogue length x sequence length x units + Given shape: (combined batch and dialogue dimension x 1 x units) + Original shape: (batch x dialogue length x units) Args: attribute_features: the "real" features to convert @@ -1231,8 +1140,8 @@ def _convert_to_original_shape( """ # in order to convert the attribute features with shape - # combined batch-size and dialogue length x sequence length x units - # to a shape of batch-size x dialogue length x sequence length x units + # (combined batch-size and dialogue length x 1 x units) + # to a shape of (batch-size x dialogue length x units) # we use tf.scatter_nd. Therefore, we need the target shape and the indices # mapping the values of attribute features to the position in the resulting # tensor. @@ -1247,7 +1156,7 @@ def _convert_to_original_shape( non_fake_dialogue_lengths = tf.reduce_sum(attribute_mask, axis=-1) # create the batch indices batch_indices = tf.repeat(tf.range(batch_dim), non_fake_dialogue_lengths) - + # TODO precompute dialogue_indices after creation of tf_batch_data dialogue_indices = ( tf.map_fn( tf.range, @@ -1268,21 +1177,108 @@ def _convert_to_original_shape( indices = tf.stack([batch_indices, dialogue_indices], axis=1) - if consider_sequence_dimension: - sequence_length = tf.shape(attribute_features)[1] - shape = tf.convert_to_tensor( - [batch_dim, dialogue_dim, sequence_length, units] - ) - return tf.scatter_nd(indices, attribute_features, shape) - shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units]) attribute_features = tf.squeeze(attribute_features, axis=1) return tf.scatter_nd(indices, attribute_features, shape) + def _batch_loss_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> tf.Tensor: + + return tf.cond( + tf.shape(text_transformer_output)[0] > 0, + lambda: self._real_batch_loss_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ), + lambda: tf.constant(0.0), + ) + + def _real_batch_loss_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> tf.Tensor: + # To calculate the loss for entities we need the output of the text + # sequence transformer (shape: real entity dim x + # sequence length x units), the output of the dialogue transformer + # (shape: batch size x dialogue length x units) and the tag ids for the + # entities (shape: real entity dim x sequence length - 1 x units) + # The real entity dimension for the text sequence transformer + # and the tag ids matches. + # In order to process the tensors, they need to have the same shape. + # Convert the output of the dialogue transformer to shape + # (real entity dim x 1 x units). + # Note: The CRF layer cannot handle 4D tensors. E.g. we cannot use the shape + # batch size x dialogue length x sequence length x units + + # convert the output of the dialogue transformer + # to shape (real entity dim x 1 x units) + attribute_mask = tf_batch_data[TEXT][MASK][0] + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) + + if self.max_history_tracker_featurizer_used: + # pick last vector if max history featurizer is used + attribute_mask = tf.expand_dims( + self._last_token(attribute_mask, dialogue_lengths), axis=1 + ) + dialogue_transformer_output = tf.boolean_mask( + dialogue_transformer_output, tf.squeeze(attribute_mask, axis=-1) + ) + + # boolean mask removed axis=1, add it back + dialogue_transformer_output = tf.expand_dims( + dialogue_transformer_output, axis=1 + ) + + # broadcast the dialogue transformer output sequence-length-times to get the + # same shape as the text sequence transformer output + dialogue_transformer_output = tf.broadcast_to( + dialogue_transformer_output, tf.shape(text_transformer_output) + ) + + # concat the output of the dialogue transformer to the output of the text + # sequence transformer (adding context) + # resulting shape + # (real entity dim x sequence length x 2 units) + text_transformed = tf.concat( + [text_transformer_output, dialogue_transformer_output], axis=-1 + ) + + mask = tf.squeeze(self._compute_mask(text_sequence_lengths), axis=1) + # remove additional dims and sentence features + text_sequence_lengths = tf.reshape(text_sequence_lengths, (-1,)) - 1 + + tag_ids = tf_batch_data[ENTITY_TAGS][IDS][0] + # add a zero (no entity) for the sentence features to match the shape of + # inputs + tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) + + loss, f1, _ = self._calculate_entity_loss( + text_transformed, + tag_ids, + mask, + text_sequence_lengths, + ENTITY_ATTRIBUTE_TYPE, + ) + + self.entity_loss.update_state(loss) + self.entity_f1.update_state(f1) + + return loss + def _process_batch_data( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] - ) -> Tuple[tf.Tensor, Optional[tf.Tensor]]: + ) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]: """Encodes batch data. Combines intent and text and action name and action text if both are present. @@ -1294,18 +1290,21 @@ def _process_batch_data( Tensor: encoding of all features in the batch, combined; """ # encode each attribute present in tf_batch_data - text_transformer_output = tf.zeros([0, 0, 0]) - + text_transformer_output = None + text_sequence_lengths = None batch_encoded = {} - for key in tf_batch_data.keys(): - if LABEL_KEY not in key and DIALOGUE not in key: + for attribute in tf_batch_data.keys(): + if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: ( attribute_features, _text_transformer_output, - ) = self._encode_features_per_attribute(tf_batch_data, key) - batch_encoded[key] = attribute_features - if tf.shape(_text_transformer_output)[0] > 0: + _text_sequence_lengths, + ) = self._encode_features_per_attribute(tf_batch_data, attribute) + + batch_encoded[attribute] = attribute_features + if attribute == TEXT: text_transformer_output = _text_transformer_output + text_sequence_lengths = _text_sequence_lengths # if both action text and action name are present, combine them; otherwise, # return the one which is present @@ -1340,7 +1339,7 @@ def _process_batch_data( batch_features = tf.concat(batch_features, axis=-1) - return batch_features, text_transformer_output + return batch_features, text_transformer_output, text_sequence_lengths @staticmethod def _get_labels_embed( @@ -1372,7 +1371,11 @@ def batch_loss( label_ids = tf_batch_data[LABEL_KEY][LABEL_SUB_KEY][0] labels_embed = self._get_labels_embed(label_ids, all_labels_embed) - dialogue_in, text_transformer_output = self._process_batch_data(tf_batch_data) + ( + dialogue_in, + text_transformer_output, + text_sequence_lengths, + ) = self._process_batch_data(tf_batch_data) ( dialogue_embed, dialogue_mask, @@ -1392,10 +1395,17 @@ def batch_loss( ) losses.append(loss) - if self.config[ENTITY_RECOGNITION]: + if ( + self.config[ENTITY_RECOGNITION] + and text_transformer_output is not None + and text_sequence_lengths is not None + ): losses.append( self._batch_loss_entities( - tf_batch_data, dialogue_transformer_output, text_transformer_output + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, ) ) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index d6d74c63eaec..eb9268c02887 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -44,6 +44,7 @@ from rasa.nlu.model import Metadata from rasa.utils.tensorflow.constants import ( LABEL, + IDS, HIDDEN_LAYERS_SIZES, SHARE_HIDDEN_LAYERS, TRANSFORMER_SIZE, @@ -101,8 +102,7 @@ SPARSE = "sparse" DENSE = "dense" LABEL_KEY = LABEL -LABEL_SUB_KEY = "ids" -TAG_IDS = "tag_ids" +LABEL_SUB_KEY = IDS POSSIBLE_TAGS = [ENTITY_ATTRIBUTE_TYPE, ENTITY_ATTRIBUTE_ROLE, ENTITY_ATTRIBUTE_GROUP] diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index d2afb3f84c81..0072a1e4f975 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -67,7 +67,7 @@ # State is a dictionary with keys (USER, PREVIOUS_ACTION, SLOTS, ACTIVE_LOOP) # representing the origin of a SubState; # the values are SubStates, that contain the information needed for featurization -SubState = Dict[Text, Union[Text, Tuple[Union[float, Text, Dict]]]] +SubState = Dict[Text, Union[Text, Tuple[Union[float, Text]]]] State = Dict[Text, SubState] logger = logging.getLogger(__name__) @@ -822,9 +822,7 @@ def input_states(self) -> List[Text]: + self.form_names ) - def _get_featurized_entities( - self, latest_message: UserUttered - ) -> List[Dict[Text, Any]]: + def _get_featurized_entities(self, latest_message: UserUttered) -> Set[Text]: intent_name = latest_message.intent.get( rasa.shared.nlu.constants.INTENT_NAME_KEY ) @@ -855,11 +853,7 @@ def _get_featurized_entities( # concatenated entity labels with their corresponding roles and groups labels wanted_entities = set(intent_config.get(USED_ENTITIES_KEY, entity_names)) - return [ - entity - for entity in latest_message.entities - if entity["entity"] in entity_names & wanted_entities - ] + return entity_names & wanted_entities def _get_user_sub_state( self, tracker: "DialogueStateTracker" diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py index 1f3f94240eaf..c254649a9870 100644 --- a/rasa/shared/core/events.py +++ b/rasa/shared/core/events.py @@ -458,7 +458,7 @@ def as_sub_state(self) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: if self.intent_name and not self.use_text_for_featurization: out[INTENT] = self.intent_name if entities: - out[ENTITIES] = self.entities + out[ENTITIES] = entities return out diff --git a/rasa/shared/core/generator.py b/rasa/shared/core/generator.py index dcb69f20cea4..994ee52fedaf 100644 --- a/rasa/shared/core/generator.py +++ b/rasa/shared/core/generator.py @@ -31,7 +31,6 @@ ) from rasa.shared.utils.io import is_logging_disabled import rasa.shared.utils.io -from rasa.shared.nlu.constants import ENTITIES logger = logging.getLogger(__name__) @@ -103,18 +102,10 @@ def past_states_for_hashing(self, domain: Domain) -> Deque[FrozenState]: @staticmethod def _unfreeze_states(frozen_states: Deque[FrozenState]) -> List[State]: - states = [] - for frozen_state in frozen_states: - state_dict = {} - for key, value in dict(frozen_state).items(): - _value = dict(value) - if ENTITIES in _value: - _value[ENTITIES] = [ - dict(frozen_entity) for frozen_entity in _value[ENTITIES] - ] - state_dict[key] = _value - states.append(state_dict) - return states + return [ + {key: dict(value) for key, value in dict(frozen_state).items()} + for frozen_state in frozen_states + ] def past_states(self, domain: Domain) -> List[State]: states_for_hashing = self.past_states_for_hashing(domain) diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py index 247c6825ba65..47c1c7744362 100644 --- a/rasa/shared/core/trackers.py +++ b/rasa/shared/core/trackers.py @@ -29,7 +29,6 @@ ENTITY_ATTRIBUTE_ROLE, ACTION_TEXT, ACTION_NAME, - ENTITIES, ) from rasa.shared.core import events from rasa.shared.core.constants import ( @@ -69,9 +68,7 @@ logger = logging.getLogger(__name__) # same as State but with Dict[...] substituted with FrozenSet[Tuple[...]] -FrozenState = FrozenSet[ - Tuple[Text, FrozenSet[Tuple[Text, Tuple[Union[float, Text, FrozenSet]]]]] -] +FrozenState = FrozenSet[Tuple[Text, FrozenSet[Tuple[Text, Tuple[Union[float, Text]]]]]] class EventVerbosity(Enum): @@ -234,19 +231,14 @@ def _events_for_verbosity( @staticmethod def freeze_current_state(state: State) -> FrozenState: - state_copy = copy.deepcopy(state) - frozen_state = {} - for key, values in state_copy.items(): - if isinstance(values, dict): - if ENTITIES in values and isinstance(values[ENTITIES][0], dict): - values[ENTITIES] = tuple( - [frozenset(e.items()) for e in values[ENTITIES]] - ) - frozen_state[key] = frozenset(values.items()) - else: - frozen_state[key] = frozenset(values) - - return frozenset(frozen_state.items()) + return frozenset( + { + key: frozenset(values.items()) + if isinstance(values, Dict) + else frozenset(values) + for key, values in state.items() + }.items() + ) def past_states(self, domain: Domain) -> List[State]: """Generate the past states of this tracker based on the history. diff --git a/rasa/shared/nlu/constants.py b/rasa/shared/nlu/constants.py index ee85a005f935..53040f0d4c53 100644 --- a/rasa/shared/nlu/constants.py +++ b/rasa/shared/nlu/constants.py @@ -26,6 +26,7 @@ TRAINABLE_EXTRACTORS = {"MitieEntityExtractor", "CRFEntityExtractor", "DIETClassifier"} ENTITIES = "entities" +ENTITY_TAGS = "entity_tags" ENTITY_ATTRIBUTE_TYPE = "entity" ENTITY_ATTRIBUTE_GROUP = "group" ENTITY_ATTRIBUTE_ROLE = "role" diff --git a/rasa/shared/nlu/training_data/features.py b/rasa/shared/nlu/training_data/features.py index c556d6e6c3ff..755215fae35e 100644 --- a/rasa/shared/nlu/training_data/features.py +++ b/rasa/shared/nlu/training_data/features.py @@ -16,21 +16,11 @@ def __init__( attribute: Text, origin: Union[Text, List[Text]], ) -> None: - self._validate_feature_type(feature_type) - self.features = features self.type = feature_type self.origin = origin self.attribute = attribute - @staticmethod - def _validate_feature_type(feature_type: Text) -> None: - if feature_type not in VALID_FEATURE_TYPES: - raise ValueError( - f"Invalid feature type '{feature_type}' used. Valid feature types are: " - f"{VALID_FEATURE_TYPES}." - ) - def is_sparse(self) -> bool: """Checks if features are sparse or not. diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 06f81775a673..7957e84f8351 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -1,6 +1,7 @@ # constants for configuration parameters of our tensorflow models LABEL = "label" +IDS = "ids" HIDDEN_LAYERS_SIZES = "hidden_layers_sizes" SHARE_HIDDEN_LAYERS = "share_hidden_layers" diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index bfe483ebffef..6b1242d90a0a 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -732,14 +732,16 @@ def _prepare_ffnn_layer( def _prepare_transformer_layer( self, name: Text, + num_layers: int, + units: int, drop_rate: float, drop_rate_attention: float, prefix: Text = "transformer", ): if self.config[NUM_TRANSFORMER_LAYERS] > 0: self._tf_layers[f"{prefix}.{name}"] = TransformerEncoder( - self.config[NUM_TRANSFORMER_LAYERS], - self.config[TRANSFORMER_SIZE], + num_layers, + units, self.config[NUM_HEADS], self.config[TRANSFORMER_SIZE] * 4, self.config[REGULARIZATION_CONSTANT], @@ -834,7 +836,11 @@ def _prepare_input_layers(self, name: Text) -> None: def _prepare_sequence_layers(self, name: Text) -> None: self._prepare_input_layers(name) self._prepare_transformer_layer( - name, self.config[DROP_RATE], self.config[DROP_RATE_ATTENTION] + name, + self.config[NUM_TRANSFORMER_LAYERS], + self.config[TRANSFORMER_SIZE], + self.config[DROP_RATE], + self.config[DROP_RATE_ATTENTION], ) def _prepare_entity_recognition_layers(self) -> None: From d9a5378d59320637a8ac6e96259017ddadde9953 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 12:16:48 +0100 Subject: [PATCH 45/62] update entity prediction --- rasa/core/policies/ted_policy.py | 373 ++++++++++++++----------------- 1 file changed, 168 insertions(+), 205 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 65a6c376825e..e6f0e44d0455 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -140,7 +140,6 @@ class TEDPolicy(Policy): # Hidden layer sizes for layers before the dialogue and label embedding layers. # The number of hidden layers is equal to the length of the corresponding # list. - # Hidden layer sizes for layers before the embedding layers for user message # and labels. # The number of hidden layers is equal to the length of the corresponding @@ -685,9 +684,9 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": model_data_example, data_signature=model_data_example.get_signature(), config=meta, - max_history_tracker_featurizer_used=isinstance( - featurizer, MaxHistoryTrackerFeaturizer - ), + # during prediction we don't care about previous dialogue turns, + # so to save computation time, use only the last one + use_only_last_dialogue_turn=True, label_data=label_data, entity_tag_specs=entity_tag_specs, ) @@ -721,13 +720,13 @@ def __init__( self, data_signature: Dict[Text, Dict[Text, List[FeatureSignature]]], config: Dict[Text, Any], - max_history_tracker_featurizer_used: bool, + use_only_last_dialogue_turn: bool, label_data: RasaModelData, entity_tag_specs: Optional[List[EntityTagSpec]], ) -> None: super().__init__("TED", config, data_signature, label_data) - self.max_history_tracker_featurizer_used = max_history_tracker_featurizer_used + self.use_only_last_dialogue_turn = use_only_last_dialogue_turn self.predict_data_signature = { feature_name: features @@ -775,6 +774,8 @@ def _check_data(self) -> None: f"Cannot train '{self.__class__.__name__}' model." ) + # ---CREATING LAYERS HELPERS--- + def _prepare_layers(self) -> None: for name in self.data_signature.keys(): self._prepare_sparse_dense_layer_for(name, self.data_signature) @@ -860,6 +861,8 @@ def _prepare_encoding_layers(self, name: Text) -> None: self.config[DROP_RATE_DIALOGUE], ) + # ---GRAPH BUILDING HELPERS--- + def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0] # labels cannot have all features "fake" @@ -905,7 +908,7 @@ def _emebed_dialogue( ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) - if self.max_history_tracker_featurizer_used: + if self.use_only_last_dialogue_turn: # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( self._last_token(dialogue_transformed, dialogue_lengths), 1 @@ -1038,7 +1041,7 @@ def _encode_real_features_per_attribute( text_transformer_output = attribute_features text_sequence_lengths = sequence_lengths - if self.max_history_tracker_featurizer_used: + if self.use_only_last_dialogue_turn: # get the location of all last dialogue inputs dialogue_lengths = tf.cast( tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 @@ -1182,32 +1185,78 @@ def _convert_to_original_shape( return tf.scatter_nd(indices, attribute_features, shape) - def _batch_loss_entities( - self, - tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], - dialogue_transformer_output: tf.Tensor, - text_transformer_output: tf.Tensor, - text_sequence_lengths: tf.Tensor, - ) -> tf.Tensor: + def _process_batch_data( + self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + ) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]: + """Encodes batch data. - return tf.cond( - tf.shape(text_transformer_output)[0] > 0, - lambda: self._real_batch_loss_entities( - tf_batch_data, - dialogue_transformer_output, - text_transformer_output, - text_sequence_lengths, - ), - lambda: tf.constant(0.0), - ) + Combines intent and text and action name and action text if both are present. - def _real_batch_loss_entities( + Args: + tf_batch_data: dictionary mapping every attribute to its features and masks + + Returns: + Tensor: encoding of all features in the batch, combined; + """ + # encode each attribute present in tf_batch_data + text_transformer_output = None + text_sequence_lengths = None + batch_encoded = {} + for attribute in tf_batch_data.keys(): + if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: + ( + attribute_features, + _text_transformer_output, + _text_sequence_lengths, + ) = self._encode_features_per_attribute(tf_batch_data, attribute) + + batch_encoded[attribute] = attribute_features + if attribute == TEXT: + text_transformer_output = _text_transformer_output + text_sequence_lengths = _text_sequence_lengths + + # if both action text and action name are present, combine them; otherwise, + # return the one which is present + + if ( + batch_encoded.get(ACTION_TEXT) is not None + and batch_encoded.get(ACTION_NAME) is not None + ): + batch_action = batch_encoded.pop(ACTION_TEXT) + batch_encoded.pop( + ACTION_NAME + ) + elif batch_encoded.get(ACTION_TEXT) is not None: + batch_action = batch_encoded.pop(ACTION_TEXT) + else: + batch_action = batch_encoded.pop(ACTION_NAME) + # same for user input + if ( + batch_encoded.get(INTENT) is not None + and batch_encoded.get(TEXT) is not None + ): + batch_user = batch_encoded.pop(INTENT) + batch_encoded.pop(TEXT) + elif batch_encoded.get(TEXT) is not None: + batch_user = batch_encoded.pop(TEXT) + else: + batch_user = batch_encoded.pop(INTENT) + + batch_features = [batch_user, batch_action] + # once we have user input and previous action, + # add all other attributes (SLOTS, ACTIVE_LOOP, etc.) to batch_features; + for key in batch_encoded.keys(): + batch_features.append(batch_encoded.get(key)) + + batch_features = tf.concat(batch_features, axis=-1) + + return batch_features, text_transformer_output, text_sequence_lengths + + def _reshape_for_entities( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], dialogue_transformer_output: tf.Tensor, text_transformer_output: tf.Tensor, text_sequence_lengths: tf.Tensor, - ) -> tf.Tensor: + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: # To calculate the loss for entities we need the output of the text # sequence transformer (shape: real entity dim x # sequence length x units), the output of the dialogue transformer @@ -1226,7 +1275,7 @@ def _real_batch_loss_entities( attribute_mask = tf_batch_data[TEXT][MASK][0] dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - if self.max_history_tracker_featurizer_used: + if self.use_only_last_dialogue_turn: # pick last vector if max history featurizer is used attribute_mask = tf.expand_dims( self._last_token(attribute_mask, dialogue_lengths), axis=1 @@ -1254,19 +1303,56 @@ def _real_batch_loss_entities( [text_transformer_output, dialogue_transformer_output], axis=-1 ) - mask = tf.squeeze(self._compute_mask(text_sequence_lengths), axis=1) + text_mask = tf.squeeze(self._compute_mask(text_sequence_lengths), axis=1) # remove additional dims and sentence features text_sequence_lengths = tf.reshape(text_sequence_lengths, (-1,)) - 1 + return text_transformed, text_mask, text_sequence_lengths + + # ---TRAINING--- + + def _batch_loss_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> tf.Tensor: + + return tf.cond( + tf.shape(text_transformer_output)[0] > 0, + lambda: self._real_batch_loss_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ), + lambda: tf.constant(0.0), + ) + + def _real_batch_loss_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> tf.Tensor: + + text_transformed, text_mask, text_sequence_lengths = self._reshape_for_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ) + tag_ids = tf_batch_data[ENTITY_TAGS][IDS][0] - # add a zero (no entity) for the sentence features to match the shape of - # inputs + # add a zero (no entity) for the sentence features to match the shape of inputs tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) loss, f1, _ = self._calculate_entity_loss( text_transformed, tag_ids, - mask, + text_mask, text_sequence_lengths, ENTITY_ATTRIBUTE_TYPE, ) @@ -1276,71 +1362,6 @@ def _real_batch_loss_entities( return loss - def _process_batch_data( - self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] - ) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]: - """Encodes batch data. - - Combines intent and text and action name and action text if both are present. - - Args: - tf_batch_data: dictionary mapping every attribute to its features and masks - - Returns: - Tensor: encoding of all features in the batch, combined; - """ - # encode each attribute present in tf_batch_data - text_transformer_output = None - text_sequence_lengths = None - batch_encoded = {} - for attribute in tf_batch_data.keys(): - if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: - ( - attribute_features, - _text_transformer_output, - _text_sequence_lengths, - ) = self._encode_features_per_attribute(tf_batch_data, attribute) - - batch_encoded[attribute] = attribute_features - if attribute == TEXT: - text_transformer_output = _text_transformer_output - text_sequence_lengths = _text_sequence_lengths - - # if both action text and action name are present, combine them; otherwise, - # return the one which is present - - if ( - batch_encoded.get(ACTION_TEXT) is not None - and batch_encoded.get(ACTION_NAME) is not None - ): - batch_action = batch_encoded.pop(ACTION_TEXT) + batch_encoded.pop( - ACTION_NAME - ) - elif batch_encoded.get(ACTION_TEXT) is not None: - batch_action = batch_encoded.pop(ACTION_TEXT) - else: - batch_action = batch_encoded.pop(ACTION_NAME) - # same for user input - if ( - batch_encoded.get(INTENT) is not None - and batch_encoded.get(TEXT) is not None - ): - batch_user = batch_encoded.pop(INTENT) + batch_encoded.pop(TEXT) - elif batch_encoded.get(TEXT) is not None: - batch_user = batch_encoded.pop(TEXT) - else: - batch_user = batch_encoded.pop(INTENT) - - batch_features = [batch_user, batch_action] - # once we have user input and previous action, - # add all other attributes (SLOTS, ACTIVE_LOOP, etc.) to batch_features; - for key in batch_encoded.keys(): - batch_features.append(batch_encoded.get(key)) - - batch_features = tf.concat(batch_features, axis=-1) - - return batch_features, text_transformer_output, text_sequence_lengths - @staticmethod def _get_labels_embed( label_ids: tf.Tensor, all_labels_embed: tf.Tensor @@ -1414,6 +1435,8 @@ def batch_loss( return tf.math.add_n(losses) + # ---PREDICTION--- + def prepare_for_predict(self) -> None: _, self.all_labels_embed = self._create_all_labels_embed() @@ -1438,7 +1461,11 @@ def batch_predict( batch_in, self.predict_data_signature ) - dialogue_in, text_transformer_output = self._process_batch_data(tf_batch_data) + ( + dialogue_in, + text_transformer_output, + text_sequence_lengths, + ) = self._process_batch_data(tf_batch_data) ( dialogue_embed, dialogue_mask, @@ -1446,15 +1473,6 @@ def batch_predict( ) = self._emebed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) - predictions = {} - - if self.config[ENTITY_RECOGNITION]: - predictions.update( - self._batch_predict_entities( - tf_batch_data, dialogue_transformer_output, text_transformer_output - ) - ) - sim_all = self._tf_layers[f"loss.{LABEL}"].sim( dialogue_embed[:, :, tf.newaxis, :], self.all_labels_embed[tf.newaxis, tf.newaxis, :, :], @@ -1464,8 +1482,22 @@ def batch_predict( scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( sim_all, self.config[SIMILARITY_TYPE] ) + predictions = {"action_scores": scores, "similarities": sim_all} - predictions.update({"action_scores": scores, "similarities": sim_all}) + if ( + self.config[ENTITY_RECOGNITION] + and text_transformer_output is not None + and text_sequence_lengths is not None + ): + pred_ids, confidences = self._batch_predict_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ) + name = ENTITY_ATTRIBUTE_TYPE + predictions[f"e_{name}_ids"] = pred_ids + predictions[f"e_{name}_scores"] = confidences return predictions @@ -1474,112 +1506,43 @@ def _batch_predict_entities( tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], dialogue_transformer_output: tf.Tensor, text_transformer_output: tf.Tensor, - ) -> Dict[Text, tf.Tensor]: - predictions: Dict[Text, tf.Tensor] = {} - - # TODO Update according to batch loss entities - - dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - - # convert the output of the text sequence transformer to shape - # batch-size x dialogue length x sequence length x 1 - text_seq_transformer_output = self._convert_to_original_shape( - text_transformer_output, - tf_batch_data[TEXT][MASK][0], - dialogue_lengths, - True, - ) - # convert the output of the text sequence transformer to shape - # combined batch dialogue dimension x sequence length x units - text_seq_transformer_output = self._combine_batch_and_dialogue_dimension( - text_seq_transformer_output, tf_batch_data - ) - - # convert the output of the dialogue transformer to shape - # combined batch dialogue dimension x sequence length x units - dialogue_transformer_output = self._combine_batch_and_dialogue_dimension( - dialogue_transformer_output, tf_batch_data + text_sequence_lengths: tf.Tensor, + ) -> Tuple[tf.Tensor, tf.Tensor]: + return tf.cond( + tf.shape(text_transformer_output)[0] > 0, + lambda: self._real_batch_predict_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, + ), + lambda: ( + # the output is of shape (batch_size, max_seq_len) + tf.zeros(tf.shape(text_transformer_output)[:2], dtype=tf.int32), + tf.zeros(tf.shape(text_transformer_output)[:2], dtype=tf.float32), + ), ) - # repeat the dialogue transformer output sequence-length-times to get the - # same shape as the text sequence transformer output - sequence_dimension = tf.shape(text_seq_transformer_output)[1] - dialogue_transformer_output = tf.repeat( - tf.expand_dims(dialogue_transformer_output, axis=1), - sequence_dimension, - axis=1, - ) - # add the output of the dialogue transformer to the output of the text - # sequence transformer (adding context) - # resulting shape - # (combined batch and dialogue dimension x sequence length x units) - text_transformed = tf.add( - text_seq_transformer_output, dialogue_transformer_output - ) + def _real_batch_predict_entities( + self, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + dialogue_transformer_output: tf.Tensor, + text_transformer_output: tf.Tensor, + text_sequence_lengths: tf.Tensor, + ) -> Tuple[tf.Tensor, tf.Tensor]: - # we need the sequence length and the mask for the CRF layer - _sequence_lengths = tf_batch_data[TEXT][SEQUENCE_LENGTH][0] - # extract only nonzero lengths and cast to int - _sequence_lengths = tf.cast( - tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32 - ) - # boolean mask returns flat tensor - _sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1) - # + 1 for sentence features - sequence_lengths = _sequence_lengths + 1 - - # convert mask and sequence length to correct shape - sequence_lengths = self._convert_to_original_shape( - tf.expand_dims(sequence_lengths, axis=-1), - tf_batch_data[TEXT][MASK][0], - dialogue_lengths, - True, - ) - sequence_lengths = self._combine_batch_and_dialogue_dimension( - sequence_lengths, tf_batch_data + text_transformed, _, text_sequence_lengths = self._reshape_for_entities( + tf_batch_data, + dialogue_transformer_output, + text_transformer_output, + text_sequence_lengths, ) - if self.max_history_tracker_featurizer_used: - batch_dim = tf.size(dialogue_lengths) - - # the first dimension of text transformed is the combined batch and dialogue - # dimension, which corresponds to the sum of all dialogue lengths - # if the max history tracker featurizer is used we just want the last - # dialogues of every batch example - - # get the indices of all last dialogues - last_dialogue_indices = tf.cumsum(dialogue_lengths) - 1 - - # build up indices to get the last dialogues from text_transformed - dialogue_indices = tf.repeat( - tf.expand_dims(last_dialogue_indices, axis=1), - sequence_dimension, - axis=1, - ) - sequence_indices = tf.repeat( - tf.expand_dims(tf.range(sequence_dimension), axis=0), batch_dim, axis=0 - ) - indices = tf.stack([dialogue_indices, sequence_indices], axis=2) - - # get all last dialogues from text_transformed using the above indices - # resulting shape (batch size x sequence length x units) - text_transformed = tf.gather_nd(text_transformed, indices) - # do the same for the other tensors - sequence_lengths = tf.gather( - tf.squeeze(sequence_lengths), last_dialogue_indices - ) - name = ENTITY_ATTRIBUTE_TYPE _logits = self._tf_layers[f"embed.{name}.logits"](text_transformed) - pred_ids, confidences = self._tf_layers[f"crf.{name}"]( - _logits, sequence_lengths - 1 - ) - - predictions[f"e_{name}_ids"] = pred_ids - predictions[f"e_{name}_scores"] = confidences - return predictions + return self._tf_layers[f"crf.{name}"](_logits, text_sequence_lengths) # pytype: enable=key-error From c287d8c14728c787ca054fa87ca17992877560c7 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 14:01:24 +0100 Subject: [PATCH 46/62] fix randomness and shapes --- examples/e2ebot/config.yml | 4 ++-- examples/e2ebot/data/stories.yml | 2 +- examples/e2ebot/domain.yml | 3 +++ rasa/core/policies/ted_policy.py | 12 +++++++++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/examples/e2ebot/config.yml b/examples/e2ebot/config.yml index f38558adb0ad..2d4a08aa4ae6 100644 --- a/examples/e2ebot/config.yml +++ b/examples/e2ebot/config.yml @@ -9,8 +9,8 @@ pipeline: analyzer: char_wb min_ngram: 1 max_ngram: 4 - - name: DIETClassifier - epochs: 200 +# - name: DIETClassifier +# epochs: 200 policies: - name: TEDPolicy epochs: 200 diff --git a/examples/e2ebot/data/stories.yml b/examples/e2ebot/data/stories.yml index bf884abf1856..cab5ea7113ca 100644 --- a/examples/e2ebot/data/stories.yml +++ b/examples/e2ebot/data/stories.yml @@ -10,7 +10,7 @@ stories: - story: sad path (text to text) steps: - - user: "Hello" + - user: "[Hello](bla)" - bot: "Welcome to moodbot. How are you feeling today?" - user: "Horrible" - bot: "Oh no! Here is a kitten photo. Did it help?" diff --git a/examples/e2ebot/domain.yml b/examples/e2ebot/domain.yml index 17b3faba2a75..d884f4cc40c3 100644 --- a/examples/e2ebot/domain.yml +++ b/examples/e2ebot/domain.yml @@ -9,3 +9,6 @@ actions: intents: - greet - mood_great + +entities: + - bla diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index e6f0e44d0455..059c5bce6711 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -438,6 +438,9 @@ def _create_model_data( FeatureArray(dialogue_lengths, number_of_dimensions=1) ] + # make sure all keys are in the same order during training and prediction + model_data.sort() + return model_data def train( @@ -1304,6 +1307,12 @@ def _reshape_for_entities( ) text_mask = tf.squeeze(self._compute_mask(text_sequence_lengths), axis=1) + # add zeros to match the shape of text_transformed, because + # max sequence length might differ, since it is calculated dynamically + # based on a subset of sequence lengths + sequence_diff = tf.shape(text_transformed)[1] - tf.shape(text_mask)[1] + text_mask = tf.pad(text_mask, [[0, 0], [0, sequence_diff], [0, 0]]) + # remove additional dims and sentence features text_sequence_lengths = tf.reshape(text_sequence_lengths, (-1,)) - 1 @@ -1347,7 +1356,8 @@ def _real_batch_loss_entities( tag_ids = tf_batch_data[ENTITY_TAGS][IDS][0] # add a zero (no entity) for the sentence features to match the shape of inputs - tag_ids = tf.pad(tag_ids, [[0, 0], [0, 1], [0, 0]]) + sequence_diff = tf.shape(text_transformed)[1] - tf.shape(tag_ids)[1] + tag_ids = tf.pad(tag_ids, [[0, 0], [0, sequence_diff], [0, 0]]) loss, f1, _ = self._calculate_entity_loss( text_transformed, From f87c134f7aed137944dbf3be49cb9da30bbeceee Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 16:43:18 +0100 Subject: [PATCH 47/62] fix ffnn encoding layer name --- examples/e2ebot/config.yml | 4 ++-- rasa/core/policies/ted_policy.py | 3 ++- rasa/utils/tensorflow/layers.py | 2 +- rasa/utils/tensorflow/models.py | 5 ++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/e2ebot/config.yml b/examples/e2ebot/config.yml index 2d4a08aa4ae6..f38558adb0ad 100644 --- a/examples/e2ebot/config.yml +++ b/examples/e2ebot/config.yml @@ -9,8 +9,8 @@ pipeline: analyzer: char_wb min_ngram: 1 max_ngram: 4 -# - name: DIETClassifier -# epochs: 200 + - name: DIETClassifier + epochs: 200 policies: - name: TEDPolicy epochs: 200 diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 059c5bce6711..529d5c02d7be 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -862,6 +862,7 @@ def _prepare_encoding_layers(self, name: Text) -> None: f"{name}", [self.config[ENCODING_DIMENSION]], self.config[DROP_RATE_DIALOGUE], + prefix="encoding_layer", ) # ---GRAPH BUILDING HELPERS--- @@ -1098,7 +1099,7 @@ def _encode_real_features_per_attribute( ) if attribute in SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE: - attribute_features = self._tf_layers[f"ffnn.{attribute}"]( + attribute_features = self._tf_layers[f"encoding_layer.{attribute}"]( attribute_features ) diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 1d64b1b26cb3..0b0d00e4131a 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -136,7 +136,7 @@ def call(self, inputs: tf.SparseTensor) -> tf.Tensor: if len(inputs.shape) == 3: # reshape back outputs = tf.reshape( - outputs, (tf.shape(inputs)[0], tf.shape(inputs)[1], -1) + outputs, (tf.shape(inputs)[0], tf.shape(inputs)[1], self.units) ) if self.use_bias: diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 6b1242d90a0a..50e4903814c0 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -803,7 +803,10 @@ def _prepare_sparse_dense_layers( if not dense: # create dense labels for the input to use in negative sampling self._tf_layers[f"sparse_to_dense_ids.{name}"] = layers.DenseForSparse( - units=2, trainable=False, name=f"sparse_to_dense_ids.{name}" + units=2, + use_bias=False, + trainable=False, + name=f"sparse_to_dense_ids.{name}", ) def _prepare_input_layers(self, name: Text) -> None: From bfc2571169d786c2c87d2697041bf2e3a2d24e00 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 16:48:08 +0100 Subject: [PATCH 48/62] add todo --- rasa/core/policies/ted_policy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 529d5c02d7be..6a695a5bdc2c 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -550,6 +550,7 @@ def predict_action_probabilities( if ( len(tracker_state_features) == 2 and np.max(confidences[1]) > self.config[E2E_CONFIDENCE_THRESHOLD] + # TODO maybe compare confidences is better and np.max(similarities[1]) > np.max(similarities[0]) ): batch_index = 1 From 53b21599b7f436c42ba300afc7975675b30dba22 Mon Sep 17 00:00:00 2001 From: Vladimir Vlasov Date: Thu, 12 Nov 2020 16:49:57 +0100 Subject: [PATCH 49/62] Update rasa/core/policies/ted_policy.py Co-authored-by: Tanja --- rasa/core/policies/ted_policy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 6a695a5bdc2c..59c1b044aea9 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -1027,7 +1027,7 @@ def _encode_real_features_per_attribute( mask_sequence_text = tf.squeeze( self._compute_mask(sequence_lengths), axis=1 ) - sequence_lengths = sequence_lengths + 1 + sequence_lengths += 1 mask_text = tf.squeeze(self._compute_mask(sequence_lengths), axis=1) attribute_features, _, _, _ = self._create_sequence( From 05639b4a94f27707586d3ad43e5ccf928a2db421 Mon Sep 17 00:00:00 2001 From: Vladimir Vlasov Date: Thu, 12 Nov 2020 16:50:37 +0100 Subject: [PATCH 50/62] Update rasa/core/featurizers/single_state_featurizer.py Co-authored-by: Tanja --- rasa/core/featurizers/single_state_featurizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index e3ed3712ad76..792c618cfe87 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -43,7 +43,7 @@ class SingleStateFeaturizer: def __init__(self) -> None: self._default_feature_states = {} self.action_texts = [] - self.tag_id_mapping = {} + self.entity_tag_id_mapping = {} def get_entity_tag_ids(self) -> Dict[Text, int]: """Returns the tag to index mapping for entities. From 4e873f98e51cf9347cb719ca749d616cb5917bb9 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 16:52:32 +0100 Subject: [PATCH 51/62] rename to entity_tag_id_mapping --- rasa/core/featurizers/single_state_featurizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index 792c618cfe87..f77f20291154 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -79,7 +79,7 @@ def convert_to_dict(feature_states: List[Text]) -> Dict[Text, int]: self._default_feature_states[SLOTS] = convert_to_dict(domain.slot_states) self._default_feature_states[ACTIVE_LOOP] = convert_to_dict(domain.form_names) self.action_texts = domain.action_texts - self.tag_id_mapping = self.get_entity_tag_ids() + self.entity_tag_id_mapping = self.get_entity_tag_ids() def _state_features_for_attribute( self, sub_state: SubState, attribute: Text @@ -252,7 +252,7 @@ def encode_entity( # TODO # Should we support BILOU tagging? - if TEXT not in entity_data or len(self.tag_id_mapping) < 2: + if TEXT not in entity_data or len(self.entity_tag_id_mapping) < 2: # we cannot build a classifier if there are less than 2 class return {} @@ -265,7 +265,7 @@ def encode_entity( token, entities, attribute_key=ENTITY_ATTRIBUTE_TYPE ) # TODO handle if tag is not in mapping - _tags.append(self.tag_id_mapping[_tag]) + _tags.append(self.entity_tag_id_mapping[_tag]) # transpose to have seq_len x 1 return { From 563085b546f92d3938206f25acbd614aa5387342 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 17:06:51 +0100 Subject: [PATCH 52/62] add comment to last dial mask --- rasa/core/policies/ted_policy.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 59c1b044aea9..29dd1d7e0dde 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -1061,6 +1061,30 @@ def _encode_real_features_per_attribute( ), ) ).values + # since use_only_last_dialogue_turn is True, + # we need to find the locations of last dialogue turns in + # (combined batch dimension and dialogue length,) dimension + # so that we can use `_sequence_lengths` as a boolean to pick + # which ones are "real" textual input in these last dialogue turns + + # in order to do that we can use given `dialogue_lengths` + # for example: + # if we have `dialogue_lengths = [2, 1, 3]`, than + # `dialogue_indices = [0, 1, 0, 0, 1, 2]` here we can spot that `0` + # always indicates the first dialogue turn, + # which means that previous dialogue turn is the last one, + # combining this with the fact that the last element in + # `dialogue_indices` is always the last dialogue turn, we can add + # a `0` to the end, getting + # `_dialogue_indices = [0, 1, 0, 0, 1, 2, 0]`, + # then remove the first element + # `_last_dialogue_turn_inverse_indicator = [1, 0, 0, 1, 2, 0]` + # and we see that `0` points to last dialogue turn, + # the rest is to convert all positive numbers to `True` and take + # the inverse mask to get + # `last_dialogue_mask = [0, 1, 1, 0, 0, 1] + # which precisely corresponds to the fact that first dialogue is of + # length 2, the second 1 and the third 3 last_dialogue_mask = tf.math.logical_not( tf.cast( tf.concat( From 4a97b0b95183058e76500efe7af2b3087d69577a Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 17:17:31 +0100 Subject: [PATCH 53/62] add comments to tf.cond --- rasa/core/policies/ted_policy.py | 43 ++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 29dd1d7e0dde..c5f9b2f0c6ff 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -1061,30 +1061,30 @@ def _encode_real_features_per_attribute( ), ) ).values - # since use_only_last_dialogue_turn is True, + # Since use_only_last_dialogue_turn is True, # we need to find the locations of last dialogue turns in - # (combined batch dimension and dialogue length,) dimension - # so that we can use `_sequence_lengths` as a boolean to pick - # which ones are "real" textual input in these last dialogue turns + # (combined batch dimension and dialogue length,) dimension, + # so that we can use `_sequence_lengths` as a boolean mask to pick + # which ones are "real" textual input in these last dialogue turns. - # in order to do that we can use given `dialogue_lengths` - # for example: - # if we have `dialogue_lengths = [2, 1, 3]`, than + # In order to do that we can use given `dialogue_lengths`. + # For example: + # If we have `dialogue_lengths = [2, 1, 3]`, than # `dialogue_indices = [0, 1, 0, 0, 1, 2]` here we can spot that `0` # always indicates the first dialogue turn, - # which means that previous dialogue turn is the last one, - # combining this with the fact that the last element in + # which means that previous dialogue turn is the last dialogue turn. + # Combining this with the fact that the last element in # `dialogue_indices` is always the last dialogue turn, we can add # a `0` to the end, getting - # `_dialogue_indices = [0, 1, 0, 0, 1, 2, 0]`, - # then remove the first element + # `_dialogue_indices = [0, 1, 0, 0, 1, 2, 0]`. + # Then removing the first element # `_last_dialogue_turn_inverse_indicator = [1, 0, 0, 1, 2, 0]` - # and we see that `0` points to last dialogue turn, - # the rest is to convert all positive numbers to `True` and take + # we see that `0` points to the last dialogue turn. + # We convert all positive numbers to `True` and take # the inverse mask to get - # `last_dialogue_mask = [0, 1, 1, 0, 0, 1] + # `last_dialogue_mask = [0, 1, 1, 0, 0, 1], # which precisely corresponds to the fact that first dialogue is of - # length 2, the second 1 and the third 3 + # length 2, the second 1 and the third 3. last_dialogue_mask = tf.math.logical_not( tf.cast( tf.concat( @@ -1353,7 +1353,12 @@ def _batch_loss_entities( text_transformer_output: tf.Tensor, text_sequence_lengths: tf.Tensor, ) -> tf.Tensor: - + # It could happen that some batches don't contain "real" features for `text`, + # e.g. large number of stories are intent only. + # Therefore actual `text_transformer_output` will be empty. + # We cannot create a loss with empty tensors. + # Since we need actual numbers to create a full loss, we output + # zero in this case. return tf.cond( tf.shape(text_transformer_output)[0] > 0, lambda: self._real_batch_loss_entities( @@ -1544,6 +1549,12 @@ def _batch_predict_entities( text_transformer_output: tf.Tensor, text_sequence_lengths: tf.Tensor, ) -> Tuple[tf.Tensor, tf.Tensor]: + # It could happen that current prediction turn don't contain + # "real" features for `text`, + # Therefore actual `text_transformer_output` will be empty. + # We cannot predict entities with empty tensors. + # Since we need to output some tensors of the same shape, we output + # zero tensors. return tf.cond( tf.shape(text_transformer_output)[0] > 0, lambda: self._real_batch_predict_entities( From d2db7153594789dc6b9f15a1b1bd50aac6af041f Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 17:22:58 +0100 Subject: [PATCH 54/62] add docstrings --- .../featurizers/single_state_featurizer.py | 11 ++++++++++ rasa/core/featurizers/tracker_featurizers.py | 21 ++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index f77f20291154..aa93df4c35c5 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -243,6 +243,17 @@ def encode_state( def encode_entity( self, entity_data: Dict[Text, Any], interpreter: NaturalLanguageInterpreter ) -> Dict[Text, List["Features"]]: + """Encode the given entity data with the help of the given interpreter. + + Produce numeric entity tags for tokens. + + Args: + entity_data: The dict containing the text and entity labels and locations + interpreter: The interpreter used to encode the state + + Returns: + A dictionary of entity type to list of features. + """ from rasa.nlu.test import determine_token_labels # TODO diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py index 4973b1da98a7..08355883d716 100644 --- a/rasa/core/featurizers/tracker_featurizers.py +++ b/rasa/core/featurizers/tracker_featurizers.py @@ -115,6 +115,15 @@ def _entity_data(event: UserUttered) -> Dict[Text, Any]: def training_states_actions_and_entities( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: + """Transforms list of trackers to lists of states, actions and entity data. + + Args: + trackers: The trackers to transform + domain: The domain + + Returns: + A tuple of list of states, list of actions and list of entity data. + """ raise NotImplementedError( "Featurizer must have the capacity to encode trackers to feature vectors" ) @@ -293,16 +302,14 @@ class FullDialogueTrackerFeaturizer(TrackerFeaturizer): def training_states_actions_and_entities( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: - """Transforms list of trackers to lists of states and actions. - - Training data is padded up to the length of the longest dialogue with -1. + """Transforms list of trackers to lists of states, actions and entity data. Args: trackers: The trackers to transform domain: The domain Returns: - A tuple of list of states and list of actions. + A tuple of list of states, list of actions and list of entity data. """ trackers_as_states = [] @@ -438,16 +445,14 @@ def _hash_example( def training_states_actions_and_entities( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: - """Transforms list of trackers to lists of states and actions. - - Training data is padded up to the length of the longest dialogue with -1. + """Transforms list of trackers to lists of states, actions and entity data. Args: trackers: The trackers to transform domain: The domain Returns: - A tuple of list of states and list of actions. + A tuple of list of states, list of actions and list of entity data. """ trackers_as_states = [] From b3b28d74a94b58a5c73631e4567e5470af423b51 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Thu, 12 Nov 2020 19:21:16 +0100 Subject: [PATCH 55/62] refactor number of dims check --- rasa/core/policies/memoization.py | 1 - rasa/utils/tensorflow/model_data.py | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index 8510ab9c6852..e511f35563ac 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -1,4 +1,3 @@ -import copy import zlib import base64 diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index 3c8012f024ba..4f5dcab1c010 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -124,7 +124,7 @@ def __setstate__(self, state, **kwargs): def _validate_number_of_dimensions( number_of_dimensions: int, input_array: np.ndarray ) -> None: - """Validates if the given number of dimensions maps the with the dimensions of the input array. + """Validates if the the input array has given number of dimensions. Args: number_of_dimensions: number of dimensions @@ -142,7 +142,8 @@ def _validate_number_of_dimensions( break if isinstance(_sub_array, np.ndarray) and _sub_array.shape[0] == 0: # sequence dimension is 0, we are dealing with "fake" features - return + dim = i + break # If the resulting sub_array is sparse, the remaining number of dimensions # should be at least 2 @@ -150,7 +151,15 @@ def _validate_number_of_dimensions( if dim > 2: raise ValueError( f"Given number of dimensions '{number_of_dimensions}' does not " - f"match dimensiona of given input array: {input_array}." + f"match dimensions of given input array: {input_array}." + ) + elif isinstance(_sub_array, np.ndarray) and _sub_array.shape[0] == 0: + # sequence dimension is 0, we are dealing with "fake" features, + # but they should be of dim 2 + if dim > 2: + raise ValueError( + f"Given number of dimensions '{number_of_dimensions}' does not " + f"match dimensions of given input array: {input_array}." ) # If the resulting sub_array is dense, the sub_array should be a single number elif not np.issubdtype(type(_sub_array), np.integer) and not isinstance( From 779db7f1236913ae24db6c803d9d2ced52b610d3 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Fri, 13 Nov 2020 09:59:15 +0100 Subject: [PATCH 56/62] rename zero features to fake features --- rasa/core/policies/ted_policy.py | 24 ++++++------ rasa/utils/tensorflow/model_data_utils.py | 38 +++++++++---------- .../utils/tensorflow/test_model_data_utils.py | 34 ++++++++--------- 3 files changed, 48 insertions(+), 48 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index c5f9b2f0c6ff..02c9f06cebda 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -276,7 +276,7 @@ def __init__( priority: int = DEFAULT_POLICY_PRIORITY, max_history: Optional[int] = None, model: Optional[RasaModel] = None, - zero_state_features: Optional[Dict[Text, List["Features"]]] = None, + fake_features: Optional[Dict[Text, List["Features"]]] = None, entity_tag_specs: Optional[List[EntityTagSpec]] = None, **kwargs: Any, ) -> None: @@ -297,7 +297,7 @@ def __init__( self._entity_tag_specs = entity_tag_specs - self.zero_state_features = zero_state_features or defaultdict(list) + self.fake_features = fake_features or defaultdict(list) self._label_data: Optional[RasaModelData] = None self.data_example: Optional[Dict[Text, List[np.ndarray]]] = None @@ -398,7 +398,7 @@ def _create_model_data( [FeatureArray(label_ids, number_of_dimensions=3)], ) - attribute_data, self.zero_state_features = convert_to_data_format( + attribute_data, self.fake_features = convert_to_data_format( tracker_state_features, featurizers=self.config[FEATURIZERS] ) if self.config[ENTITY_RECOGNITION]: @@ -418,7 +418,7 @@ def _create_model_data( # method is called during prediction attribute_data, _ = convert_to_data_format( tracker_state_features, - self.zero_state_features, + self.fake_features, featurizers=self.config[FEATURIZERS], ) @@ -519,7 +519,7 @@ def predict_action_probabilities( # create model data from tracker tracker_state_features = [] if ( - INTENT in self.zero_state_features + INTENT in self.fake_features or not tracker.latest_action_name == ACTION_LISTEN_NAME ): # the first example in a batch uses intent @@ -528,7 +528,7 @@ def predict_action_probabilities( [tracker], domain, interpreter, use_text_for_last_user_input=False ) if ( - TEXT in self.zero_state_features + TEXT in self.fake_features and tracker.latest_action_name == ACTION_LISTEN_NAME ): # the second - text, but only after user utterance @@ -561,7 +561,7 @@ def predict_action_probabilities( else: # only one tracker present batch_index = 0 if tracker.latest_action_name == ACTION_LISTEN_NAME: - if TEXT in self.zero_state_features: + if TEXT in self.fake_features: is_e2e_prediction = True else: is_e2e_prediction = False @@ -609,8 +609,8 @@ def persist(self, path: Union[Text, Path]) -> None: model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl", self.data_example ) io_utils.pickle_dump( - model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl", - self.zero_state_features, + model_path / f"{SAVE_MODEL_FILE_NAME}.fake_features.pkl", + self.fake_features, ) io_utils.pickle_dump( model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl", @@ -653,8 +653,8 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": label_data = io_utils.pickle_load( model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl" ) - zero_state_features = io_utils.pickle_load( - model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl" + fake_features = io_utils.pickle_load( + model_path / f"{SAVE_MODEL_FILE_NAME}.fake_features.pkl" ) label_data = RasaModelData(data=label_data) meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl") @@ -713,7 +713,7 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": featurizer=featurizer, priority=priority, model=model, - zero_state_features=zero_state_features, + fake_features=fake_features, entity_tag_specs=entity_tag_specs, **meta, ) diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py index 8788a366f06b..1f556d22914d 100644 --- a/rasa/utils/tensorflow/model_data_utils.py +++ b/rasa/utils/tensorflow/model_data_utils.py @@ -166,7 +166,7 @@ def _filter_features(features: Optional[List["Features"]], featurizers: List[Tex return [f for f in features if f.origin in featurizers] -def _create_zero_features( +def _create_fake_features( all_features: List[List[List["Features"]]], ) -> List["Features"]: """Computes default feature values. @@ -191,8 +191,8 @@ def _create_zero_features( ) ) - # create zero_features for Nones - zero_features = [] + # create fake_features for Nones + fake_features = [] for _features in example_features: new_features = copy.deepcopy(_features) if _features.is_dense(): @@ -203,16 +203,16 @@ def _create_zero_features( new_features.features = scipy.sparse.coo_matrix( (0, _features.features.shape[-1]), _features.features.dtype ) - zero_features.append(new_features) + fake_features.append(new_features) - return zero_features + return fake_features def convert_to_data_format( features: Union[ List[List[Dict[Text, List["Features"]]]], List[Dict[Text, List["Features"]]] ], - zero_features: Optional[Dict[Text, List["Features"]]] = None, + fake_features: Optional[Dict[Text, List["Features"]]] = None, consider_dialogue_dimension: bool = True, featurizers: Optional[List[Text]] = None, ) -> Tuple[Data, Optional[Dict[Text, List["Features"]]]]: @@ -228,7 +228,7 @@ def convert_to_data_format( Args: features: a dictionary of attributes to a list of features for all examples in the training data - zero_features: Contains default feature values for attributes + fake_features: Contains default feature values for attributes consider_dialogue_dimension: If set to false the dialogue dimension will be removed from the resulting sequence features. featurizers: the featurizers to consider @@ -237,9 +237,9 @@ def convert_to_data_format( Input in "Data" format and zero features """ training = False - if not zero_features: + if not fake_features: training = True - zero_features = defaultdict(list) + fake_features = defaultdict(list) # unify format of incoming features if isinstance(features[0], Dict): @@ -254,7 +254,7 @@ def convert_to_data_format( if training: attributes = list(attribute_to_features.keys()) else: - attributes = list(zero_features.keys()) + attributes = list(fake_features.keys()) # In case an attribute is not present during prediction, replace it with # None values that will then be replaced by zero features @@ -271,14 +271,14 @@ def convert_to_data_format( empty_features, attribute_to_features, training, - zero_features, + fake_features, consider_dialogue_dimension, ) # ensure that all attributes are in the same order attribute_data = OrderedDict(sorted(attribute_data.items())) - return attribute_data, zero_features + return attribute_data, fake_features def _features_for_attribute( @@ -286,7 +286,7 @@ def _features_for_attribute( empty_features: List[Any], attribute_to_features: Dict[Text, List[List[List["Features"]]]], training: bool, - zero_features: Dict[Text, List["Features"]], + fake_features: Dict[Text, List["Features"]], consider_dialogue_dimension: bool, ) -> Dict[Text, List[FeatureArray]]: """Create the features for the given attribute from the all examples features. @@ -296,7 +296,7 @@ def _features_for_attribute( empty_features: empty features attribute_to_features: features for every example training: boolean indicating whether we are currently in training or not - zero_features: zero features + fake_features: zero features consider_dialogue_dimension: If set to false the dialogue dimension will be removed from the resulting sequence features. @@ -312,10 +312,10 @@ def _features_for_attribute( # in case some features for a specific attribute are # missing, replace them with a feature vector of zeros if training: - zero_features[attribute] = _create_zero_features(features) + fake_features[attribute] = _create_fake_features(features) (attribute_masks, _dense_features, _sparse_features) = _extract_features( - features, zero_features[attribute], attribute + features, fake_features[attribute], attribute ) sparse_features = {} @@ -363,7 +363,7 @@ def _features_for_attribute( def _extract_features( features: List[List[List["Features"]]], - zero_features: List["Features"], + fake_features: List["Features"], attribute: Text, ) -> Tuple[ List[np.ndarray], @@ -375,7 +375,7 @@ def _extract_features( Args: features: all features - zero_features: list of zero features + fake_features: list of zero features Returns: - a list of attribute masks @@ -399,7 +399,7 @@ def _extract_features( if list_of_features is None: # use zero features and set mask to zero attribute_mask[i] = 0 - list_of_features = zero_features + list_of_features = fake_features for features in list_of_features: # in case of ENTITIES, if the attribute type matches either 'entity', diff --git a/tests/utils/tensorflow/test_model_data_utils.py b/tests/utils/tensorflow/test_model_data_utils.py index f495222958df..2dab29353f3a 100644 --- a/tests/utils/tensorflow/test_model_data_utils.py +++ b/tests/utils/tensorflow/test_model_data_utils.py @@ -30,7 +30,7 @@ shape = 100 -def test_create_zero_features(): +def test_create_fake_features(): # DENSE FEATURES dense_feature_sentence_features = Features( features=np.random.rand(shape), @@ -40,10 +40,10 @@ def test_create_zero_features(): ) features = [[None, None, [dense_feature_sentence_features]]] - zero_features = model_data_utils._create_zero_features(features) - assert len(zero_features) == 1 - assert zero_features[0].is_dense() - assert zero_features[0].features.shape == (0, shape) + fake_features = model_data_utils._create_fake_features(features) + assert len(fake_features) == 1 + assert fake_features[0].is_dense() + assert fake_features[0].features.shape == (0, shape) # SPARSE FEATURES sparse_feature_sentence_features = Features( @@ -53,11 +53,11 @@ def test_create_zero_features(): origin=[], ) features = [[None, None, [sparse_feature_sentence_features]]] - zero_features = model_data_utils._create_zero_features(features) - assert len(zero_features) == 1 - assert zero_features[0].is_sparse() - assert zero_features[0].features.shape == (0, shape) - assert zero_features[0].features.nnz == 0 + fake_features = model_data_utils._create_fake_features(features) + assert len(fake_features) == 1 + assert fake_features[0].is_sparse() + assert fake_features[0].features.shape == (0, shape) + assert fake_features[0].features.nnz == 0 def test_surface_attributes(): @@ -142,18 +142,18 @@ def test_surface_attributes(): def test_extract_features(): - zero_features = np.zeros(shape) - zero_features_as_features = Features( - features=zero_features, attribute=INTENT, feature_type=SENTENCE, origin=[] + fake_features = np.zeros(shape) + fake_features_as_features = Features( + features=fake_features, attribute=INTENT, feature_type=SENTENCE, origin=[] ) # create zero features - zero_features_list = [zero_features_as_features] + fake_features_list = [fake_features_as_features] # create tracker state features by setting a random index in the array to 1 random_inds = np.random.randint(shape, size=6) list_of_features = [] for idx in random_inds: - current_features = copy.deepcopy(zero_features_as_features) + current_features = copy.deepcopy(fake_features_as_features) current_features.features[idx] = 1 list_of_features.append([current_features]) @@ -168,11 +168,11 @@ def test_extract_features(): attribute_masks, dense_features, sparse_features, - ) = model_data_utils._extract_features(tracker_features, zero_features_list, INTENT) + ) = model_data_utils._extract_features(tracker_features, fake_features_list, INTENT) expected_mask = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]]) assert np.all(np.squeeze(np.array(attribute_masks), 2) == expected_mask) - assert np.array(dense_features[SENTENCE]).shape[-1] == zero_features.shape[-1] + assert np.array(dense_features[SENTENCE]).shape[-1] == fake_features.shape[-1] assert sparse_features == {} From ee85c17eb0d442dcc0cbb6064f8d30ca36f8ad0c Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Fri, 13 Nov 2020 10:29:42 +0100 Subject: [PATCH 57/62] pre compute dialogue_indices --- examples/e2ebot/config.yml | 4 +- rasa/core/policies/ted_policy.py | 84 +++++++++++++++++--------------- rasa/utils/tensorflow/models.py | 2 +- 3 files changed, 47 insertions(+), 43 deletions(-) diff --git a/examples/e2ebot/config.yml b/examples/e2ebot/config.yml index f38558adb0ad..2d4a08aa4ae6 100644 --- a/examples/e2ebot/config.yml +++ b/examples/e2ebot/config.yml @@ -9,8 +9,8 @@ pipeline: analyzer: char_wb min_ngram: 1 max_ngram: 4 - - name: DIETClassifier - epochs: 200 +# - name: DIETClassifier +# epochs: 200 policies: - name: TEDPolicy epochs: 200 diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 02c9f06cebda..812a04fc4478 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -868,6 +868,24 @@ def _prepare_encoding_layers(self, name: Text) -> None: # ---GRAPH BUILDING HELPERS--- + @staticmethod + def _compute_dialogue_indices( + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] + ) -> None: + dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32) + # wrap in a list, because that's the structure of tf_batch_data + tf_batch_data[DIALOGUE][IDS] = [ + ( + tf.map_fn( + tf.range, + dialogue_lengths, + fn_output_signature=tf.RaggedTensorSpec( + shape=[None], dtype=tf.int32 + ), + ) + ).values + ] + def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0] # labels cannot have all features "fake" @@ -899,7 +917,7 @@ def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]: return all_label_ids, all_labels_embed - def _emebed_dialogue( + def _embed_dialogue( self, dialogue_in: tf.Tensor, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], @@ -1047,20 +1065,7 @@ def _encode_real_features_per_attribute( text_sequence_lengths = sequence_lengths if self.use_only_last_dialogue_turn: - # get the location of all last dialogue inputs - dialogue_lengths = tf.cast( - tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 - ) - # TODO precompute dialogue_indices after creation of tf_batch_data - dialogue_indices = ( - tf.map_fn( - tf.range, - dialogue_lengths, - fn_output_signature=tf.RaggedTensorSpec( - shape=[None], dtype=tf.int32 - ), - ) - ).values + # Get the location of all last dialogue inputs. # Since use_only_last_dialogue_turn is True, # we need to find the locations of last dialogue turns in # (combined batch dimension and dialogue length,) dimension, @@ -1088,7 +1093,10 @@ def _encode_real_features_per_attribute( last_dialogue_mask = tf.math.logical_not( tf.cast( tf.concat( - [dialogue_indices, tf.zeros((1,), dtype=tf.int32)], + [ + tf_batch_data[DIALOGUE][IDS][0], + tf.zeros((1,), dtype=tf.int32), + ], axis=0, )[1:], dtype=tf.bool, @@ -1128,23 +1136,12 @@ def _encode_real_features_per_attribute( attribute_features ) - # attribute_mask has shape batch x dialogue_len x 1 - attribute_mask = tf_batch_data[attribute][MASK][0] - - if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: - dialogue_lengths = tf.cast( - tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 - ) - else: - # for labels, dialogue length is a fake dim and equal to 1 - dialogue_lengths = tf.ones((tf.shape(attribute_mask)[0],), dtype=tf.int32) - # attribute features have shape # (combined batch dimension and dialogue length x 1 x units) # convert them back to their original shape of # batch size x dialogue length x units attribute_features = self._convert_to_original_shape( - attribute_features, attribute_mask, dialogue_lengths + attribute_features, tf_batch_data, attribute ) return attribute_features, text_transformer_output, text_sequence_lengths @@ -1152,8 +1149,8 @@ def _encode_real_features_per_attribute( @staticmethod def _convert_to_original_shape( attribute_features: tf.Tensor, - attribute_mask: tf.Tensor, - dialogue_lengths: tf.Tensor, + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], + attribute: Text, ) -> tf.Tensor: """Transform attribute features back to original shape. @@ -1178,6 +1175,19 @@ def _convert_to_original_shape( # mapping the values of attribute features to the position in the resulting # tensor. + # attribute_mask has shape batch x dialogue_len x 1 + attribute_mask = tf_batch_data[attribute][MASK][0] + + if attribute in SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES: + dialogue_lengths = tf.cast( + tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 + ) + dialogue_indices = tf_batch_data[DIALOGUE][IDS][0] + else: + # for labels, dialogue length is a fake dim and equal to 1 + dialogue_lengths = tf.ones((tf.shape(attribute_mask)[0],), dtype=tf.int32) + dialogue_indices = tf.zeros((tf.shape(attribute_mask)[0],), dtype=tf.int32) + batch_dim = tf.shape(attribute_mask)[0] dialogue_dim = tf.shape(attribute_mask)[1] units = attribute_features.shape[-1] @@ -1188,14 +1198,6 @@ def _convert_to_original_shape( non_fake_dialogue_lengths = tf.reduce_sum(attribute_mask, axis=-1) # create the batch indices batch_indices = tf.repeat(tf.range(batch_dim), non_fake_dialogue_lengths) - # TODO precompute dialogue_indices after creation of tf_batch_data - dialogue_indices = ( - tf.map_fn( - tf.range, - dialogue_lengths, - fn_output_signature=tf.RaggedTensorSpec(shape=[None], dtype=tf.int32), - ) - ).values # attribute_mask has shape (batch x dialogue_len x 1), while # dialogue_indices has shape (combined_dialogue_len,) @@ -1427,6 +1429,7 @@ def batch_loss( The loss of the given batch. """ tf_batch_data = self.batch_to_model_data_format(batch_in, self.data_signature) + self._compute_dialogue_indices(tf_batch_data) all_label_ids, all_labels_embed = self._create_all_labels_embed() @@ -1442,7 +1445,7 @@ def batch_loss( dialogue_embed, dialogue_mask, dialogue_transformer_output, - ) = self._emebed_dialogue(dialogue_in, tf_batch_data) + ) = self._embed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) losses = [] @@ -1501,6 +1504,7 @@ def batch_predict( tf_batch_data = self.batch_to_model_data_format( batch_in, self.predict_data_signature ) + self._compute_dialogue_indices(tf_batch_data) ( dialogue_in, @@ -1511,7 +1515,7 @@ def batch_predict( dialogue_embed, dialogue_mask, dialogue_transformer_output, - ) = self._emebed_dialogue(dialogue_in, tf_batch_data) + ) = self._embed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) sim_all = self._tf_layers[f"loss.{LABEL}"].sim( diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 50e4903814c0..2bad5f180e06 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -186,7 +186,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = False, + eager: bool = True, ) -> None: """Fit model data""" From fc48d4af957915c469365c0dcc641a60dc619b39 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Fri, 13 Nov 2020 10:37:56 +0100 Subject: [PATCH 58/62] create helper methods --- rasa/core/policies/ted_policy.py | 107 ++++++++++++++++--------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 812a04fc4478..5e24475825d6 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -690,7 +690,7 @@ def load(cls, path: Union[Text, Path]) -> "TEDPolicy": config=meta, # during prediction we don't care about previous dialogue turns, # so to save computation time, use only the last one - use_only_last_dialogue_turn=True, + use_only_last_dialogue_turns=True, label_data=label_data, entity_tag_specs=entity_tag_specs, ) @@ -724,13 +724,13 @@ def __init__( self, data_signature: Dict[Text, Dict[Text, List[FeatureSignature]]], config: Dict[Text, Any], - use_only_last_dialogue_turn: bool, + use_only_last_dialogue_turns: bool, label_data: RasaModelData, entity_tag_specs: Optional[List[EntityTagSpec]], ) -> None: super().__init__("TED", config, data_signature, label_data) - self.use_only_last_dialogue_turn = use_only_last_dialogue_turn + self.use_only_last_dialogue_turns = use_only_last_dialogue_turns self.predict_data_signature = { feature_name: features @@ -931,7 +931,7 @@ def _embed_dialogue( ) dialogue_transformed = tfa.activations.gelu(dialogue_transformed) - if self.use_only_last_dialogue_turn: + if self.use_only_last_dialogue_turns: # pick last vector if max history featurizer is used dialogue_transformed = tf.expand_dims( self._last_token(dialogue_transformed, dialogue_lengths), 1 @@ -1014,6 +1014,49 @@ def _encode_fake_features_per_attribute( return attribute_features, text_transformer_output, text_sequence_lengths + @staticmethod + def _create_last_dialogue_turns_mask( + tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text + ) -> tf.Tensor: + # Since use_only_last_dialogue_turns is True, + # we need to find the locations of last dialogue turns in + # (combined batch dimension and dialogue length,) dimension, + # so that we can use `_sequence_lengths` as a boolean mask to pick + # which ones are "real" textual input in these last dialogue turns. + + # In order to do that we can use given `dialogue_lengths`. + # For example: + # If we have `dialogue_lengths = [2, 1, 3]`, than + # `dialogue_indices = [0, 1, 0, 0, 1, 2]` here we can spot that `0` + # always indicates the first dialogue turn, + # which means that previous dialogue turn is the last dialogue turn. + # Combining this with the fact that the last element in + # `dialogue_indices` is always the last dialogue turn, we can add + # a `0` to the end, getting + # `_dialogue_indices = [0, 1, 0, 0, 1, 2, 0]`. + # Then removing the first element + # `_last_dialogue_turn_inverse_indicator = [1, 0, 0, 1, 2, 0]` + # we see that `0` points to the last dialogue turn. + # We convert all positive numbers to `True` and take + # the inverse mask to get + # `last_dialogue_mask = [0, 1, 1, 0, 0, 1], + # which precisely corresponds to the fact that first dialogue is of + # length 2, the second 1 and the third 3. + last_dialogue_turn_mask = tf.math.logical_not( + tf.cast( + tf.concat( + [tf_batch_data[DIALOGUE][IDS][0], tf.zeros((1,), dtype=tf.int32)], + axis=0, + )[1:], + dtype=tf.bool, + ) + ) + # get only the indices of real inputs + return tf.boolean_mask( + last_dialogue_turn_mask, + tf.reshape(tf_batch_data[attribute][SEQUENCE_LENGTH][0], (-1,)), + ) + def _encode_real_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: @@ -1034,10 +1077,10 @@ def _encode_real_features_per_attribute( if attribute in SEQUENCE_FEATURES_TO_ENCODE: # sequence_lengths contain `0` for "fake" features, while # tf_batch_data[attribute] contain only "real" features - _sequence_lengths = tf_batch_data[attribute][SEQUENCE_LENGTH][0] + sequence_lengths = tf_batch_data[attribute][SEQUENCE_LENGTH][0] # extract only nonzero lengths and cast to int sequence_lengths = tf.cast( - tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32 + tf.boolean_mask(sequence_lengths, sequence_lengths), dtype=tf.int32 ) # boolean mask returns flat tensor sequence_lengths = tf.expand_dims(sequence_lengths, axis=-1) @@ -1064,55 +1107,17 @@ def _encode_real_features_per_attribute( text_transformer_output = attribute_features text_sequence_lengths = sequence_lengths - if self.use_only_last_dialogue_turn: - # Get the location of all last dialogue inputs. - # Since use_only_last_dialogue_turn is True, - # we need to find the locations of last dialogue turns in - # (combined batch dimension and dialogue length,) dimension, - # so that we can use `_sequence_lengths` as a boolean mask to pick - # which ones are "real" textual input in these last dialogue turns. - - # In order to do that we can use given `dialogue_lengths`. - # For example: - # If we have `dialogue_lengths = [2, 1, 3]`, than - # `dialogue_indices = [0, 1, 0, 0, 1, 2]` here we can spot that `0` - # always indicates the first dialogue turn, - # which means that previous dialogue turn is the last dialogue turn. - # Combining this with the fact that the last element in - # `dialogue_indices` is always the last dialogue turn, we can add - # a `0` to the end, getting - # `_dialogue_indices = [0, 1, 0, 0, 1, 2, 0]`. - # Then removing the first element - # `_last_dialogue_turn_inverse_indicator = [1, 0, 0, 1, 2, 0]` - # we see that `0` points to the last dialogue turn. - # We convert all positive numbers to `True` and take - # the inverse mask to get - # `last_dialogue_mask = [0, 1, 1, 0, 0, 1], - # which precisely corresponds to the fact that first dialogue is of - # length 2, the second 1 and the third 3. - last_dialogue_mask = tf.math.logical_not( - tf.cast( - tf.concat( - [ - tf_batch_data[DIALOGUE][IDS][0], - tf.zeros((1,), dtype=tf.int32), - ], - axis=0, - )[1:], - dtype=tf.bool, - ) - ) - - # get only the indices of real text inputs - last_dialogue_mask = tf.boolean_mask( - last_dialogue_mask, tf.reshape(_sequence_lengths, (-1,)) + if self.use_only_last_dialogue_turns: + # get the location of all last dialogue inputs + last_dialogue_turns_mask = self._create_last_dialogue_turns_mask( + tf_batch_data, attribute ) # pick last vector if max history featurizer is used text_transformer_output = tf.boolean_mask( - text_transformer_output, last_dialogue_mask + text_transformer_output, last_dialogue_turns_mask ) text_sequence_lengths = tf.boolean_mask( - text_sequence_lengths, last_dialogue_mask + text_sequence_lengths, last_dialogue_turns_mask ) # resulting attribute features will have shape @@ -1306,7 +1311,7 @@ def _reshape_for_entities( attribute_mask = tf_batch_data[TEXT][MASK][0] dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32) - if self.use_only_last_dialogue_turn: + if self.use_only_last_dialogue_turns: # pick last vector if max history featurizer is used attribute_mask = tf.expand_dims( self._last_token(attribute_mask, dialogue_lengths), axis=1 From 419f90c65613fe853e22f0765c8182167c012550 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Fri, 13 Nov 2020 11:41:26 +0100 Subject: [PATCH 59/62] calculate number of units for text_transformer_output --- rasa/core/policies/ted_policy.py | 70 ++++++++++++++++++++++---------- rasa/utils/tensorflow/models.py | 2 +- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 5e24475825d6..2f213859674d 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -259,8 +259,6 @@ class TEDPolicy(Policy): # By default all features in the pipeline are used. FEATURIZERS: [], # If set to true, entities are predicted in user utterances. - # TODO Do not communicate this option to users yet as we have to run some - # experiments first. ENTITY_RECOGNITION: True, } @@ -968,43 +966,71 @@ def _encode_features_per_attribute( lambda: self._encode_fake_features_per_attribute(tf_batch_data, attribute), ) + def _get_dense_units( + self, attribute_features_list: List[tf.Tensor], attribute: Text + ) -> int: + units = 0 + for f in attribute_features_list: + if isinstance(f, tf.SparseTensor): + units += self.config[DENSE_DIMENSION][attribute] + else: + units += f.shape[-1] + return units + + def _get_concat_units( + self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text + ) -> int: + # calculate concat sequence sentence dim + sentence_units = self._get_dense_units( + tf_batch_data[attribute][SENTENCE], attribute + ) + sequence_units = self._get_dense_units( + tf_batch_data[attribute][SEQUENCE], attribute + ) + + if sequence_units and not sentence_units: + return sequence_units + + if sentence_units and not sequence_units: + return sentence_units + + if sentence_units != sequence_units: + return self.config[CONCAT_DIMENSION][TEXT] + + return sentence_units + def _encode_fake_features_per_attribute( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: - attribute_features_list = tf_batch_data[attribute][SENTENCE] + # we need to create real zero tensors with appropriate batch and dialogue dim + # because they are passed to dialogue transformer attribute_mask = tf_batch_data[attribute][MASK][0] batch_dim = tf.shape(attribute_mask)[0] dialogue_dim = tf.shape(attribute_mask)[1] - if attribute in set(SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE): units = self.config[ENCODING_DIMENSION] else: - units = 0 - for f in attribute_features_list: - if isinstance(f, tf.SparseTensor): - units += self.config[DENSE_DIMENSION][attribute] - else: - units += f.shape[-1] + units = self._get_dense_units(tf_batch_data[attribute][SENTENCE], attribute) attribute_features = tf.zeros( (batch_dim, dialogue_dim, units), dtype=tf.float32 ) if attribute == TEXT: - # TODO handle the case if transformer is not created - # if self.config[f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}"] > 0: - # units = self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"] - # elif self.config[HIDDEN_LAYERS_SIZES][TEXT]: - # units = self.config[HIDDEN_LAYERS_SIZES][TEXT] - # else: - # for f in attribute_features_list: - # if isinstance(f, tf.SparseTensor): - # units += self.config[DENSE_DIMENSION][attribute] - # else: - # units += f.shape[-1] + # if the input features are fake, we don't process them further, + # but we need to calculate correct last dim (units) so that tf could infer + # the last shape of the tensors + if self.config[f"{DIALOGUE}_{NUM_TRANSFORMER_LAYERS}"] > 0: + text_transformer_units = self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"] + elif self.config[HIDDEN_LAYERS_SIZES][TEXT]: + text_transformer_units = self.config[HIDDEN_LAYERS_SIZES][TEXT][-1] + else: + text_transformer_units = self._get_concat_units( + tf_batch_data, attribute + ) text_transformer_output = tf.zeros( - (0, 0, self.config[f"{DIALOGUE}_{TRANSFORMER_SIZE}"]), dtype=tf.float32 + (0, 0, text_transformer_units), dtype=tf.float32 ) text_sequence_lengths = tf.zeros((0, 1), dtype=tf.int32) else: diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 2bad5f180e06..50e4903814c0 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -186,7 +186,7 @@ def fit( batch_strategy: Text, silent: bool = False, loading: bool = False, - eager: bool = True, + eager: bool = False, ) -> None: """Fit model data""" From 229723af95ca640a3a14fc6eea1632dc475406fa Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Fri, 13 Nov 2020 11:42:55 +0100 Subject: [PATCH 60/62] add todo --- rasa/core/policies/ted_policy.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 2f213859674d..d9084f053a43 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -969,6 +969,7 @@ def _encode_features_per_attribute( def _get_dense_units( self, attribute_features_list: List[tf.Tensor], attribute: Text ) -> int: + # TODO this should be done in corresponding layers once in init units = 0 for f in attribute_features_list: if isinstance(f, tf.SparseTensor): @@ -980,6 +981,7 @@ def _get_dense_units( def _get_concat_units( self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text ) -> int: + # TODO this should be done in corresponding layers once in init # calculate concat sequence sentence dim sentence_units = self._get_dense_units( tf_batch_data[attribute][SENTENCE], attribute From 76ca209d6066bcf49ac35ab390194bb17fc1c18c Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Fri, 13 Nov 2020 13:29:40 +0100 Subject: [PATCH 61/62] fix tests --- .../featurizers/single_state_featurizer.py | 2 +- rasa/core/featurizers/tracker_featurizers.py | 2 +- rasa/utils/tensorflow/layers.py | 16 +-- .../test_single_state_featurizers.py | 101 ++++++------------ .../featurizers/test_tracker_featurizer.py | 8 +- tests/shared/core/test_domain.py | 22 ++-- 6 files changed, 62 insertions(+), 89 deletions(-) diff --git a/rasa/core/featurizers/single_state_featurizer.py b/rasa/core/featurizers/single_state_featurizer.py index aa93df4c35c5..e702134a0bf6 100644 --- a/rasa/core/featurizers/single_state_featurizer.py +++ b/rasa/core/featurizers/single_state_featurizer.py @@ -240,7 +240,7 @@ def encode_state( return state_features - def encode_entity( + def encode_entities( self, entity_data: Dict[Text, Any], interpreter: NaturalLanguageInterpreter ) -> Dict[Text, List["Features"]]: """Encode the given entity data with the help of the given interpreter. diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py index 08355883d716..d12b92a42e79 100644 --- a/rasa/core/featurizers/tracker_featurizers.py +++ b/rasa/core/featurizers/tracker_featurizers.py @@ -98,7 +98,7 @@ def _create_entity_tags( ) -> List[List[Dict[Text, List["Features"]]]]: return [ [ - self.state_featurizer.encode_entity(entity_data, interpreter) + self.state_featurizer.encode_entities(entity_data, interpreter) for entity_data in trackers_entities ] for trackers_entities in trackers_as_entities diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 0b0d00e4131a..a9017094e945 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -630,13 +630,15 @@ def body(idx: tf.Tensor, out: tf.Tensor) -> List[tf.Tensor]: # create first random array of indices out1 = rand_idxs() # (1, num_neg) - return tf.while_loop( - cond, - body, - loop_vars=[idx1, out1], - shape_invariants=[idx1.shape, tf.TensorShape([None, self.num_neg])], - parallel_iterations=self.parallel_iterations, - back_prop=False, + return tf.nest.map_structure( + tf.stop_gradient, + tf.while_loop( + cond, + body, + loop_vars=[idx1, out1], + shape_invariants=[idx1.shape, tf.TensorShape([None, self.num_neg])], + parallel_iterations=self.parallel_iterations, + ), )[1] @staticmethod diff --git a/tests/core/featurizers/test_single_state_featurizers.py b/tests/core/featurizers/test_single_state_featurizers.py index a43028a64784..2f5819e8e659 100644 --- a/tests/core/featurizers/test_single_state_featurizers.py +++ b/tests/core/featurizers/test_single_state_featurizers.py @@ -19,6 +19,7 @@ ENTITY_ATTRIBUTE_VALUE, ENTITY_ATTRIBUTE_START, ENTITY_ATTRIBUTE_END, + ENTITY_TAGS, ) from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS from rasa.shared.nlu.interpreter import RegexInterpreter @@ -186,61 +187,41 @@ def test_single_state_featurizer_with_entity_roles_and_groups( from rasa.core.agent import Agent interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter - + # TODO roles and groups are not supported in e2e yet + domain = Domain( + intents=[], + entities=["city", f"city{ENTITY_LABEL_SEPARATOR}to"], + slots=[], + templates={}, + forms={}, + action_names=[], + ) f = SingleStateFeaturizer() - f._default_feature_states[INTENT] = {"inform": 0, "greet": 1} - f._default_feature_states[ENTITIES] = { - "city": 0, - "name": 1, - f"city{ENTITY_LABEL_SEPARATOR}to": 2, - f"city{ENTITY_LABEL_SEPARATOR}from": 3, - } - f._default_feature_states[ACTION_NAME] = { - "utter_ask_where_to": 0, - "utter_greet": 1, - "action_listen": 2, - } - f._default_feature_states[SLOTS] = {"slot_1": 0, "slot_2": 1, "slot_3": 2} - f._default_feature_states[ACTIVE_LOOP] = { - "active_loop_1": 0, - "active_loop_2": 1, - "active_loop_3": 2, - "active_loop_4": 3, - } - encoded = f.encode_state( + f.prepare_from_domain(domain) + encoded = f.encode_entities( { - "user": { - "text": "I am flying from London to Paris", - "intent": "inform", - "entities": [ - { - ENTITY_ATTRIBUTE_TYPE: "city", - ENTITY_ATTRIBUTE_VALUE: "London", - ENTITY_ATTRIBUTE_START: 17, - ENTITY_ATTRIBUTE_END: 23, - }, - { - ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to", - ENTITY_ATTRIBUTE_VALUE: "Paris", - ENTITY_ATTRIBUTE_START: 27, - ENTITY_ATTRIBUTE_END: 32, - }, - ], - }, - "prev_action": { - "action_name": "action_listen", - "action_text": "throw a ball", - }, - "active_loop": {"name": "active_loop_4"}, - "slots": {"slot_1": (1.0,)}, + TEXT: "I am flying from London to Paris", + ENTITIES: [ + { + ENTITY_ATTRIBUTE_TYPE: "city", + ENTITY_ATTRIBUTE_VALUE: "London", + ENTITY_ATTRIBUTE_START: 17, + ENTITY_ATTRIBUTE_END: 23, + }, + { + ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to", + ENTITY_ATTRIBUTE_VALUE: "Paris", + ENTITY_ATTRIBUTE_START: 27, + ENTITY_ATTRIBUTE_END: 32, + }, + ], }, interpreter=interpreter, ) - # check all the features are encoded and *_text features are encoded by a densefeaturizer - assert sorted(list(encoded.keys())) == sorted( - [TEXT, ENTITIES, ACTION_NAME, SLOTS, ACTIVE_LOOP, INTENT, ACTION_TEXT] + assert sorted(list(encoded.keys())) == sorted([ENTITY_TAGS]) + assert np.all( + encoded[ENTITY_TAGS][0].features == [[0], [0], [0], [0], [1], [0], [2]] ) - assert np.all(encoded[ENTITIES][0].features.toarray() == [1, 0, 1, 0]) def test_single_state_featurizer_uses_dtype_float(): @@ -268,7 +249,7 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen( interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter f = SingleStateFeaturizer() - f._default_feature_states[INTENT] = {"inform": 0, "greet": 1} + f._default_feature_states[INTENT] = {"greet": 0, "inform": 1} f._default_feature_states[ENTITIES] = { "city": 0, "name": 1, @@ -280,7 +261,8 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen( "utter_greet": 1, "action_listen": 2, } - f._default_feature_states[SLOTS] = {"slot_1": 0, "slot_2": 1, "slot_3": 2} + # `_0` in slots represent feature dimension + f._default_feature_states[SLOTS] = {"slot_1_0": 0, "slot_2_0": 1, "slot_3_0": 2} f._default_feature_states[ACTIVE_LOOP] = { "active_loop_1": 0, "active_loop_2": 1, @@ -292,20 +274,7 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen( "user": { "text": "I am flying from London to Paris", "intent": "inform", - "entities": [ - { - ENTITY_ATTRIBUTE_TYPE: "city", - ENTITY_ATTRIBUTE_VALUE: "London", - ENTITY_ATTRIBUTE_START: 17, - ENTITY_ATTRIBUTE_END: 23, - }, - { - ENTITY_ATTRIBUTE_TYPE: f"city{ENTITY_LABEL_SEPARATOR}to", - ENTITY_ATTRIBUTE_VALUE: "Paris", - ENTITY_ATTRIBUTE_START: 27, - ENTITY_ATTRIBUTE_END: 32, - }, - ], + "entities": ["city", f"city{ENTITY_LABEL_SEPARATOR}to"], }, "prev_action": { "action_name": "action_listen", @@ -328,7 +297,7 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen( assert ( encoded[ACTION_NAME][0].features != scipy.sparse.coo_matrix([[0, 0, 1]]) ).nnz == 0 - assert encoded[ENTITIES][0].features.shape[-1] == 1 + assert encoded[ENTITIES][0].features.shape[-1] == 4 assert (encoded[SLOTS][0].features != scipy.sparse.coo_matrix([[1, 0, 0]])).nnz == 0 assert ( encoded[ACTIVE_LOOP][0].features != scipy.sparse.coo_matrix([[0, 0, 0, 1]]) diff --git a/tests/core/featurizers/test_tracker_featurizer.py b/tests/core/featurizers/test_tracker_featurizer.py index 98f323bd3279..f6b904d8397b 100644 --- a/tests/core/featurizers/test_tracker_featurizer.py +++ b/tests/core/featurizers/test_tracker_featurizer.py @@ -67,7 +67,7 @@ def test_featurize_trackers_with_full_dialogue_tracker_featurizer( tracker = tracker_from_dialogue_file( "data/test_dialogues/moodbot.json", moodbot_domain ) - state_features, labels = tracker_featurizer.featurize_trackers( + state_features, labels, entity_tags = tracker_featurizer.featurize_trackers( [tracker], moodbot_domain, RegexInterpreter() ) @@ -75,6 +75,8 @@ def test_featurize_trackers_with_full_dialogue_tracker_featurizer( assert len(state_features) > 0 assert labels is not None assert len(labels) > 0 + # moodbot doesn't contain e2e entities + assert not any([any(turn_tags) for turn_tags in entity_tags]) def test_featurize_trackers_with_max_history_tracker_featurizer(moodbot_domain: Domain): @@ -84,7 +86,7 @@ def test_featurize_trackers_with_max_history_tracker_featurizer(moodbot_domain: tracker = tracker_from_dialogue_file( "data/test_dialogues/moodbot.json", moodbot_domain ) - state_features, labels = tracker_featurizer.featurize_trackers( + state_features, labels, entity_tags = tracker_featurizer.featurize_trackers( [tracker], moodbot_domain, RegexInterpreter() ) @@ -92,3 +94,5 @@ def test_featurize_trackers_with_max_history_tracker_featurizer(moodbot_domain: assert len(state_features) > 0 assert labels is not None assert len(labels) > 0 + # moodbot doesn't contain e2e entities + assert not any([any(turn_tags) for turn_tags in entity_tags]) diff --git a/tests/shared/core/test_domain.py b/tests/shared/core/test_domain.py index 800f04ffd5a1..630542fa44e4 100644 --- a/tests/shared/core/test_domain.py +++ b/tests/shared/core/test_domain.py @@ -75,7 +75,7 @@ async def test_create_train_data_no_history(default_domain: Domain): assert hashed == [ "[{}]", '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]', - '[{"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', + '[{"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', '[{"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', @@ -83,7 +83,7 @@ async def test_create_train_data_no_history(default_domain: Domain): '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', - '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', + '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', ] @@ -104,13 +104,13 @@ async def test_create_train_data_with_history(default_domain: Domain): hashed = sorted(hashed) assert hashed == [ - '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', + '[{"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}, {"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}]', '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "utter_default"}, "user": {"intent": "default"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]', - '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', - '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', - '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": [{"end": 22, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', + '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"intent": "default"}}]', + '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', + '[{}, {"prev_action": {"action_name": "action_listen"}, "slots": {"name": [1.0]}, "user": {"entities": ["name"], "intent": "greet"}}]', '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "default"}}]', '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}, {"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]', '[{}, {"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}]', @@ -156,7 +156,7 @@ async def test_create_train_data_unfeaturized_entities(): assert hashed == [ "[{}]", '[{"prev_action": {"action_name": "utter_greet"}, "user": {"intent": "greet"}}]', - '[{"prev_action": {"action_name": "utter_greet"}, "user": {"entities": [{"end": 81, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', + '[{"prev_action": {"action_name": "utter_greet"}, "user": {"entities": ["name"], "intent": "greet"}}]', '[{"prev_action": {"action_name": "utter_goodbye"}, "user": {"intent": "goodbye"}}]', '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "why"}}]', '[{"prev_action": {"action_name": "utter_default"}, "user": {"intent": "thank"}}]', @@ -166,9 +166,9 @@ async def test_create_train_data_unfeaturized_entities(): '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "thank"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "greet"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"intent": "goodbye"}}]', - '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [{"end": 81, "entity": "name", "start": 5, "value": "Peter"}], "intent": "greet"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [], "intent": "default"}}]', '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": [], "intent": "ask"}}]', + '[{"prev_action": {"action_name": "action_listen"}, "user": {"entities": ["name"], "intent": "greet"}}]', ] @@ -1057,7 +1057,7 @@ def test_get_featurized_entities(): featurized_entities = domain._get_featurized_entities(user_uttered) - assert featurized_entities == [] + assert featurized_entities == set() user_uttered = UserUttered( text="I am going to London", @@ -1067,6 +1067,4 @@ def test_get_featurized_entities(): featurized_entities = domain._get_featurized_entities(user_uttered) - assert featurized_entities == [ - {"entity": "GPE", "role": "destination", "value": "London"} - ] + assert featurized_entities == {"GPE", f"GPE{ENTITY_LABEL_SEPARATOR}destination"} From ce4098e55ead338d3b164ed9f292013a43998b95 Mon Sep 17 00:00:00 2001 From: Vova Vv Date: Fri, 13 Nov 2020 17:21:44 +0100 Subject: [PATCH 62/62] use indices constant --- examples/e2ebot/config.yml | 4 ++-- examples/e2ebot/domain.yml | 2 +- rasa/core/policies/ted_policy.py | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/e2ebot/config.yml b/examples/e2ebot/config.yml index 2d4a08aa4ae6..f38558adb0ad 100644 --- a/examples/e2ebot/config.yml +++ b/examples/e2ebot/config.yml @@ -9,8 +9,8 @@ pipeline: analyzer: char_wb min_ngram: 1 max_ngram: 4 -# - name: DIETClassifier -# epochs: 200 + - name: DIETClassifier + epochs: 200 policies: - name: TEDPolicy epochs: 200 diff --git a/examples/e2ebot/domain.yml b/examples/e2ebot/domain.yml index d884f4cc40c3..5c35c3d83a7b 100644 --- a/examples/e2ebot/domain.yml +++ b/examples/e2ebot/domain.yml @@ -1,5 +1,5 @@ version: "2.0" - +# TODO create a bot that makes sense actions: - utter_greet - utter_happy diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index d9084f053a43..b228ea4ec2e8 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -108,6 +108,7 @@ LABEL_KEY = LABEL LABEL_SUB_KEY = IDS LENGTH = "length" +INDICES = "indices" SENTENCE_FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT] SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT, f"{LABEL}_{ACTION_TEXT}"] LABEL_FEATURES_TO_ENCODE = [f"{LABEL}_{ACTION_NAME}", f"{LABEL}_{ACTION_TEXT}"] @@ -872,7 +873,7 @@ def _compute_dialogue_indices( ) -> None: dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32) # wrap in a list, because that's the structure of tf_batch_data - tf_batch_data[DIALOGUE][IDS] = [ + tf_batch_data[DIALOGUE][INDICES] = [ ( tf.map_fn( tf.range, @@ -1073,7 +1074,10 @@ def _create_last_dialogue_turns_mask( last_dialogue_turn_mask = tf.math.logical_not( tf.cast( tf.concat( - [tf_batch_data[DIALOGUE][IDS][0], tf.zeros((1,), dtype=tf.int32)], + [ + tf_batch_data[DIALOGUE][INDICES][0], + tf.zeros((1,), dtype=tf.int32), + ], axis=0, )[1:], dtype=tf.bool, @@ -1215,7 +1219,7 @@ def _convert_to_original_shape( dialogue_lengths = tf.cast( tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32 ) - dialogue_indices = tf_batch_data[DIALOGUE][IDS][0] + dialogue_indices = tf_batch_data[DIALOGUE][INDICES][0] else: # for labels, dialogue length is a fake dim and equal to 1 dialogue_lengths = tf.ones((tf.shape(attribute_mask)[0],), dtype=tf.int32)