From 1e6fa3fca46da01a7d6292e6e4f794dfdd070612 Mon Sep 17 00:00:00 2001
From: Vladimir Vlasov
Date: Mon, 9 Nov 2020 15:58:11 +0100
Subject: [PATCH] create empty fakes (#7198)

* substitute fake features with empty arrays and use attribute mask to rebuild input
* remove unused import, remove comment
* refactor, add comments, add types
* support empty features
* add prepare_for_predict to precalculate self.all_labels_embed
* return to default config
* add error
* add prepare_for_predict to diet
* fix test_model_data_utils
* fix test gen_batch
* Update rasa/core/policies/ted_policy.py

Co-authored-by: Tanja

* rename to filter fakes and create dial len beforehand
* add dtype=
* fix comment
* add comments about fake features

Co-authored-by: Tanja
---
 examples/e2ebot/domain.yml                    |   2 +-
 rasa/core/policies/ted_policy.py              | 179 +++++++++++++-----
 rasa/nlu/classifiers/diet_classifier.py       |   9 +-
 rasa/shared/core/trackers.py                  |   2 -
 rasa/utils/tensorflow/model_data.py           |  65 +++++--
 rasa/utils/tensorflow/model_data_utils.py     |  30 ++-
 rasa/utils/tensorflow/models.py               |  10 +
 tests/core/test_policies.py                   | 115 +++++++----
 .../utils/tensorflow/test_model_data_utils.py |   7 +-
 9 files changed, 287 insertions(+), 132 deletions(-)

diff --git a/examples/e2ebot/domain.yml b/examples/e2ebot/domain.yml
index c15888fdeb38..17b3faba2a75 100644
--- a/examples/e2ebot/domain.yml
+++ b/examples/e2ebot/domain.yml
@@ -8,4 +8,4 @@ actions:
 
 intents:
   - greet
-  - mood_great
\ No newline at end of file
+  - mood_great
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 3eaa9399bdb5..98f9c8cd3215 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -4,7 +4,6 @@
 from collections import defaultdict
 
 import numpy as np
-from tensorflow import RaggedTensorSpec
 
 import rasa.shared.utils.io
 import tensorflow as tf
@@ -105,7 +104,7 @@
 LABEL_SUB_KEY = "ids"
 LENGTH = "length"
 SENTENCE_FEATURES_TO_ENCODE = [INTENT, TEXT, ACTION_NAME, ACTION_TEXT]
-SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT]
+SEQUENCE_FEATURES_TO_ENCODE = [TEXT, ACTION_TEXT, f"{LABEL}_{ACTION_TEXT}"]
 LABEL_FEATURES_TO_ENCODE = [f"{LABEL}_{ACTION_NAME}", f"{LABEL}_{ACTION_TEXT}"]
 STATE_LEVEL_FEATURES = [ENTITIES, SLOTS, ACTIVE_LOOP]
 
@@ -141,19 +140,19 @@ class TEDPolicy(Policy):
         # and labels.
         # The number of hidden layers is equal to the length of the corresponding
         # list.
-        HIDDEN_LAYERS_SIZES: {TEXT: [], ACTION_TEXT: []},
+        HIDDEN_LAYERS_SIZES: {TEXT: [], ACTION_TEXT: [], f"{LABEL}_{ACTION_TEXT}": []},
         DENSE_DIMENSION: {
             TEXT: 128,
             ACTION_TEXT: 128,
-            ENTITIES: 128,
-            SLOTS: 128,
-            ACTIVE_LOOP: 128,
-            f"{LABEL}_{ACTION_TEXT}": 20,
+            f"{LABEL}_{ACTION_TEXT}": 128,
             INTENT: 20,
             ACTION_NAME: 20,
             f"{LABEL}_{ACTION_NAME}": 20,
+            ENTITIES: 20,
+            SLOTS: 20,
+            ACTIVE_LOOP: 20,
         },
-        CONCAT_DIMENSION: {TEXT: 128, ACTION_TEXT: 128},
+        CONCAT_DIMENSION: {TEXT: 128, ACTION_TEXT: 128, f"{LABEL}_{ACTION_TEXT}": 128},
         ENCODING_DIMENSION: 50,
         # Number of units in transformer
         TRANSFORMER_SIZE: 128,
@@ -310,6 +309,12 @@ def _create_label_data(
 
         label_data = RasaModelData()
         label_data.add_data(attribute_data, key_prefix=f"{LABEL_KEY}_")
+        label_data.add_lengths(
+            f"{LABEL}_{ACTION_TEXT}",
+            SEQUENCE_LENGTH,
+            f"{LABEL}_{ACTION_TEXT}",
+            SEQUENCE,
+        )
 
         label_ids = np.arange(domain.num_actions)
         label_data.add_features(
@@ -685,6 +690,8 @@ def _prepare_layers(self) -> None:
 
         for name in self.label_signature.keys():
             self._prepare_sparse_dense_layer_for(name, self.label_signature)
+            if name in SEQUENCE_FEATURES_TO_ENCODE:
+                self._prepare_sequence_layers(name)
             self._prepare_encoding_layers(name)
 
         self._prepare_transformer_layer(
@@ -754,9 +761,9 @@ def _prepare_encoding_layers(self, name: Text) -> None:
 
     def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]:
         all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0]
-
+        # labels cannot have all of their features "fake"
         all_labels_encoded = {
-            key: self._encode_features_per_attribute(self.tf_label_data, key)
+            key: self._encode_real_features_per_attribute(self.tf_label_data, key)
            for key in self.tf_label_data.keys()
            if key != LABEL_KEY
        }
@@ -808,7 +815,54 @@ def _emebed_dialogue(
 
     def _encode_features_per_attribute(
         self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text
-    ) -> Optional[tf.Tensor]:
+    ) -> tf.Tensor:
+        # The input is a 3d representation of a 4d tensor of
+        # shape (batch-size x dialogue-len x sequence-len x units), flattened to
+        # (sum of dialogue history lengths over all tensors in the batch x
+        # max sequence length x number of features).
+
+        # However, some dialogue turns contain nonexistent state features,
+        # e.g. `intent` and `text` features are mutually exclusive,
+        # as are `action_name` and `action_text`,
+        # and some dialogue turns don't contain any `slots`.
+        # In order to create full 4d tensors, we created "fake" zero features
+        # for these nonexistent state features and filtered them out during
+        # batch generation. Therefore, the first dimension differs between attributes.
+        # It can also happen that a batch doesn't contain "real" features at all,
+        # e.g. a large number of stories don't contain any `slots`;
+        # in that case the actual input tensors will be empty.
+        # Since we need actual numbers to create dialogue turn features, we create
+        # zero tensors in `_encode_fake_features_per_attribute` for these attributes.
+        return tf.cond(
+            tf.shape(tf_batch_data[attribute][SENTENCE][0])[0] > 0,
+            lambda: self._encode_real_features_per_attribute(tf_batch_data, attribute),
+            lambda: self._encode_fake_features_per_attribute(tf_batch_data, attribute),
+        )
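The `tf.cond` above is the crux of the change: whether a batch contains any "real" features for an attribute is only known at runtime, so the graph has to carry both the real and the fake branch. A minimal standalone sketch of the same dispatch pattern (toy shapes and a stand-in encoder, not Rasa's actual API):

    import tensorflow as tf

    def encode_attribute(sentence_features: tf.Tensor, attribute_mask: tf.Tensor, units: int) -> tf.Tensor:
        # attribute_mask: (batch x dialogue_len x 1); sentence_features may be empty
        batch_dim = tf.shape(attribute_mask)[0]
        dialogue_dim = tf.shape(attribute_mask)[1]
        return tf.cond(
            tf.shape(sentence_features)[0] > 0,              # any "real" features in this batch?
            lambda: tf.ones((batch_dim, dialogue_dim, units)),   # stand-in for the real encoder
            lambda: tf.zeros((batch_dim, dialogue_dim, units)),  # "fake" branch: zeros only
        )

Both branches must produce tensors of the same rank and dtype, which is why the fake branch below computes the correct `units` instead of just returning an empty tensor.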
+
+    def _encode_fake_features_per_attribute(
+        self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text
+    ) -> tf.Tensor:
+        attribute_features_list = tf_batch_data[attribute][SENTENCE]
+        attribute_mask = tf_batch_data[attribute][MASK][0]
+
+        batch_dim = tf.shape(attribute_mask)[0]
+        dialogue_dim = tf.shape(attribute_mask)[1]
+
+        if attribute in set(SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE):
+            units = self.config[ENCODING_DIMENSION]
+        else:
+            units = 0
+            for f in attribute_features_list:
+                if isinstance(f, tf.SparseTensor):
+                    units += self.config[DENSE_DIMENSION][attribute]
+                else:
+                    units += f.shape[-1]
+
+        return tf.zeros((batch_dim, dialogue_dim, units), dtype=tf.float32)
+
+    def _encode_real_features_per_attribute(
+        self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text
+    ) -> tf.Tensor:
         """Encodes features for a given attribute.
 
         Args:
@@ -819,13 +873,17 @@ def _encode_features_per_attribute(
             tf_batch_data: dictionary mapping every attribute to its features and masks
             attribute: the attribute we will encode features for (e.g., ACTION_NAME, INTENT)
 
         Returns:
             A tensor combining all features for `attribute`
         """
-        attribute_mask = tf_batch_data[attribute][MASK][0]
-
         if attribute in SEQUENCE_FEATURES_TO_ENCODE:
+            # sequence_lengths contains `0` for "fake" features, while
+            # tf_batch_data[attribute] contains only "real" features
+            _sequence_lengths = tf_batch_data[attribute][SEQUENCE_LENGTH][0]
+            # extract only nonzero lengths and cast to int
             _sequence_lengths = tf.cast(
-                tf_batch_data[attribute][SEQUENCE_LENGTH][0], dtype=tf.int32
+                tf.boolean_mask(_sequence_lengths, _sequence_lengths), dtype=tf.int32
             )
-            _sequence_lengths = tf.squeeze(_sequence_lengths, axis=-1)
+            # boolean mask returns a flat tensor
+            _sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1)
+
             mask_sequence_text = tf.squeeze(
                 self._compute_mask(_sequence_lengths), axis=1
             )
@@ -859,41 +917,38 @@ def _encode_features_per_attribute(
             # resulting attribute features will have shape
             # combined batch dimension and dialogue length x 1 x units
             attribute_features = self._combine_sparse_dense_features(
-                tf_batch_data[attribute][SENTENCE],
-                f"{attribute}_{SENTENCE}",
-                mask=attribute_mask,
+                tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}",
             )
 
-        if attribute in set(
-            SENTENCE_FEATURES_TO_ENCODE
-            + SEQUENCE_FEATURES_TO_ENCODE
-            + LABEL_FEATURES_TO_ENCODE
-        ):
+        if attribute in set(SENTENCE_FEATURES_TO_ENCODE + LABEL_FEATURES_TO_ENCODE):
             attribute_features = self._tf_layers[f"ffnn.{attribute}"](
                 attribute_features
             )
 
-        attribute_features = attribute_features * attribute_mask
+        # attribute_mask has shape batch x dialogue_len x 1
+        attribute_mask = tf_batch_data[attribute][MASK][0]
 
-        if attribute in set(
-            SENTENCE_FEATURES_TO_ENCODE
-            + SEQUENCE_FEATURES_TO_ENCODE
-            + STATE_LEVEL_FEATURES
-        ):
-            # attribute features have shape
-            # combined batch dimension and dialogue length x 1 x units
-            # convert them back to their original shape of
-            # batch size x dialogue length x units
-            attribute_features = self._convert_to_original_shape(
-                attribute_features, tf_batch_data
+        if attribute in set(SENTENCE_FEATURES_TO_ENCODE + STATE_LEVEL_FEATURES):
+            dialogue_lengths = tf.cast(
+                tf_batch_data[DIALOGUE][LENGTH][0], dtype=tf.int32
             )
-
-        return attribute_features
+        else:
+            # for labels, the dialogue length is a fake dimension equal to 1
+            dialogue_lengths = tf.ones((tf.shape(attribute_mask)[0],), dtype=tf.int32)
+
+        # attribute features have shape
+        # (combined batch dimension and dialogue length x 1 x units);
+        # convert them back to their original shape of
+        # batch size x dialogue length x units
+        return self._convert_to_original_shape(
+            attribute_features, attribute_mask, dialogue_lengths
+        )
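One detail worth calling out in `_encode_real_features_per_attribute`: `tf.boolean_mask` is used to drop the zero lengths that belong to "fake" turns, and it always returns a flat tensor, hence the `expand_dims` afterwards. A self-contained sketch (the boolean cast is made explicit here, whereas the patch passes the lengths themselves as the mask):

    import tensorflow as tf

    lengths = tf.constant([3, 0, 5, 0])                   # 0 marks a "fake" turn
    real = tf.boolean_mask(lengths, lengths > 0)          # -> [3, 5], flat
    real = tf.expand_dims(real, axis=-1)                  # -> [[3], [5]]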
 
     @staticmethod
     def _convert_to_original_shape(
         attribute_features: tf.Tensor,
-        tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]],
+        attribute_mask: tf.Tensor,
+        dialogue_lengths: tf.Tensor,
     ) -> tf.Tensor:
         """Transform attribute features back to original shape.
@@ -901,29 +956,34 @@ def _convert_to_original_shape(
         Original shape: batch x dialogue length x units
 
         Args:
-            attribute_features: the features to convert
-            tf_batch_data: the batch data
+            attribute_features: the "real" features to convert
+            attribute_mask: the tensor containing the position of "real" features
+                in the dialogue, shape is (batch-size x dialogue_len x 1)
+            dialogue_lengths: the tensor containing the actual dialogue length,
+                shape is (batch-size,)
 
         Returns:
             The converted attribute features
         """
-        # dialogue lengths contains the actual dialogue length
-        # shape is batch-size x 1
-        dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32)
-
         # in order to convert the attribute features with shape
         # combined batch-size and dialogue length x 1 x units
-        # to a shape of
-        # batch-size x dialogue length x units
+        # to a shape of batch-size x dialogue length x units
         # we use tf.scatter_nd. Therefore, we need the target shape and the indices
         # mapping the values of attribute features to the position in the resulting
         # tensor.
-        batch_dim = tf.size(dialogue_lengths)
-        dialogue_dim = tf.reduce_max(dialogue_lengths)
+        batch_dim = tf.shape(attribute_mask)[0]
+        dialogue_dim = tf.shape(attribute_mask)[1]
         units = attribute_features.shape[-1]
 
-        batch_indices = tf.repeat(tf.range(batch_dim), dialogue_lengths)
+        # attribute_mask has shape (batch x dialogue_len x 1), remove the last dimension
+        attribute_mask = tf.cast(tf.squeeze(attribute_mask, axis=-1), dtype=tf.int32)
+        # the sum of the attribute mask is the number of dialogue turns
+        # with "real" features
+        non_fake_dialogue_lengths = tf.reduce_sum(attribute_mask, axis=-1)
+
+        batch_indices = tf.repeat(tf.range(batch_dim), non_fake_dialogue_lengths)
+
         dialogue_indices = (
             tf.map_fn(
                 tf.range,
                 dialogue_lengths,
@@ -931,6 +991,17 @@ def _convert_to_original_shape(
                 fn_output_signature=tf.RaggedTensorSpec(shape=[None], dtype=tf.int32),
             )
         ).values
+
+        # attribute_mask now has shape (batch x dialogue_len), while
+        # dialogue_indices has shape (combined_dialogue_len,);
+        # in order to find the positions of real input we need to flatten
+        # the attribute mask to (combined_dialogue_len,)
+        dialogue_indices_mask = tf.boolean_mask(
+            attribute_mask, tf.sequence_mask(dialogue_lengths, dtype=tf.int32)
+        )
+        # pick only those indices that contain "real" input
+        dialogue_indices = tf.boolean_mask(dialogue_indices, dialogue_indices_mask)
+
         indices = tf.stack([batch_indices, dialogue_indices], axis=1)
 
         shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units])
@@ -1041,6 +1112,9 @@ def batch_loss(
 
         return loss
 
+    def prepare_for_predict(self) -> None:
+        _, self.all_labels_embed = self._create_all_labels_embed()
+
     def batch_predict(
         self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]]
     ) -> Dict[Text, tf.Tensor]:
@@ -1052,13 +1126,16 @@ def batch_predict(
         Returns:
             The output to predict.
         """
+        if self.all_labels_embed is None:
+            raise ValueError(
+                "The model was not prepared for prediction. "
+                "Call `prepare_for_predict` first."
+            )
+
         tf_batch_data = self.batch_to_model_data_format(
             batch_in, self.predict_data_signature
         )
 
-        if self.all_labels_embed is None:
-            _, self.all_labels_embed = self._create_all_labels_embed()
-
         dialogue_in = self._process_batch_data(tf_batch_data)
         dialogue_embed, dialogue_mask = self._emebed_dialogue(
             dialogue_in, tf_batch_data
         )
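To make `_convert_to_original_shape` concrete, here is a standalone sketch of the same scatter step. It builds the (batch, turn) indices with `tf.where` on the mask instead of the `repeat`/`map_fn` construction in the patch, but the effect is identical: encoded "real" turns are scattered back into a zero tensor, and turns that only had fake features stay zero.

    import tensorflow as tf

    attribute_mask = tf.constant([[1, 0, 1], [1, 1, 0]])        # batch x dialogue_len
    real_features = tf.constant([[0.1], [0.2], [0.3], [0.4]])   # one row per "real" turn, units=1

    positions = tf.where(attribute_mask > 0)                    # (batch, turn) pairs, row-major
    restored = tf.scatter_nd(positions, real_features, shape=(2, 3, 1))
    # restored[0, 1] and restored[1, 2] remain zero: those turns had only fake features

The row-major order of `tf.where` matches the order in which the real turns were packed into the combined batch-and-dialogue dimension, so no extra sorting is needed.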
" + "Call `prepare_for_predict` first." + ) + tf_batch_data = self.batch_to_model_data_format( batch_in, self.predict_data_signature ) - if self.all_labels_embed is None: - _, self.all_labels_embed = self._create_all_labels_embed() - dialogue_in = self._process_batch_data(tf_batch_data) dialogue_embed, dialogue_mask = self._emebed_dialogue( dialogue_in, tf_batch_data diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index fe0890c8a66c..09e6ea59b653 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1580,6 +1580,10 @@ def _update_entity_metrics(self, loss: tf.Tensor, f1: tf.Tensor, tag_name: Text) self.entity_role_loss.update_state(loss) self.entity_role_f1.update_state(f1) + def prepare_for_predict(self) -> None: + if self.config[INTENT_CLASSIFICATION]: + _, self.all_labels_embed = self._create_all_labels() + def batch_predict( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> Dict[Text, tf.Tensor]: @@ -1665,7 +1669,10 @@ def _batch_predict_intents( ) -> Dict[Text, tf.Tensor]: if self.all_labels_embed is None: - _, self.all_labels_embed = self._create_all_labels() + raise ValueError( + "The model was not prepared for prediction. " + "Call `prepare_for_predict` first." + ) # get sentence feature vector for intent classification sentence_vector = self._last_token(text_transformed, sequence_lengths) diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py index 99bc485c631f..c507e5f2c74a 100644 --- a/rasa/shared/core/trackers.py +++ b/rasa/shared/core/trackers.py @@ -455,8 +455,6 @@ def applied_events(self) -> List[Event]: ) if event.use_text_for_featurization is None: event.use_text_for_featurization = use_text_for_featurization - elif event.use_text_for_featurization != use_text_for_featurization: - logger.debug("Got contradicting user featurization info.") applied_events.append(event) else: diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py index 6d0c1bca2a92..46cf8fd5bd66 100644 --- a/rasa/utils/tensorflow/model_data.py +++ b/rasa/utils/tensorflow/model_data.py @@ -1039,6 +1039,23 @@ def _create_label_ids(label_ids: FeatureArray) -> np.ndarray: raise ValueError("Unsupported label_ids dimensions") + @staticmethod + def _filter_out_fake_inputs( + array_of_array_of_features: FeatureArray, + ) -> Union[List[List[np.ndarray]], List[List[scipy.sparse.spmatrix]]]: + return list( + filter( + # filter empty lists created by another filter + lambda x: len(x) > 0, + [ + # filter all the "fake" inputs, we know the input is "fake", + # when sequence dimension is `0` + list(filter(lambda x: x.shape[0] > 0, array_of_features)) + for array_of_features in array_of_array_of_features + ], + ) + ) + @staticmethod def _pad_dense_data(array_of_dense: FeatureArray) -> np.ndarray: """Pad data of different lengths. @@ -1082,6 +1099,17 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray: # the original shape and the original dialogue length is passed on to the model # it can be used to transform the 3D tensor back into 4D + # in order to create 4d tensor inputs, we created "fake" zero features + # for nonexistent inputs. To save calculation we filter this features before + # input to tf methods. 
diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py
index 6d0c1bca2a92..46cf8fd5bd66 100644
--- a/rasa/utils/tensorflow/model_data.py
+++ b/rasa/utils/tensorflow/model_data.py
@@ -1039,6 +1039,23 @@ def _create_label_ids(label_ids: FeatureArray) -> np.ndarray:
 
         raise ValueError("Unsupported label_ids dimensions")
 
+    @staticmethod
+    def _filter_out_fake_inputs(
+        array_of_array_of_features: FeatureArray,
+    ) -> Union[List[List[np.ndarray]], List[List[scipy.sparse.spmatrix]]]:
+        return list(
+            filter(
+                # filter out empty lists created by the inner filter
+                lambda x: len(x) > 0,
+                [
+                    # filter out all the "fake" inputs; an input is "fake"
+                    # when its sequence dimension is `0`
+                    list(filter(lambda x: x.shape[0] > 0, array_of_features))
+                    for array_of_features in array_of_array_of_features
+                ],
+            )
+        )
+
     @staticmethod
     def _pad_dense_data(array_of_dense: FeatureArray) -> np.ndarray:
         """Pad data of different lengths.
@@ -1082,6 +1099,17 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray:
 
         # the original shape and the original dialogue length is passed on to the model
         # it can be used to transform the 3D tensor back into 4D
+        # in order to create 4d tensor inputs, we created "fake" zero features
+        # for nonexistent inputs. To save computation, we filter these features
+        # out before passing the input to tf methods.
+        number_of_features = array_of_array_of_dense[0][0].shape[-1]
+        array_of_array_of_dense = RasaModelData._filter_out_fake_inputs(
+            array_of_array_of_dense
+        )
+        if not array_of_array_of_dense:
+            # return an empty 3d array with an appropriate last dimension
+            return np.zeros((0, 0, number_of_features), dtype=np.float32)
+
         combined_dialogue_len = sum(
             len(array_of_dense) for array_of_dense in array_of_array_of_dense
         )
@@ -1094,11 +1122,7 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray:
         )
 
         data_padded = np.zeros(
-            [
-                combined_dialogue_len,
-                max_seq_len,
-                array_of_array_of_dense[0][0].shape[-1],
-            ],
+            [combined_dialogue_len, max_seq_len, number_of_features,],
             dtype=array_of_array_of_dense[0][0].dtype,
         )
 
@@ -1163,6 +1187,21 @@ def _4d_scipy_matrix_to_values(
         # the original shape and the original dialogue length is passed on to the model
         # it can be used to transform the 3D tensor back into 4D
 
+        # in order to create 4d tensor inputs, we created "fake" zero features
+        # for nonexistent inputs. To save computation, we filter these features
+        # out before passing the input to tf methods.
+        number_of_features = array_of_array_of_sparse[0][0].shape[-1]
+        array_of_array_of_sparse = RasaModelData._filter_out_fake_inputs(
+            array_of_array_of_sparse
+        )
+        if not array_of_array_of_sparse:
+            # create an empty array with an appropriate last dimension
+            return [
+                np.empty((0, 3), dtype=np.int64),
+                np.array([], dtype=np.float32),
+                np.array([0, 0, number_of_features], dtype=np.int64),
+            ]
+
         # we need to make sure that the matrices are coo_matrices otherwise the
         # transformation does not work (e.g. you cannot access x.row, x.col)
         if not isinstance(array_of_array_of_sparse[0][0], scipy.sparse.coo_matrix):
@@ -1171,9 +1210,10 @@ def _4d_scipy_matrix_to_values(
                 for array_of_sparse in array_of_array_of_sparse
             ]
 
-        combined_dialogue_len = sum(
+        dialogue_len = [
             len(array_of_sparse) for array_of_sparse in array_of_array_of_sparse
-        )
+        ]
+        combined_dialogue_len = sum(dialogue_len)
         max_seq_len = max(
             [
                 x.shape[0]
@@ -1185,15 +1225,7 @@ def _4d_scipy_matrix_to_values(
         indices = np.hstack(
             [
                 np.vstack(
-                    [
-                        sum(
-                            len(array_of_sparse)
-                            for array_of_sparse in array_of_array_of_sparse[:i]
-                        )
-                        + j * np.ones_like(x.row),
-                        x.row,
-                        x.col,
-                    ]
+                    [sum(dialogue_len[:i]) + j * np.ones_like(x.row), x.row, x.col,]
                 )
                 for i, array_of_sparse in enumerate(array_of_array_of_sparse)
                 for j, x in enumerate(array_of_sparse)
             ]
         )
@@ -1208,7 +1240,6 @@ def _4d_scipy_matrix_to_values(
             ]
         )
 
-        number_of_features = array_of_array_of_sparse[0][0].shape[-1]
         shape = np.array((combined_dialogue_len, max_seq_len, number_of_features))
 
         return [
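The two early returns above are easiest to see with concrete arrays. A "fake" feature is now recognizable by its empty first (sequence) dimension, and when a whole attribute has nothing real left, the padding helpers return placeholders that still carry the feature dimension. A sketch mirroring `_filter_out_fake_inputs` and the empty-case returns (array sizes are illustrative):

    import numpy as np

    real = np.ones((4, 10), dtype=np.float32)   # 4 tokens x 10 features
    fake = np.zeros((0, 10), dtype=np.float32)  # "fake": no tokens, same feature dim

    dialogues = [[real, fake], [fake]]
    filtered = [[f for f in dialogue if f.shape[0] > 0] for dialogue in dialogues]
    filtered = [dialogue for dialogue in filtered if dialogue]  # drop now-empty dialogues
    assert len(filtered) == 1 and filtered[0][0].shape == (4, 10)

    # all-fake case: dense and sparse placeholders keep the last dimension
    dense_placeholder = np.zeros((0, 0, 10), dtype=np.float32)
    sparse_placeholder = [
        np.empty((0, 3), dtype=np.int64),      # no (turn, row, col) indices
        np.array([], dtype=np.float32),        # no values
        np.array([0, 0, 10], dtype=np.int64),  # dense shape, feature dim preserved
    ]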
             np.array([v[0] for v in values]), number_of_dimensions=3
         )
 
-    if consider_dialogue_dimension:
-        attribute_to_feature_arrays = {
-            MASK: [FeatureArray(np.array(attribute_masks), number_of_dimensions=4)]
-        }
-    else:
-        attribute_to_feature_arrays = {
-            MASK: [
-                FeatureArray(
-                    np.array(np.squeeze(attribute_masks, -1)), number_of_dimensions=3
-                )
-            ]
-        }
+    attribute_to_feature_arrays = {
+        MASK: [FeatureArray(np.array(attribute_masks), number_of_dimensions=3)]
+    }
 
     feature_types = set()
     feature_types.update(list(dense_features.keys()))
@@ -433,9 +426,10 @@ def _extract_features(
         for key, value in dialogue_dense_features.items():
             dense_features[key].append(value)
 
-        # add additional dimensions to attribute mask to get a 3D vector
-        # resulting shape dialogue length x 1 x 1
-        attribute_mask = np.expand_dims(np.expand_dims(attribute_mask, -1), -1)
+        # add an additional dimension to the attribute mask
+        # to get a vector of shape (dialogue length x 1);
+        # the batch dim will be added later
+        attribute_mask = np.expand_dims(attribute_mask, -1)
         attribute_masks.append(attribute_mask)
 
     return attribute_masks, dense_features, sparse_features
diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index af78b765fa76..3708f4a8a99b 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -153,6 +153,15 @@ def batch_loss(
         """
         raise NotImplementedError
 
+    def prepare_for_predict(self) -> None:
+        """Prepares the tf graph for prediction.
+
+        This method should contain the necessary tf calculations
+        and set the instance variables that are used in `batch_predict`,
+        for example the pre-calculation of `self.all_labels_embed`.
+        """
+        pass
+
     def batch_predict(
         self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]]
     ) -> Dict[Text, tf.Tensor]:
@@ -311,6 +320,7 @@ def build_for_predict(
         self, predict_data: RasaModelData, eager: bool = False
     ) -> None:
         self._training = False  # needed for tf graph mode
+        self.prepare_for_predict()
         self._predict_function = self._get_tf_call_model_function(
             predict_data.as_tf_dataset, self.batch_predict, eager, "prediction"
         )
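The `_create_zero_features` change above is what the adjusted tests below assert: a zero feature used to be a zero-filled array with the real feature's full shape, and is now an empty array that keeps only the feature dimension. In numbers (a sketch; `14` is an arbitrary feature count):

    import numpy as np
    import scipy.sparse

    units = 14
    dense_zero = np.zeros((0, units), dtype=np.float32)
    sparse_zero = scipy.sparse.coo_matrix((0, units), dtype=np.float32)

    assert dense_zero.shape == (0, units)
    assert sparse_zero.shape == (0, units)
    assert sparse_zero.nnz == 0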
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index 9ef6239529b6..c54f9e16314c 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -360,21 +360,21 @@ async def test_gen_batch(self, trained_policy, default_domain):
         (
             batch_label_ids,
             batch_action_name_mask,
-            batch_action_name_sentence_1,
-            batch_action_name_sentence_2,
-            batch_action_name_sentence_3,
+            batch_action_name_sentence_indices,
+            batch_action_name_sentence_data,
+            batch_action_name_sentence_shape,
             batch_entities_mask,
-            batch_entities_sentence_1,
-            batch_entities_sentence_2,
-            batch_entities_sentence_3,
+            batch_entities_sentence_indices,
+            batch_entities_sentence_data,
+            batch_entities_sentence_shape,
             batch_intent_mask,
-            batch_intent_sentence_1,
-            batch_intent_sentence_2,
-            batch_intent_sentence_3,
+            batch_intent_sentence_indices,
+            batch_intent_sentence_data,
+            batch_intent_sentence_shape,
             batch_slots_mask,
-            batch_slots_sentence_1,
-            batch_slots_sentence_2,
-            batch_slots_sentence_3,
+            batch_slots_sentence_indices,
+            batch_slots_sentence_data,
+            batch_slots_sentence_shape,
             batch_dialogue_length,
         ) = next(model_data._gen_batch(batch_size=batch_size))
 
@@ -382,55 +382,73 @@ async def test_gen_batch(self, trained_policy, default_domain):
         assert (
             batch_label_ids.shape[0] == batch_size
             and batch_dialogue_length.shape[0] == batch_size
         )
-        # batch and dialogue dimensions are combined
-        first_dimension_size = batch_size if self.max_history <= 1 else batch_size + 1
+        # batch and dialogue dimensions are NOT combined for masks
         assert (
-            batch_slots_mask.shape[0] == first_dimension_size
-            and batch_intent_mask.shape[0] == first_dimension_size
-            and batch_entities_mask.shape[0] == first_dimension_size
-            and batch_action_name_mask.shape[0] == first_dimension_size
+            batch_slots_mask.shape[0] == batch_size
+            and batch_intent_mask.shape[0] == batch_size
+            and batch_entities_mask.shape[0] == batch_size
+            and batch_action_name_mask.shape[0] == batch_size
+        )
+        # some features might be "fake", so their sequence length is `0`
+        seq_len = max(
+            [
+                batch_intent_sentence_shape[1],
+                batch_action_name_sentence_shape[1],
+                batch_entities_sentence_shape[1],
+                batch_slots_sentence_shape[1],
+            ]
+        )
+        assert (
+            batch_intent_sentence_shape[1] == seq_len
+            or batch_intent_sentence_shape[1] == 0
+        )
+        assert (
+            batch_action_name_sentence_shape[1] == seq_len
+            or batch_action_name_sentence_shape[1] == 0
+        )
+        assert (
+            batch_entities_sentence_shape[1] == seq_len
+            or batch_entities_sentence_shape[1] == 0
         )
         assert (
-            batch_intent_sentence_3[1]
-            == batch_action_name_sentence_3[1]
-            == batch_entities_sentence_3[1]
-            == batch_slots_sentence_3[1]
+            batch_slots_sentence_shape[1] == seq_len
+            or batch_slots_sentence_shape[1] == 0
         )
 
         (
             batch_label_ids,
+            batch_action_name_mask,
+            batch_action_name_sentence_indices,
+            batch_action_name_sentence_data,
+            batch_action_name_sentence_shape,
             batch_entities_mask,
-            batch_entities_sentence_1,
-            batch_entities_sentence_2,
-            batch_entities_sentence_3,
+            batch_entities_sentence_indices,
+            batch_entities_sentence_data,
+            batch_entities_sentence_shape,
             batch_intent_mask,
-            batch_intent_sentence_1,
-            batch_intent_sentence_2,
-            batch_intent_sentence_3,
+            batch_intent_sentence_indices,
+            batch_intent_sentence_data,
+            batch_intent_sentence_shape,
             batch_slots_mask,
-            batch_slots_sentence_1,
-            batch_slots_sentence_2,
-            batch_slots_sentence_3,
-            batch_action_name_mask,
-            batch_action_name_sentence_1,
-            batch_action_name_sentence_2,
-            batch_action_name_sentence_3,
+            batch_slots_sentence_indices,
+            batch_slots_sentence_data,
+            batch_slots_sentence_shape,
             batch_dialogue_length,
         ) = next(
             model_data._gen_batch(
                 batch_size=batch_size, batch_strategy="balanced", shuffle=True
             )
         )
 
         assert (
             batch_label_ids.shape[0] == batch_size
             and batch_dialogue_length.shape[0] == batch_size
         )
+        # some features might be "fake", so their sequence length is `0`
+        seq_len = max(
+            [
+                batch_intent_sentence_shape[1],
+                batch_action_name_sentence_shape[1],
+                batch_entities_sentence_shape[1],
+                batch_slots_sentence_shape[1],
+            ]
+        )
+        assert (
+            batch_intent_sentence_shape[1] == seq_len
+            or batch_intent_sentence_shape[1] == 0
+        )
+        assert (
+            batch_action_name_sentence_shape[1] == seq_len
+            or batch_action_name_sentence_shape[1] == 0
+        )
+        assert (
+            batch_entities_sentence_shape[1] == seq_len
+            or batch_entities_sentence_shape[1] == 0
+        )
         assert (
-            batch_intent_sentence_3[1]
-            == batch_action_name_sentence_3[1]
-            == batch_entities_sentence_3[1]
-            == batch_slots_sentence_3[1]
+            batch_slots_sentence_shape[1] == seq_len
+            or batch_slots_sentence_shape[1] == 0
         )
diff --git a/tests/utils/tensorflow/test_model_data_utils.py b/tests/utils/tensorflow/test_model_data_utils.py
index 11bc5723d528..f495222958df 100644
--- a/tests/utils/tensorflow/test_model_data_utils.py
+++ b/tests/utils/tensorflow/test_model_data_utils.py
@@ -43,7 +43,7 @@ def test_create_zero_features():
     zero_features = model_data_utils._create_zero_features(features)
     assert len(zero_features) == 1
     assert zero_features[0].is_dense()
-    assert (zero_features[0].features == np.zeros(shape)).all()
+    assert zero_features[0].features.shape == (0, shape)
 
     # SPARSE FEATURES
     sparse_feature_sentence_features = Features(
@@ -56,7 +56,8 @@ def test_create_zero_features():
     zero_features = model_data_utils._create_zero_features(features)
     assert len(zero_features) == 1
     assert zero_features[0].is_sparse()
-    assert (zero_features[0].features != scipy.sparse.coo_matrix((1, shape))).nnz == 0
+    assert zero_features[0].features.shape == (0, shape)
+    assert zero_features[0].features.nnz == 0
 
 
 def test_surface_attributes():
@@ -168,7 +169,7 @@ def test_extract_features():
         dense_features,
         sparse_features,
     ) = model_data_utils._extract_features(tracker_features, zero_features_list, INTENT)
-    expected_mask = np.array([[[1], [0], [1]], [[0], [0], [1]], [[1], [1], [1]]])
+    expected_mask = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]])
 
     assert np.all(np.squeeze(np.array(attribute_masks), 2) == expected_mask)
     assert np.array(dense_features[SENTENCE]).shape[-1] == zero_features.shape[-1]