Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

create empty fakes #7198

Merged
merged 15 commits into from
Nov 9, 2020
55 changes: 42 additions & 13 deletions rasa/core/policies/ted_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,13 +819,20 @@ def _encode_features_per_attribute(
Returns:
A tensor combining all features for `attribute`
"""
attribute_mask = tf_batch_data[attribute][MASK][0]
# attribute_mask = tf_batch_data[attribute][MASK][0]
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved

if attribute in SEQUENCE_FEATURES_TO_ENCODE:
_sequence_lengths = tf.cast(
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
tf_batch_data[attribute][SEQUENCE_LENGTH][0], dtype=tf.int32
)
_sequence_lengths = tf.squeeze(_sequence_lengths, axis=-1)
_mask_sequence_lengths = tf.cast(_sequence_lengths, dtype=tf.bool)
# extract only nonzero lengths
_sequence_lengths = tf.boolean_mask(
_sequence_lengths, _mask_sequence_lengths
)
# boolean mask returns flatten tensor
_sequence_lengths = tf.expand_dims(_sequence_lengths, axis=-1)

mask_sequence_text = tf.squeeze(
self._compute_mask(_sequence_lengths), axis=1
)
Expand Down Expand Up @@ -859,9 +866,7 @@ def _encode_features_per_attribute(
# resulting attribute features will have shape
# combined batch dimension and dialogue length x 1 x units
attribute_features = self._combine_sparse_dense_features(
tf_batch_data[attribute][SENTENCE],
f"{attribute}_{SENTENCE}",
mask=attribute_mask,
tf_batch_data[attribute][SENTENCE], f"{attribute}_{SENTENCE}",
)

if attribute in set(
Expand All @@ -873,8 +878,8 @@ def _encode_features_per_attribute(
attribute_features
)

attribute_features = attribute_features * attribute_mask

# attribute_mask has shape batch x dialogue_len x 1
attribute_mask = tf_batch_data[attribute][MASK][0]
if attribute in set(
SENTENCE_FEATURES_TO_ENCODE
+ SEQUENCE_FEATURES_TO_ENCODE
Expand All @@ -884,16 +889,22 @@ def _encode_features_per_attribute(
# combined batch dimension and dialogue length x 1 x units
# convert them back to their original shape of
# batch size x dialogue length x units
dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32)
attribute_features = self._convert_to_original_shape(
attribute_features, attribute_mask, dialogue_lengths
)
elif attribute in LABEL_FEATURES_TO_ENCODE:
attribute_features = self._convert_to_original_shape(
attribute_features, tf_batch_data
attribute_features, attribute_mask
)

return attribute_features

@staticmethod
def _convert_to_original_shape(
attribute_features: tf.Tensor,
tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]],
attribute_mask: tf.Tensor,
dialogue_lengths: Optional[tf.Tensor] = None,
) -> tf.Tensor:
"""Transform attribute features back to original shape.

Expand All @@ -909,7 +920,6 @@ def _convert_to_original_shape(
"""
# dialogue lengths contains the actual dialogue length
# shape is batch-size x 1
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
dialogue_lengths = tf.cast(tf_batch_data[DIALOGUE][LENGTH][0], tf.int32)

# in order to convert the attribute features with shape
# combined batch-size and dialogue length x 1 x units
Expand All @@ -919,18 +929,37 @@ def _convert_to_original_shape(
# mapping the values of attribute features to the position in the resulting
# tensor.

batch_dim = tf.size(dialogue_lengths)
dialogue_dim = tf.reduce_max(dialogue_lengths)
batch_dim = tf.shape(attribute_mask)[0]
dialogue_dim = tf.shape(attribute_mask)[1]
units = attribute_features.shape[-1]
if dialogue_lengths is None:
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
dialogue_lengths = tf.ones((batch_dim,), dtype=tf.int32)

attribute_mask = tf.cast(tf.squeeze(attribute_mask, axis=-1), tf.int32)
# sum of attribute mask contains number of dialogue turns with "real" features
non_fake_dialogue_lengths = tf.reduce_sum(attribute_mask, axis=-1)

batch_indices = tf.repeat(tf.range(batch_dim), non_fake_dialogue_lengths)

batch_indices = tf.repeat(tf.range(batch_dim), dialogue_lengths)
dialogue_indices = (
tf.map_fn(
tf.range,
dialogue_lengths,
fn_output_signature=tf.RaggedTensorSpec(shape=[None], dtype=tf.int32),
)
).values

# attribute_mask has shape (batch x dialogue_len x 1), while
# dialogue_indices has shape (combined_dialoge_len,)
# in order to find positions of real input we use need to flatten
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
# attribute mask to (combined_dialoge_len,)
combined_dialoge_len_mask = tf.sequence_mask(dialogue_lengths, dtype=tf.int32)
dialogue_indices_mask = tf.boolean_mask(
attribute_mask, combined_dialoge_len_mask
)
# pick only those indices that contain "real" input
dialogue_indices = tf.boolean_mask(dialogue_indices, dialogue_indices_mask)

indices = tf.stack([batch_indices, dialogue_indices], axis=1)

shape = tf.convert_to_tensor([batch_dim, dialogue_dim, units])
Expand Down
2 changes: 0 additions & 2 deletions rasa/shared/core/trackers.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,8 +455,6 @@ def applied_events(self) -> List[Event]:
)
if event.use_text_for_featurization is None:
event.use_text_for_featurization = use_text_for_featurization
elif event.use_text_for_featurization != use_text_for_featurization:
logger.debug("Got contradicting user featurization info.")

applied_events.append(event)
else:
Expand Down
35 changes: 24 additions & 11 deletions rasa/utils/tensorflow/model_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,6 +1039,18 @@ def _create_label_ids(label_ids: FeatureArray) -> np.ndarray:

raise ValueError("Unsupported label_ids dimensions")

@staticmethod
def _filter_4d_arrays(array_of_array_of_features):
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
return list(
filter(
lambda x: len(x) > 0,
[
list(filter(lambda x: x.shape[0] > 0, array_of_features))
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
for array_of_features in array_of_array_of_features
],
)
)

@staticmethod
def _pad_dense_data(array_of_dense: FeatureArray) -> np.ndarray:
"""Pad data of different lengths.
Expand Down Expand Up @@ -1082,6 +1094,10 @@ def _pad_4d_dense_data(array_of_array_of_dense: FeatureArray) -> np.ndarray:
# the original shape and the original dialogue length is passed on to the model
# it can be used to transform the 3D tensor back into 4D

array_of_array_of_dense = RasaModelData._filter_4d_arrays(
array_of_array_of_dense
)

combined_dialogue_len = sum(
len(array_of_dense) for array_of_dense in array_of_array_of_dense
)
Expand Down Expand Up @@ -1163,6 +1179,10 @@ def _4d_scipy_matrix_to_values(
# the original shape and the original dialogue length is passed on to the model
# it can be used to transform the 3D tensor back into 4D

array_of_array_of_sparse = RasaModelData._filter_4d_arrays(
array_of_array_of_sparse
)

# we need to make sure that the matrices are coo_matrices otherwise the
# transformation does not work (e.g. you cannot access x.row, x.col)
if not isinstance(array_of_array_of_sparse[0][0], scipy.sparse.coo_matrix):
Expand All @@ -1171,9 +1191,10 @@ def _4d_scipy_matrix_to_values(
for array_of_sparse in array_of_array_of_sparse
]

combined_dialogue_len = sum(
dialogue_len = [
len(array_of_sparse) for array_of_sparse in array_of_array_of_sparse
)
]
combined_dialogue_len = sum(dialogue_len)
max_seq_len = max(
[
x.shape[0]
Expand All @@ -1185,15 +1206,7 @@ def _4d_scipy_matrix_to_values(
indices = np.hstack(
[
np.vstack(
[
sum(
len(array_of_sparse)
for array_of_sparse in array_of_array_of_sparse[:i]
)
+ j * np.ones_like(x.row),
x.row,
x.col,
]
[sum(dialogue_len[:i]) + j * np.ones_like(x.row), x.row, x.col,]
)
for i, array_of_sparse in enumerate(array_of_array_of_sparse)
for j, x in enumerate(array_of_sparse)
Expand Down
26 changes: 10 additions & 16 deletions rasa/utils/tensorflow/model_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,12 @@ def _create_zero_features(
for _features in example_features:
new_features = copy.deepcopy(_features)
if _features.is_dense():
new_features.features = np.zeros_like(_features.features)
new_features.features = np.zeros(
(0, _features.features.shape[-1]), _features.features.dtype
)
if _features.is_sparse():
new_features.features = scipy.sparse.coo_matrix(
_features.features.shape, _features.features.dtype
(0, _features.features.shape[-1]), _features.features.dtype
)
zero_features.append(new_features)

Expand Down Expand Up @@ -337,18 +339,9 @@ def _features_for_attribute(
np.array([v[0] for v in values]), number_of_dimensions=3
)

if consider_dialogue_dimension:
attribute_to_feature_arrays = {
MASK: [FeatureArray(np.array(attribute_masks), number_of_dimensions=4)]
}
else:
attribute_to_feature_arrays = {
MASK: [
FeatureArray(
np.array(np.squeeze(attribute_masks, -1)), number_of_dimensions=3
)
]
}
attribute_to_feature_arrays = {
MASK: [FeatureArray(np.array(attribute_masks), number_of_dimensions=3)]
}

feature_types = set()
feature_types.update(list(dense_features.keys()))
Expand Down Expand Up @@ -434,8 +427,9 @@ def _extract_features(
dense_features[key].append(value)

# add additional dimensions to attribute mask to get a 3D vector
# resulting shape dialogue length x 1 x 1
attribute_mask = np.expand_dims(np.expand_dims(attribute_mask, -1), -1)
# resulting shape dialogue length x 1
# but keep attribute_mask in 3d: (batch x dialogue_len x 1)
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
attribute_mask = np.expand_dims(attribute_mask, -1)
attribute_masks.append(attribute_mask)

return attribute_masks, dense_features, sparse_features