From 5c3870fb1a49fc0a9c181683a839c3100890bce2 Mon Sep 17 00:00:00 2001 From: Daksh Date: Sat, 19 Dec 2020 16:21:33 +0100 Subject: [PATCH 01/44] first version --- rasa/utils/tensorflow/layers.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index a9017094e945..aba25b46f86f 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -853,16 +853,33 @@ def _loss_softmax( ) -> tf.Tensor: """Define softmax loss.""" - logits = tf.concat( + softmax_logits = tf.concat( + [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 + ) + + sigmoid_logits = tf.concat( [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 ) # create label_ids for softmax - label_ids = tf.zeros_like(logits[..., 0], tf.int32) + softmax_label_ids = tf.zeros_like(softmax_logits[..., 0], tf.int32) + + sigmoid_label_ids = tf.concat( + [ + tf.expand_dims(tf.ones_like(sigmoid_logits[..., 0], tf.float32), -1), + tf.zeros_like(sigmoid_logits[..., 1:], tf.float32), + ], + axis=-1, + ) - loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=label_ids, logits=logits + softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=softmax_label_ids, logits=softmax_logits ) + sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits( + labels=sigmoid_label_ids, logits=sigmoid_logits + ) + + loss = softmax_loss + tf.reduce_mean(sigmoid_loss, axis=-1) if self.scale_loss: # in case of cross entropy log_likelihood = -loss @@ -878,6 +895,14 @@ def _loss_softmax( else: loss = tf.reduce_mean(loss, axis=-1) + tf.print( + tf.reduce_mean(sim_pos), + tf.reduce_mean(sim_neg_ii), + tf.reduce_mean(sim_neg_il), + tf.reduce_mean(sim_neg_ll), + tf.reduce_mean(sim_neg_li), + ) + # average the loss over the batch return tf.reduce_mean(loss) From 8cff4ec91f953f465c42e5f634d07eb3247c5ea6 Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 21 Dec 2020 16:23:41 +0100 Subject: [PATCH 02/44] remove extra terms from softmax --- rasa/utils/tensorflow/layers.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index aba25b46f86f..07f16851edde 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -853,9 +853,7 @@ def _loss_softmax( ) -> tf.Tensor: """Define softmax loss.""" - softmax_logits = tf.concat( - [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 - ) + softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1) sigmoid_logits = tf.concat( [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 @@ -864,7 +862,7 @@ def _loss_softmax( # create label_ids for softmax softmax_label_ids = tf.zeros_like(softmax_logits[..., 0], tf.int32) - sigmoid_label_ids = tf.concat( + sigmoid_labels = tf.concat( [ tf.expand_dims(tf.ones_like(sigmoid_logits[..., 0], tf.float32), -1), tf.zeros_like(sigmoid_logits[..., 1:], tf.float32), @@ -876,7 +874,7 @@ def _loss_softmax( labels=softmax_label_ids, logits=softmax_logits ) sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits( - labels=sigmoid_label_ids, logits=sigmoid_logits + labels=sigmoid_labels, logits=sigmoid_logits ) loss = softmax_loss + tf.reduce_mean(sigmoid_loss, axis=-1) From 7d971d8084dd67ee024d02069efae59c5d15a031 Mon Sep 17 00:00:00 2001 From: Daksh Date: Wed, 6 Jan 2021 15:42:00 +0100 Subject: [PATCH 03/44] refactor based on config option. 
Ready for test --- rasa/core/policies/ted_policy.py | 4 ++ rasa/nlu/classifiers/diet_classifier.py | 4 ++ rasa/nlu/selectors/response_selector.py | 4 ++ rasa/utils/tensorflow/constants.py | 1 + rasa/utils/tensorflow/layers.py | 62 ++++++++++++++++--------- rasa/utils/tensorflow/models.py | 2 + 6 files changed, 54 insertions(+), 23 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index f06530b26a54..29c3e1ac9dfc 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -99,6 +99,7 @@ HIDDEN_LAYERS_SIZES, FEATURIZERS, ENTITY_RECOGNITION, + CONSTRAIN_SIMILARITIES, ) from rasa.shared.core.events import EntitiesAdded, Event from rasa.shared.nlu.training_data.message import Message @@ -272,6 +273,9 @@ class TEDPolicy(Policy): FEATURIZERS: [], # If set to true, entities are predicted in user utterances. ENTITY_RECOGNITION: True, + # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to + # ensure that similarity values are approximately bounded. Used inside softmax loss only. + CONSTRAIN_SIMILARITIES: True, } @staticmethod diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index d1f26fec25fd..8bf50afefc81 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -95,6 +95,7 @@ SEQUENCE_LENGTH, DENSE_DIMENSION, MASK, + CONSTRAIN_SIMILARITIES, ) logger = logging.getLogger(__name__) @@ -252,6 +253,9 @@ def required_components(cls) -> List[Type[Component]]: # Split entities by comma, this makes sense e.g. for a list of ingredients # in a recipie, but it doesn't make sense for the parts of an address SPLIT_ENTITIES_BY_COMMA: True, + # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to + # ensure that similarity values are approximately bounded. Used inside softmax loss only. + CONSTRAIN_SIMILARITIES: True, } # init helpers diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index ac78b6d3964a..a1b325ba8600 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -73,6 +73,7 @@ FEATURIZERS, CHECKPOINT_MODEL, DENSE_DIMENSION, + CONSTRAIN_SIMILARITIES, ) from rasa.nlu.constants import ( RESPONSE_SELECTOR_PROPERTY_NAME, @@ -230,6 +231,9 @@ def required_components(cls) -> List[Type[Component]]: FEATURIZERS: [], # Perform model checkpointing CHECKPOINT_MODEL: False, + # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to + # ensure that similarity values are approximately bounded. Used inside softmax loss only. + CONSTRAIN_SIMILARITIES: True, } def __init__( diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 29c046258dac..6595fbfa090e 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -38,6 +38,7 @@ DROP_RATE_ATTENTION = "drop_rate_attention" DROP_RATE_DIALOGUE = "drop_rate_dialogue" DROP_RATE_LABEL = "drop_rate_label" +CONSTRAIN_SIMILARITIES = "constrain_similarities" WEIGHT_SPARSITY = "weight_sparsity" diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 07f16851edde..fb1afc334165 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -566,6 +566,10 @@ class DotProductLoss(tf.keras.layers.Layer): to run in parallel. same_sampling: Boolean, if 'True' sample same negative labels for the whole batch. 
+ constrain_similarities: Boolean, if 'True' applies sigmoid on all + similarity terms and adds to the loss function to + ensure that similarity values are approximately bounded. + Used inside _loss_softmax() only. """ def __init__( @@ -580,6 +584,7 @@ def __init__( name: Optional[Text] = None, parallel_iterations: int = 1000, same_sampling: bool = False, + constrain_similarities=True, ) -> None: super().__init__(name=name) self.num_neg = num_neg @@ -591,6 +596,7 @@ def __init__( self.scale_loss = scale_loss self.parallel_iterations = parallel_iterations self.same_sampling = same_sampling + self.constrain_similarities = constrain_similarities @staticmethod def _make_flat(x: tf.Tensor) -> tf.Tensor: @@ -853,31 +859,49 @@ def _loss_softmax( ) -> tf.Tensor: """Define softmax loss.""" + # Similarity terms between input and label should be optimized relative + # to each other and hence use them as logits for softmax term softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1) - sigmoid_logits = tf.concat( - [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 - ) + if not self.constrain_similarities: + # Concatenate other similarity terms as well. Due to this, + # similarity values between input and label may not be + # approximately bounded in a defined range. + softmax_logits = tf.concat( + [softmax_logits, sim_neg_ii, sim_neg_ll], axis=-1 + ) # create label_ids for softmax softmax_label_ids = tf.zeros_like(softmax_logits[..., 0], tf.int32) - sigmoid_labels = tf.concat( - [ - tf.expand_dims(tf.ones_like(sigmoid_logits[..., 0], tf.float32), -1), - tf.zeros_like(sigmoid_logits[..., 1:], tf.float32), - ], - axis=-1, - ) - softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=softmax_label_ids, logits=softmax_logits ) - sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits( - labels=sigmoid_labels, logits=sigmoid_logits - ) - loss = softmax_loss + tf.reduce_mean(sigmoid_loss, axis=-1) + loss = softmax_loss + + if self.constrain_similarities: + # Constrain similarity values in a range by applying sigmoid + # on them individually so that they saturate at extreme values. 
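+            # With one positive column and num_neg negative columns per
+            # example, sigmoid_labels below comes out as [1, 0, ..., 0],
+            # i.e. every similarity is trained as an independent binary
+            # target rather than competing inside a single softmax.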
+ sigmoid_logits = tf.concat( + [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 + ) + + sigmoid_labels = tf.concat( + [ + tf.expand_dims( + tf.ones_like(sigmoid_logits[..., 0], tf.float32), -1 + ), + tf.zeros_like(sigmoid_logits[..., 1:], tf.float32), + ], + axis=-1, + ) + + sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits( + labels=sigmoid_labels, logits=sigmoid_logits + ) + + loss += tf.reduce_mean(sigmoid_loss, axis=-1) if self.scale_loss: # in case of cross entropy log_likelihood = -loss @@ -893,14 +917,6 @@ def _loss_softmax( else: loss = tf.reduce_mean(loss, axis=-1) - tf.print( - tf.reduce_mean(sim_pos), - tf.reduce_mean(sim_neg_ii), - tf.reduce_mean(sim_neg_il), - tf.reduce_mean(sim_neg_ll), - tf.reduce_mean(sim_neg_li), - ) - # average the loss over the batch return tf.reduce_mean(loss) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index f4ff88562645..bb81c0ac1772 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -55,6 +55,7 @@ CONCAT_DIMENSION, DROP_RATE_ATTENTION, SCALE_LOSS, + CONSTRAIN_SIMILARITIES, ) from rasa.utils.tensorflow import layers from rasa.utils.tensorflow.transformer import TransformerEncoder @@ -790,6 +791,7 @@ def _prepare_dot_product_loss( scale_loss, # set to 1 to get deterministic behaviour parallel_iterations=1 if self.random_seed is not None else 1000, + constrain_similarities=self.config[CONSTRAIN_SIMILARITIES], ) def _prepare_sparse_dense_dropout_layers( From bcecf412bd7125410c40d66006eab635a2a5a021 Mon Sep 17 00:00:00 2001 From: Daksh Date: Thu, 7 Jan 2021 11:45:53 +0100 Subject: [PATCH 04/44] add sigmoid based prediction during inference --- rasa/core/policies/ted_policy.py | 9 +++++++- rasa/nlu/classifiers/diet_classifier.py | 10 ++++++++- rasa/nlu/selectors/response_selector.py | 3 +++ rasa/utils/tensorflow/constants.py | 1 + rasa/utils/tensorflow/layers.py | 26 +++++++++++++++++----- rasa/utils/tensorflow/models.py | 2 ++ rasa/utils/train_utils.py | 29 ++++++++++++++++++++----- 7 files changed, 68 insertions(+), 12 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 29c3e1ac9dfc..37f9a424663d 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -100,6 +100,7 @@ FEATURIZERS, ENTITY_RECOGNITION, CONSTRAIN_SIMILARITIES, + RELATIVE_CONFIDENCE, ) from rasa.shared.core.events import EntitiesAdded, Event from rasa.shared.nlu.training_data.message import Message @@ -276,6 +277,8 @@ class TEDPolicy(Policy): # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, + # Return sigmoid based probabilities during prediction. + RELATIVE_CONFIDENCE: True, } @staticmethod @@ -613,10 +616,14 @@ def predict_action_probabilities( confidence, is_e2e_prediction = self._pick_confidence(confidences, similarities) if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0: - confidence = rasa.utils.train_utils.normalize( + confidence = rasa.utils.train_utils.sort_and_rank( confidence, self.config[RANKING_LENGTH] ) + if self.config[RELATIVE_CONFIDENCE]: + # Normalize the values if returned probabilities are from softmax. 
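+                # sort_and_rank() zeroes out everything below the top-k
+                # cutoff, so the remaining values no longer sum up to 1
+                # and need to be re-normalized here.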
+ confidence = rasa.utils.train_utils.normalize(confidence) + optional_events = self._create_optional_event_for_entities( output, is_e2e_prediction, interpreter, tracker ) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 8bf50afefc81..9fc18febf5cb 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -96,6 +96,7 @@ DENSE_DIMENSION, MASK, CONSTRAIN_SIMILARITIES, + RELATIVE_CONFIDENCE, ) logger = logging.getLogger(__name__) @@ -256,6 +257,8 @@ def required_components(cls) -> List[Type[Component]]: # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, + # Return sigmoid based probabilities during prediction. + RELATIVE_CONFIDENCE: True, } # init helpers @@ -858,10 +861,15 @@ def _predict_label( self.component_config[LOSS_TYPE] == SOFTMAX and self.component_config[RANKING_LENGTH] > 0 ): - message_sim = train_utils.normalize( + message_sim = train_utils.sort_and_rank( message_sim, self.component_config[RANKING_LENGTH] ) + if self.component_config[RELATIVE_CONFIDENCE]: + # Normalize the values if returned probabilities are from + # softmax(hence relative to each other). + message_sim = train_utils.normalize(message_sim) + message_sim[::-1].sort() message_sim = message_sim.tolist() diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index a1b325ba8600..ba00a29e9bf0 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -74,6 +74,7 @@ CHECKPOINT_MODEL, DENSE_DIMENSION, CONSTRAIN_SIMILARITIES, + RELATIVE_CONFIDENCE, ) from rasa.nlu.constants import ( RESPONSE_SELECTOR_PROPERTY_NAME, @@ -234,6 +235,8 @@ def required_components(cls) -> List[Type[Component]]: # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, + # Return sigmoid based probabilities during prediction. + RELATIVE_CONFIDENCE: False, } def __init__( diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 6595fbfa090e..6f462ec1381e 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -53,6 +53,7 @@ DENSE_INPUT_DROPOUT = "use_dense_input_dropout" RANKING_LENGTH = "ranking_length" +RELATIVE_CONFIDENCE = "relative_confidence" BILOU_FLAG = "BILOU_flag" diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index fb1afc334165..93a294c72bab 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -570,6 +570,8 @@ class DotProductLoss(tf.keras.layers.Layer): similarity terms and adds to the loss function to ensure that similarity values are approximately bounded. Used inside _loss_softmax() only. + relative_confidence: Boolean, if 'True' confidence is calculated by applying + softmax over similarities, else sigmoid is applied on individual similarities. 
""" def __init__( @@ -584,7 +586,8 @@ def __init__( name: Optional[Text] = None, parallel_iterations: int = 1000, same_sampling: bool = False, - constrain_similarities=True, + constrain_similarities: bool = True, + relative_confidence: bool = True, ) -> None: super().__init__(name=name) self.num_neg = num_neg @@ -597,6 +600,7 @@ def __init__( self.parallel_iterations = parallel_iterations self.same_sampling = same_sampling self.constrain_similarities = constrain_similarities + self.relative_confidence = relative_confidence @staticmethod def _make_flat(x: tf.Tensor) -> tf.Tensor: @@ -737,14 +741,26 @@ def sim(a: tf.Tensor, b: tf.Tensor, mask: Optional[tf.Tensor] = None) -> tf.Tens return sim - @staticmethod - def confidence_from_sim(sim: tf.Tensor, similarity_type: Text) -> tf.Tensor: + def confidence_from_sim(self, sim: tf.Tensor, similarity_type: Text) -> tf.Tensor: + """Computes model confidence/probability from computed similarities. + + Args: + sim: Computed similarities + similarity_type: Similarity function to use - COSINE, INNER, AUTO. + + Returns: + Confidences corresponding to each similarity value. + """ if similarity_type == COSINE: # clip negative values to zero return tf.nn.relu(sim) else: - # normalize result to [0, 1] with softmax - return tf.nn.softmax(sim) + if self.relative_confidence: + # normalize result to [0, 1] with softmax + return tf.nn.softmax(sim) + else: + # Convert each individual similarity to probability + return tf.nn.sigmoid(sim) def _train_sim( self, diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index bb81c0ac1772..1abe040732ee 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -56,6 +56,7 @@ DROP_RATE_ATTENTION, SCALE_LOSS, CONSTRAIN_SIMILARITIES, + RELATIVE_CONFIDENCE, ) from rasa.utils.tensorflow import layers from rasa.utils.tensorflow.transformer import TransformerEncoder @@ -792,6 +793,7 @@ def _prepare_dot_product_loss( # set to 1 to get deterministic behaviour parallel_iterations=1 if self.random_seed is not None else 1000, constrain_similarities=self.config[CONSTRAIN_SIMILARITIES], + relative_confidence=self.config[RELATIVE_CONFIDENCE], ) def _prepare_sparse_dense_dropout_layers( diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index fb9ea1faf6ed..9eec452176cb 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -32,19 +32,38 @@ from rasa.nlu.tokenizers.tokenizer import Token -def normalize(values: np.ndarray, ranking_length: Optional[int] = 0) -> np.ndarray: +def normalize(values: np.ndarray) -> np.ndarray: """Normalizes an array of positive numbers over the top `ranking_length` values. + Args: + values: Values to normalize + + Returns: + Normalized values. + """ + new_values = values.copy() + + if np.sum(new_values) > 0: + new_values = new_values / np.sum(new_values) + + return new_values + + +def sort_and_rank(values: np.ndarray, ranking_length: Optional[int] = 0) -> np.ndarray: + """Sorts the values in descending order and keep only top `ranking_length` values. + Other values will be set to 0. + Args: + values: Values to sort and rank + ranking_length: number of values to maintain above 0. + + Returns: + Modified values. 
""" new_values = values.copy() # prevent mutation of the input if 0 < ranking_length < len(new_values): ranked = sorted(new_values, reverse=True) new_values[new_values < ranked[ranking_length - 1]] = 0 - - if np.sum(new_values) > 0: - new_values = new_values / np.sum(new_values) - return new_values From 6746bbdee0d817542a7cfb4b33683210690c6497 Mon Sep 17 00:00:00 2001 From: Daksh Date: Thu, 7 Jan 2021 12:56:11 +0100 Subject: [PATCH 05/44] docs, changelog, docstrings --- changelog/7616.improvement.md | 6 ++++++ docs/docs/components.mdx | 24 ++++++++++++++++++++++++ docs/docs/policies.mdx | 12 ++++++++++++ rasa/core/policies/ted_policy.py | 2 +- rasa/nlu/classifiers/diet_classifier.py | 2 +- rasa/nlu/selectors/response_selector.py | 6 +++--- rasa/utils/tensorflow/layers.py | 2 +- 7 files changed, 48 insertions(+), 6 deletions(-) create mode 100644 changelog/7616.improvement.md diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md new file mode 100644 index 000000000000..bd6007ae1bc3 --- /dev/null +++ b/changelog/7616.improvement.md @@ -0,0 +1,6 @@ +Constrain similarity values to an approximate range in `DotProductLoss` by applying sigmoid over them during training. + +This affects the default behaviour of the loss function(`loss_type=softmax`) inside machine learning(ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. +If you notice a degradation in performance, set `constrain_similarities=False` in the respective ML component. + +Also, adds an option `relative_confidence` to each ML component. Contrary to the default behaviour, if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 but they will not sum up to 1. \ No newline at end of file diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx index fa23ed2d93cd..f57866432aa8 100644 --- a/docs/docs/components.mdx +++ b/docs/docs/components.mdx @@ -1616,6 +1616,18 @@ However, additional parameters exist that can be adapted. | | | ... | | | | ``` | +---------------------------------+------------------+--------------------------------------------------------------+ +| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | +| | | it to the loss function to ensure that similarity values are | +| | | approximately bounded. Used only when `loss_type=softmax` | ++---------------------------------+------------------+--------------------------------------------------------------+ +| relative_confidence | True | If `True`, applies softmax on all similarity values for pairs| +| | | of input and labels. This means that output confidences | +| | | will always add up to 1. | +| | | If `False`, applies sigmoid on all similarity values for | +| | | pairs of input and labels. This means that confidence for | +| | | each label will be between 0 and 1 but all of them won't add | +| | | up to 1. | ++---------------------------------+------------------+--------------------------------------------------------------+ ``` :::note @@ -2809,6 +2821,18 @@ However, additional parameters exist that can be adapted. | | | Requires `evaluate_on_number_of_examples > 0` and | | | | `evaluate_every_number_of_epochs > 0` | +---------------------------------+-------------------+--------------------------------------------------------------+ +| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | +| | | it to the loss function to ensure that similarity values are | +| | | approximately bounded. 
Used only when `loss_type=softmax` | ++---------------------------------+------------------+--------------------------------------------------------------+ +| relative_confidence | True | If `True`, applies softmax on all similarity values for pairs| +| | | of input and labels. This means that output confidences | +| | | will always add up to 1. | +| | | If `False`, applies sigmoid on all similarity values for | +| | | pairs of input and labels. This means that confidence for | +| | | each label will be between 0 and 1 but all of them won't add | +| | | up to 1. | ++---------------------------------+------------------+--------------------------------------------------------------+ ``` :::note diff --git a/docs/docs/policies.mdx b/docs/docs/policies.mdx index 6dcd139907cf..788e03c43c55 100644 --- a/docs/docs/policies.mdx +++ b/docs/docs/policies.mdx @@ -320,6 +320,18 @@ However, additional parameters exist that can be adapted. | entity_recognition | True | If 'True' entity recognition is trained and entities are | | | | extracted. | +---------------------------------------+------------------------+--------------------------------------------------------------+ +| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | +| | | it to the loss function to ensure that similarity values are | +| | | approximately bounded. Used only when `loss_type=softmax` | ++---------------------------------------+------------------------+--------------------------------------------------------------+ +| relative_confidence | True | If `True`, applies softmax on all similarity values for pairs| +| | | of input and labels. This means that output confidences | +| | | will always add up to 1. | +| | | If `False`, applies sigmoid on all similarity values for | +| | | pairs of input and labels. This means that confidence for | +| | | each label will be between 0 and 1 but all of them won't add | +| | | up to 1. | ++---------------------------------------+------------------------+--------------------------------------------------------------+ ``` :::note diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 37f9a424663d..4e401befff6a 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -277,7 +277,7 @@ class TEDPolicy(Policy): # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, - # Return sigmoid based probabilities during prediction. + # Return softmax based probabilities during prediction. RELATIVE_CONFIDENCE: True, } diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 9fc18febf5cb..d13ba9e47bec 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -257,7 +257,7 @@ def required_components(cls) -> List[Type[Component]]: # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, - # Return sigmoid based probabilities during prediction. + # Return softmax based probabilities during prediction. 
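+        # If set to 'False', sigmoid is applied to each similarity value
+        # instead, so label confidences lie in [0, 1] but need not sum to 1.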
RELATIVE_CONFIDENCE: True, } diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index ba00a29e9bf0..779a50afccf4 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -235,8 +235,8 @@ def required_components(cls) -> List[Type[Component]]: # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, - # Return sigmoid based probabilities during prediction. - RELATIVE_CONFIDENCE: False, + # Return softmax based probabilities during prediction. + RELATIVE_CONFIDENCE: True, } def __init__( @@ -249,7 +249,7 @@ def __init__( responses: Optional[Dict[Text, List[Dict[Text, Any]]]] = None, finetune_mode: bool = False, ) -> None: - + """Declare instance variables with default values.""" component_config = component_config or {} # the following properties cannot be adapted for the ResponseSelector diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 93a294c72bab..04f98eb6fc85 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -589,6 +589,7 @@ def __init__( constrain_similarities: bool = True, relative_confidence: bool = True, ) -> None: + """Declare instance variables with default values.""" super().__init__(name=name) self.num_neg = num_neg self.loss_type = loss_type @@ -874,7 +875,6 @@ def _loss_softmax( mask: Optional[tf.Tensor], ) -> tf.Tensor: """Define softmax loss.""" - # Similarity terms between input and label should be optimized relative # to each other and hence use them as logits for softmax term softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1) From b674d731d0d85661cdd53fb0ca5781528cb77b09 Mon Sep 17 00:00:00 2001 From: Daksh Date: Thu, 7 Jan 2021 14:14:13 +0100 Subject: [PATCH 06/44] add tests --- rasa/core/policies/ted_policy.py | 14 +++++ rasa/nlu/classifiers/diet_classifier.py | 16 +++++- tests/nlu/classifiers/test_diet_classifier.py | 52 +++++++++++++++++++ tests/utils/test_train_utils.py | 21 ++++++++ 4 files changed, 102 insertions(+), 1 deletion(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 4e401befff6a..c8a5af723d9a 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -324,11 +324,25 @@ def __init__( self._label_data: Optional[RasaModelData] = None self.data_example: Optional[Dict[Text, List[np.ndarray]]] = None + def _check_similarity_confidence_setting(self) -> None: + if ( + not self.config[CONSTRAIN_SIMILARITIES] + and not self.config[RELATIVE_CONFIDENCE] + ): + raise ValueError( + f"If {CONSTRAIN_SIMILARITIES} is set to False, " + f"{RELATIVE_CONFIDENCE} cannot be set to False as" + f"similarities need to be constrained during training " + f"time in order to compute appropriate confidence values " + f"for each label at inference time." 
+ ) + def _load_params(self, **kwargs: Dict[Text, Any]) -> None: new_config = rasa.utils.train_utils.check_core_deprecated_options(kwargs) self.config = rasa.utils.train_utils.override_defaults( self.defaults, new_config ) + self._check_similarity_confidence_setting() self.config = rasa.utils.train_utils.update_similarity_type(self.config) self.config = rasa.utils.train_utils.update_evaluation_parameters(self.config) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index d13ba9e47bec..01c6ed3f6390 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -254,7 +254,7 @@ def required_components(cls) -> List[Type[Component]]: # Split entities by comma, this makes sense e.g. for a list of ingredients # in a recipie, but it doesn't make sense for the parts of an address SPLIT_ENTITIES_BY_COMMA: True, - # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to + # If 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, # Return softmax based probabilities during prediction. @@ -290,6 +290,19 @@ def _check_share_hidden_layers_sizes(self) -> None: f"{HIDDEN_LAYERS_SIZES} must coincide." ) + def _check_similarity_confidence_setting(self) -> None: + if ( + not self.component_config[CONSTRAIN_SIMILARITIES] + and not self.component_config[RELATIVE_CONFIDENCE] + ): + raise ValueError( + f"If {CONSTRAIN_SIMILARITIES} is set to False, " + f"{RELATIVE_CONFIDENCE} cannot be set to False as" + f"similarities need to be constrained during training " + f"time in order to compute appropriate confidence values " + f"for each label at inference time." 
+ ) + def _check_config_parameters(self) -> None: self.component_config = train_utils.check_deprecated_options( self.component_config @@ -297,6 +310,7 @@ def _check_config_parameters(self) -> None: self._check_masked_lm() self._check_share_hidden_layers_sizes() + self._check_similarity_confidence_setting() self.component_config = train_utils.update_similarity_type( self.component_config diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py index 90f20a61039e..be5f338b0f22 100644 --- a/tests/nlu/classifiers/test_diet_classifier.py +++ b/tests/nlu/classifiers/test_diet_classifier.py @@ -4,6 +4,7 @@ import pytest from unittest.mock import Mock from typing import List, Text, Dict, Any +from _pytest.monkeypatch import MonkeyPatch from rasa.shared.nlu.training_data.features import Features from rasa.nlu import train @@ -29,6 +30,7 @@ BILOU_FLAG, ENTITY_RECOGNITION, INTENT_CLASSIFICATION, + RELATIVE_CONFIDENCE, ) from rasa.nlu.components import ComponentBuilder from rasa.nlu.classifiers.diet_classifier import DIETClassifier @@ -311,6 +313,56 @@ async def test_softmax_normalization( assert parse_data.get("intent") == intent_ranking[0] +@pytest.mark.parametrize( + "classifier_params, output_length", + [({RANDOM_SEED: 42, EPOCHS: 1, RELATIVE_CONFIDENCE: False}, LABEL_RANKING_LENGTH)], +) +async def test_softmax_with_absolute_confidence( + component_builder, + tmp_path, + classifier_params, + output_length, + monkeypatch: MonkeyPatch, +): + pipeline = as_pipeline( + "WhitespaceTokenizer", "CountVectorsFeaturizer", "DIETClassifier" + ) + assert pipeline[2]["name"] == "DIETClassifier" + pipeline[2].update(classifier_params) + + _config = RasaNLUModelConfig({"pipeline": pipeline}) + (trained_model, _, persisted_path) = await train( + _config, + path=str(tmp_path), + data="data/test/many_intents.md", + component_builder=component_builder, + ) + loaded = Interpreter.load(persisted_path, component_builder) + + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + + parse_data = loaded.parse("hello") + intent_ranking = parse_data.get("intent_ranking") + + # check that the output was correctly truncated + assert len(intent_ranking) == output_length + + intent_confidences = [intent.get("confidence") for intent in intent_ranking] + + # check each confidence is in range + confidence_in_range = [ + 0.0 <= confidence <= 1.0 for confidence in intent_confidences + ] + assert all(confidence_in_range) + + # normalize shouldn't have been called + mock.normalize.assert_not_called() + + # check whether the normalization of rankings is reflected in intent prediction + assert parse_data.get("intent") == intent_ranking[0] + + @pytest.mark.parametrize( "classifier_params, output_length", [({LOSS_TYPE: "margin", RANDOM_SEED: 42, EPOCHS: 1}, LABEL_RANKING_LENGTH)], diff --git a/tests/utils/test_train_utils.py b/tests/utils/test_train_utils.py index 8400a2be68e9..3bdd6d743ef6 100644 --- a/tests/utils/test_train_utils.py +++ b/tests/utils/test_train_utils.py @@ -1,4 +1,6 @@ import numpy as np +import pytest +from typing import List import rasa.utils.train_utils as train_utils from rasa.nlu.constants import NUMBER_OF_SUB_TOKENS @@ -26,3 +28,22 @@ def test_align_token_features(): assert np.all(actual_features[0][3] == np.mean(token_features[0][3:5], axis=0)) # embedding is split into 4 sub-tokens assert np.all(actual_features[0][4] == np.mean(token_features[0][5:10], axis=0)) + + +def test_normalize(): + input_values = [0.7, 0.1, 0.1] + 
normalized_values = train_utils.normalize(np.array(input_values)) + assert np.allclose( + normalized_values, np.array([0.77777778, 0.11111111, 0.11111111]), atol=1e-5 + ) + + +@pytest.mark.parametrize( + "input_values, ranking_length, output_values", + [([0.5, 0.8, 0.1], 2, [0.5, 0.8, 0.0]), ([0.5, 0.3, 0.9], 5, [0.5, 0.3, 0.9]),], +) +def test_sort_and_rank( + input_values: List[float], ranking_length: int, output_values: List[float] +): + ranked_values = train_utils.sort_and_rank(np.array(input_values), ranking_length) + assert np.array_equal(ranked_values, output_values) From 4d4d52e7f0c5f9cb7599b9258faf3572f32774de Mon Sep 17 00:00:00 2001 From: Daksh Date: Tue, 12 Jan 2021 01:12:58 +0100 Subject: [PATCH 07/44] review comments --- changelog/7616.improvement.md | 8 ++-- rasa/core/policies/ted_policy.py | 16 +++++-- rasa/nlu/classifiers/diet_classifier.py | 33 ++++++------- rasa/nlu/selectors/response_selector.py | 6 +-- rasa/utils/tensorflow/constants.py | 1 + rasa/utils/tensorflow/layers.py | 62 +++++++++++++++++++------ rasa/utils/train_utils.py | 58 ++++++++++++++++++++++- 7 files changed, 140 insertions(+), 44 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index bd6007ae1bc3..b4849350acb4 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -1,6 +1,8 @@ -Constrain similarity values to an approximate range in `DotProductLoss` by applying sigmoid over them during training. +Added cross-entropy loss over sigmoid of all similarity values to constrain them in an approximate range in `DotProductLoss`. -This affects the default behaviour of the loss function(`loss_type=softmax`) inside machine learning(ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. +This affects the default behaviour of the loss function(`loss_type=cross_entropy`) inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. If you notice a degradation in performance, set `constrain_similarities=False` in the respective ML component. -Also, adds an option `relative_confidence` to each ML component. Contrary to the default behaviour, if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 but they will not sum up to 1. \ No newline at end of file +Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. + +Also, added an option `relative_confidence` to each ML component. Contrary to the default behaviour, if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 but they will not sum up to 1. \ No newline at end of file diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index c8a5af723d9a..37049f84365f 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -79,7 +79,8 @@ KEY_RELATIVE_ATTENTION, VALUE_RELATIVE_ATTENTION, MAX_RELATIVE_POSITION, - SOFTMAX, + CROSS_ENTROPY, + INNER, AUTO, BALANCED, TENSORBOARD_LOG_DIR, @@ -210,7 +211,7 @@ class TEDPolicy(Policy): # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'. SIMILARITY_TYPE: AUTO, # The type of the loss function, either 'softmax' or 'margin'. - LOSS_TYPE: SOFTMAX, + LOSS_TYPE: CROSS_ENTROPY, # Number of top actions to normalize scores for loss type 'softmax'. # Set to 0 to turn off normalization. 
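+    # NOTE: 'softmax' in the comment above refers to the deprecated alias
+    # of 'cross_entropy'; ranking still applies when loss_type=cross_entropy.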
RANKING_LENGTH: 10, @@ -342,7 +343,9 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self.config = rasa.utils.train_utils.override_defaults( self.defaults, new_config ) - self._check_similarity_confidence_setting() + rasa.utils.train_utils._check_similarity_confidence_setting(self.config) + rasa.utils.train_utils._check_similarity_loss_setting(self.config) + self.config = rasa.utils.train_utils.update_loss_type(self.config) self.config = rasa.utils.train_utils.update_similarity_type(self.config) self.config = rasa.utils.train_utils.update_evaluation_parameters(self.config) @@ -629,12 +632,15 @@ def predict_action_probabilities( # take correct prediction from batch confidence, is_e2e_prediction = self._pick_confidence(confidences, similarities) - if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0: + if self.config[LOSS_TYPE] == CROSS_ENTROPY and self.config[RANKING_LENGTH] > 0: confidence = rasa.utils.train_utils.sort_and_rank( confidence, self.config[RANKING_LENGTH] ) - if self.config[RELATIVE_CONFIDENCE]: + if ( + self.config[SIMILARITY_TYPE] == INNER + and self.config[RELATIVE_CONFIDENCE] + ): # Normalize the values if returned probabilities are from softmax. confidence = rasa.utils.train_utils.normalize(confidence) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 01c6ed3f6390..12ba2cb4e692 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -83,9 +83,10 @@ KEY_RELATIVE_ATTENTION, VALUE_RELATIVE_ATTENTION, MAX_RELATIVE_POSITION, - SOFTMAX, AUTO, + INNER, BALANCED, + CROSS_ENTROPY, TENSORBOARD_LOG_LEVEL, CONCAT_DIMENSION, FEATURIZERS, @@ -184,8 +185,8 @@ def required_components(cls) -> List[Type[Component]]: NUM_NEG: 20, # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'. SIMILARITY_TYPE: AUTO, - # The type of the loss function, either 'softmax' or 'margin'. - LOSS_TYPE: SOFTMAX, + # The type of the loss function, either 'cross_entropy' or 'margin'. + LOSS_TYPE: CROSS_ENTROPY, # Number of top actions to normalize scores for loss type 'softmax'. # Set to 0 to turn off normalization. RANKING_LENGTH: 10, @@ -290,19 +291,6 @@ def _check_share_hidden_layers_sizes(self) -> None: f"{HIDDEN_LAYERS_SIZES} must coincide." ) - def _check_similarity_confidence_setting(self) -> None: - if ( - not self.component_config[CONSTRAIN_SIMILARITIES] - and not self.component_config[RELATIVE_CONFIDENCE] - ): - raise ValueError( - f"If {CONSTRAIN_SIMILARITIES} is set to False, " - f"{RELATIVE_CONFIDENCE} cannot be set to False as" - f"similarities need to be constrained during training " - f"time in order to compute appropriate confidence values " - f"for each label at inference time." 
- ) - def _check_config_parameters(self) -> None: self.component_config = train_utils.check_deprecated_options( self.component_config @@ -310,7 +298,11 @@ def _check_config_parameters(self) -> None: self._check_masked_lm() self._check_share_hidden_layers_sizes() - self._check_similarity_confidence_setting() + + train_utils._check_similarity_confidence_setting(self.component_config) + train_utils._check_similarity_loss_setting(self.component_config) + + self.component_config = train_utils.update_loss_type(self.component_config) self.component_config = train_utils.update_similarity_type( self.component_config @@ -872,14 +864,17 @@ def _predict_label( label_ids = message_sim.argsort()[::-1] if ( - self.component_config[LOSS_TYPE] == SOFTMAX + self.component_config[LOSS_TYPE] == CROSS_ENTROPY and self.component_config[RANKING_LENGTH] > 0 ): message_sim = train_utils.sort_and_rank( message_sim, self.component_config[RANKING_LENGTH] ) - if self.component_config[RELATIVE_CONFIDENCE]: + if ( + self.component_config[SIMILARITY_TYPE] == INNER + and self.component_config[RELATIVE_CONFIDENCE] + ): # Normalize the values if returned probabilities are from # softmax(hence relative to each other). message_sim = train_utils.normalize(message_sim) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index 779a50afccf4..fa065eb7cdb7 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -64,7 +64,7 @@ MAX_RELATIVE_POSITION, RETRIEVAL_INTENT, USE_TEXT_AS_LABEL, - SOFTMAX, + CROSS_ENTROPY, AUTO, BALANCED, TENSORBOARD_LOG_DIR, @@ -171,8 +171,8 @@ def required_components(cls) -> List[Type[Component]]: NUM_NEG: 20, # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'. SIMILARITY_TYPE: AUTO, - # The type of the loss function, either 'softmax' or 'margin'. - LOSS_TYPE: SOFTMAX, + # The type of the loss function, either 'cross_entropy' or 'margin'. + LOSS_TYPE: CROSS_ENTROPY, # Number of top actions to normalize scores for loss type 'softmax'. # Set to 0 to turn off normalization. RANKING_LENGTH: 10, diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 6f462ec1381e..f8371d5c6975 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -66,6 +66,7 @@ AUTO = "auto" INNER = "inner" COSINE = "cosine" +CROSS_ENTROPY = "cross_entropy" BALANCED = "balanced" diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 04f98eb6fc85..3a034115a561 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -5,7 +5,13 @@ import rasa.utils.tensorflow.crf from tensorflow.python.keras.utils import tf_utils from tensorflow.python.keras import backend as K -from rasa.utils.tensorflow.constants import SOFTMAX, MARGIN, COSINE, INNER +from rasa.utils.tensorflow.constants import ( + SOFTMAX, + MARGIN, + COSINE, + INNER, + CROSS_ENTROPY, +) logger = logging.getLogger(__name__) @@ -589,7 +595,37 @@ def __init__( constrain_similarities: bool = True, relative_confidence: bool = True, ) -> None: - """Declare instance variables with default values.""" + """Declare instance variables with default values. + + Args: + num_neg: Positive integer, the number of incorrect labels; + the algorithm will minimize their similarity to the input. + loss_type: The type of the loss function, either 'softmax' or 'margin'. 
+ mu_pos: Float, indicates how similar the algorithm should + try to make embedding vectors for correct labels; + should be 0.0 < ... < 1.0 for 'cosine' similarity type. + mu_neg: Float, maximum negative similarity for incorrect labels, + should be -1.0 < ... < 1.0 for 'cosine' similarity type. + use_max_sim_neg: Boolean, if 'True' the algorithm only minimizes + maximum similarity over incorrect intent labels, + used only if 'loss_type' is set to 'margin'. + neg_lambda: Float, the scale of how important is to minimize + the maximum similarity between embeddings of different labels, + used only if 'loss_type' is set to 'margin'. + scale_loss: Boolean, if 'True' scale loss inverse proportionally to + the confidence of the correct prediction. + name: Optional name of the layer. + parallel_iterations: Positive integer, the number of iterations allowed + to run in parallel. + same_sampling: Boolean, if 'True' sample same negative labels + for the whole batch. + constrain_similarities: Boolean, if 'True' applies sigmoid on all + similarity terms and adds to the loss function to + ensure that similarity values are approximately bounded. + Used inside _loss_softmax() only. + relative_confidence: Boolean, if 'True' confidence is calculated by applying + softmax over similarities, else sigmoid is applied on individual similarities. + """ super().__init__(name=name) self.num_neg = num_neg self.loss_type = loss_type @@ -755,13 +791,12 @@ def confidence_from_sim(self, sim: tf.Tensor, similarity_type: Text) -> tf.Tenso if similarity_type == COSINE: # clip negative values to zero return tf.nn.relu(sim) + if self.relative_confidence: + # normalize result to [0, 1] with softmax + return tf.nn.softmax(sim) else: - if self.relative_confidence: - # normalize result to [0, 1] with softmax - return tf.nn.softmax(sim) - else: - # Convert each individual similarity to probability - return tf.nn.sigmoid(sim) + # Convert each individual similarity to probability + return tf.nn.sigmoid(sim) def _train_sim( self, @@ -865,7 +900,7 @@ def _loss_margin( return loss - def _loss_softmax( + def _loss_cross_entropy( self, sim_pos: tf.Tensor, sim_neg_il: tf.Tensor, @@ -874,7 +909,7 @@ def _loss_softmax( sim_neg_li: tf.Tensor, mask: Optional[tf.Tensor], ) -> tf.Tensor: - """Define softmax loss.""" + """Define cross entropy loss.""" # Similarity terms between input and label should be optimized relative # to each other and hence use them as logits for softmax term softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1) @@ -917,6 +952,7 @@ def _loss_softmax( labels=sigmoid_labels, logits=sigmoid_logits ) + # average over logits axis loss += tf.reduce_mean(sigmoid_loss, axis=-1) if self.scale_loss: @@ -942,12 +978,12 @@ def _chosen_loss(self) -> Callable: if self.loss_type == MARGIN: return self._loss_margin - elif self.loss_type == SOFTMAX: - return self._loss_softmax + elif self.loss_type == CROSS_ENTROPY: + return self._loss_cross_entropy else: raise ValueError( f"Wrong loss type '{self.loss_type}', " - f"should be '{MARGIN}' or '{SOFTMAX}'" + f"should be '{MARGIN}' or '{CROSS_ENTROPY}'" ) # noinspection PyMethodOverriding diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index 9eec452176cb..94daf5631962 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -19,9 +19,12 @@ AUTO, INNER, COSINE, + CROSS_ENTROPY, TRANSFORMER_SIZE, NUM_TRANSFORMER_LAYERS, DENSE_DIMENSION, + CONSTRAIN_SIMILARITIES, + RELATIVE_CONFIDENCE, ) from rasa.shared.nlu.constants import ACTION_NAME, 
INTENT, ENTITIES from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS @@ -77,7 +80,7 @@ def update_similarity_type(config: Dict[Text, Any]) -> Dict[Text, Any]: Returns: updated model configuration """ if config.get(SIMILARITY_TYPE) == AUTO: - if config[LOSS_TYPE] == SOFTMAX: + if config[LOSS_TYPE] == CROSS_ENTROPY: config[SIMILARITY_TYPE] = INNER elif config[LOSS_TYPE] == MARGIN: config[SIMILARITY_TYPE] = COSINE @@ -85,6 +88,27 @@ def update_similarity_type(config: Dict[Text, Any]) -> Dict[Text, Any]: return config +def update_loss_type(config: Dict[Text, Any]) -> Dict[Text, Any]: + """ + If LOSS_TYPE is set to 'softmax', update it to 'cross_entropy' since former is deprecated. + Args: + config: model configuration + + Returns: updated model configuration + """ + # TODO: Completely deprecate this with 3.0 + if config.get(LOSS_TYPE) == SOFTMAX: + rasa.shared.utils.io.raise_deprecation_warning( + f"`{LOSS_TYPE}={SOFTMAX}` is deprecated. " + f"Please update your configuration file to use" + f"`{LOSS_TYPE}={CROSS_ENTROPY}` instead.", + warn_until_version=NEXT_MAJOR_VERSION_FOR_DEPRECATIONS, + ) + config[LOSS_TYPE] = CROSS_ENTROPY + + return config + + def align_token_features( list_of_tokens: List[List["Token"]], in_token_features: np.ndarray, @@ -354,3 +378,35 @@ def override_defaults( config[key] = custom[key] return config + + +def _check_similarity_confidence_setting(component_config) -> None: + if ( + not component_config[CONSTRAIN_SIMILARITIES] + and not component_config[RELATIVE_CONFIDENCE] + ): + raise ValueError( + f"If {CONSTRAIN_SIMILARITIES} is set to False, " + f"{RELATIVE_CONFIDENCE} cannot be set to False as" + f"similarities need to be constrained during training " + f"time in order to compute appropriate confidence values " + f"for each label at inference time." + ) + + +def _check_similarity_loss_setting(component_config) -> None: + if ( + component_config[SIMILARITY_TYPE] == COSINE + and component_config[LOSS_TYPE] == CROSS_ENTROPY + or component_config[SIMILARITY_TYPE] == INNER + and component_config[LOSS_TYPE] == MARGIN + ): + raise rasa.shared.utils.io.raise_warning( + f"`{SIMILARITY_TYPE}={component_config[SIMILARITY_TYPE]}`" + f" and `{LOSS_TYPE}={component_config[LOSS_TYPE]}` " + f"is not a recommended setting as it may not lead to best results." + f"Ideally use `{SIMILARITY_TYPE}={INNER}`" + f" and `{LOSS_TYPE}={CROSS_ENTROPY}` or" + f"`{SIMILARITY_TYPE}={COSINE}` and `{LOSS_TYPE}={MARGIN}`.", + category=UserWarning, + ) From db17411e22ba05d701288b4554ca105d25be6658 Mon Sep 17 00:00:00 2001 From: Daksh Date: Tue, 12 Jan 2021 12:50:20 +0100 Subject: [PATCH 08/44] review comments --- changelog/7616.improvement.md | 4 +-- docs/docs/migration-guide.mdx | 16 +++++++++ rasa/core/policies/ted_policy.py | 15 ++------- rasa/nlu/classifiers/diet_classifier.py | 2 ++ rasa/utils/tensorflow/layers.py | 44 ++++--------------------- rasa/utils/train_utils.py | 16 +++++++-- 6 files changed, 42 insertions(+), 55 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index b4849350acb4..c1cdb8c8c966 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -1,8 +1,8 @@ -Added cross-entropy loss over sigmoid of all similarity values to constrain them in an approximate range in `DotProductLoss`. +Added sigmoid cross-entropy loss on all similarity values to constrain them to an approximate range in `DotProductLoss`. 
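+Without this extra term, the raw dot-product similarities are unbounded; the sigmoid cross-entropy drives each similarity towards a saturating target (1 for the positive pair, 0 for all sampled negative pairs), keeping the values in an approximately fixed range.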
This affects the default behaviour of the loss function(`loss_type=cross_entropy`) inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. If you notice a degradation in performance, set `constrain_similarities=False` in the respective ML component. Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. -Also, added an option `relative_confidence` to each ML component. Contrary to the default behaviour, if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 but they will not sum up to 1. \ No newline at end of file +Also, added an option `relative_confidence` to each ML component. Contrary to the default behaviour, if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 but they will not sum up to 1. It is also recommended to set `relative_confidence=False` as it will be made default in Rasa Open Source 3.0. You may need to tune fallback confidence thresholds after making this change. \ No newline at end of file diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 203b3d7bcc3b..2c34fdc904a5 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -10,6 +10,22 @@ description: | This page contains information about changes between major versions and how you can migrate from one version to another. +## Rasa 2.2 to Rasa 2.3 + +### Machine Learning Components + +Few changes have been made to the default loss function inside machine learning (ML) +components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include: +- Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. +- The default loss function (`loss_type=cross_entropy`) adds a sigmoid cross-entropy loss of all similarity values to constrain +them to an approximate range. If you notice a degradation in performance, set `constrain_similarities=False` +in the respective ML component. +- Added an option `relative_confidence` to each ML component. Contrary to the default behaviour, +if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 +but they will not sum up to 1. It is also recommended to set `relative_confidence=False` as it will be made +default in Rasa Open Source 3.0. You may need to tune fallback confidence thresholds after making this change. + + ## Rasa 2.1 to Rasa 2.2 ### General diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 37049f84365f..20191a300cfa 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -325,24 +325,12 @@ def __init__( self._label_data: Optional[RasaModelData] = None self.data_example: Optional[Dict[Text, List[np.ndarray]]] = None - def _check_similarity_confidence_setting(self) -> None: - if ( - not self.config[CONSTRAIN_SIMILARITIES] - and not self.config[RELATIVE_CONFIDENCE] - ): - raise ValueError( - f"If {CONSTRAIN_SIMILARITIES} is set to False, " - f"{RELATIVE_CONFIDENCE} cannot be set to False as" - f"similarities need to be constrained during training " - f"time in order to compute appropriate confidence values " - f"for each label at inference time." 
- ) - def _load_params(self, **kwargs: Dict[Text, Any]) -> None: new_config = rasa.utils.train_utils.check_core_deprecated_options(kwargs) self.config = rasa.utils.train_utils.override_defaults( self.defaults, new_config ) + rasa.utils.train_utils._check_confidence_setting(self.config) rasa.utils.train_utils._check_similarity_confidence_setting(self.config) rasa.utils.train_utils._check_similarity_loss_setting(self.config) self.config = rasa.utils.train_utils.update_loss_type(self.config) @@ -813,6 +801,7 @@ def load( label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data ) meta = rasa.utils.train_utils.update_similarity_type(meta) + meta = rasa.utils.train_utils.update_loss_type(meta) meta[EPOCHS] = epoch_override diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 12ba2cb4e692..b4931627620f 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -299,6 +299,7 @@ def _check_config_parameters(self) -> None: self._check_masked_lm() self._check_share_hidden_layers_sizes() + train_utils._check_confidence_setting(self.component_config) train_utils._check_similarity_confidence_setting(self.component_config) train_utils._check_similarity_loss_setting(self.component_config) @@ -1018,6 +1019,7 @@ def load( ) = cls._load_from_files(meta, model_dir) meta = train_utils.update_similarity_type(meta) + meta = train_utils.update_loss_type(meta) model = cls._load_model( entity_tag_specs, diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 3a034115a561..18ad1ec38378 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -548,37 +548,7 @@ def f1_score( class DotProductLoss(tf.keras.layers.Layer): - """Dot-product loss layer. - - Arguments: - num_neg: Positive integer, the number of incorrect labels; - the algorithm will minimize their similarity to the input. - loss_type: The type of the loss function, either 'softmax' or 'margin'. - mu_pos: Float, indicates how similar the algorithm should - try to make embedding vectors for correct labels; - should be 0.0 < ... < 1.0 for 'cosine' similarity type. - mu_neg: Float, maximum negative similarity for incorrect labels, - should be -1.0 < ... < 1.0 for 'cosine' similarity type. - use_max_sim_neg: Boolean, if 'True' the algorithm only minimizes - maximum similarity over incorrect intent labels, - used only if 'loss_type' is set to 'margin'. - neg_lambda: Float, the scale of how important is to minimize - the maximum similarity between embeddings of different labels, - used only if 'loss_type' is set to 'margin'. - scale_loss: Boolean, if 'True' scale loss inverse proportionally to - the confidence of the correct prediction. - name: Optional name of the layer. - parallel_iterations: Positive integer, the number of iterations allowed - to run in parallel. - same_sampling: Boolean, if 'True' sample same negative labels - for the whole batch. - constrain_similarities: Boolean, if 'True' applies sigmoid on all - similarity terms and adds to the loss function to - ensure that similarity values are approximately bounded. - Used inside _loss_softmax() only. - relative_confidence: Boolean, if 'True' confidence is calculated by applying - softmax over similarities, else sigmoid is applied on individual similarities. 
- """ + """Dot-product loss layer""" def __init__( self, @@ -600,7 +570,7 @@ def __init__( Args: num_neg: Positive integer, the number of incorrect labels; the algorithm will minimize their similarity to the input. - loss_type: The type of the loss function, either 'softmax' or 'margin'. + loss_type: The type of the loss function, either 'cross_entropy' or 'margin'. mu_pos: Float, indicates how similar the algorithm should try to make embedding vectors for correct labels; should be 0.0 < ... < 1.0 for 'cosine' similarity type. @@ -622,7 +592,7 @@ def __init__( constrain_similarities: Boolean, if 'True' applies sigmoid on all similarity terms and adds to the loss function to ensure that similarity values are approximately bounded. - Used inside _loss_softmax() only. + Used inside _loss_cross_entropy() only. relative_confidence: Boolean, if 'True' confidence is calculated by applying softmax over similarities, else sigmoid is applied on individual similarities. """ @@ -791,12 +761,12 @@ def confidence_from_sim(self, sim: tf.Tensor, similarity_type: Text) -> tf.Tenso if similarity_type == COSINE: # clip negative values to zero return tf.nn.relu(sim) - if self.relative_confidence: + elif self.relative_confidence: # normalize result to [0, 1] with softmax return tf.nn.softmax(sim) - else: - # Convert each individual similarity to probability - return tf.nn.sigmoid(sim) + + # In other cases convert each individual similarity to probability + return tf.nn.sigmoid(sim) def _train_sim( self, diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index 94daf5631962..f894c17dfe8f 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -380,6 +380,16 @@ def override_defaults( return config +def _check_confidence_setting(component_config) -> None: + if component_config[RELATIVE_CONFIDENCE]: + rasa.shared.utils.io.raise_warning( + f"{RELATIVE_CONFIDENCE} is set to `True`. It is recommended " + f"to set it to `False`. It will be set to `False` by default " + f"Rasa Open Source 3.0 onwards.", + category=UserWarning, + ) + + def _check_similarity_confidence_setting(component_config) -> None: if ( not component_config[CONSTRAIN_SIMILARITIES] @@ -387,9 +397,9 @@ def _check_similarity_confidence_setting(component_config) -> None: ): raise ValueError( f"If {CONSTRAIN_SIMILARITIES} is set to False, " - f"{RELATIVE_CONFIDENCE} cannot be set to False as" + f"{RELATIVE_CONFIDENCE} cannot be set to False as " f"similarities need to be constrained during training " - f"time in order to compute appropriate confidence values " + f"time as well in order to correctly compute confidence values " f"for each label at inference time." ) @@ -401,7 +411,7 @@ def _check_similarity_loss_setting(component_config) -> None: or component_config[SIMILARITY_TYPE] == INNER and component_config[LOSS_TYPE] == MARGIN ): - raise rasa.shared.utils.io.raise_warning( + rasa.shared.utils.io.raise_warning( f"`{SIMILARITY_TYPE}={component_config[SIMILARITY_TYPE]}`" f" and `{LOSS_TYPE}={component_config[LOSS_TYPE]}` " f"is not a recommended setting as it may not lead to best results." 
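
The commits that follow keep adjusting one idea: a softmax cross-entropy over the stacked similarities (the positive candidate sits at index 0) plus an element-wise sigmoid cross-entropy that pulls each raw similarity towards a bounded range. Below is a minimal, self-contained sketch of that combined loss; tensor names are illustrative and only two similarity terms are used instead of the five (`sim_pos`, `sim_neg_il`, `sim_neg_ll`, `sim_neg_ii`, `sim_neg_li`) that `DotProductLoss` concatenates.

```python
import tensorflow as tf


def constrained_cross_entropy(sim_pos: tf.Tensor, sim_neg: tf.Tensor) -> tf.Tensor:
    """Sketch: sim_pos is [batch, 1], sim_neg is [batch, num_neg]."""
    logits = tf.concat([sim_pos, sim_neg], axis=-1)

    # Softmax part: the correct label is always at index 0 of the logits.
    label_ids = tf.zeros_like(logits[..., 0], tf.int32)
    softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=label_ids, logits=logits
    )

    # Sigmoid part (what `constrain_similarities` toggles): treat each
    # similarity as an independent binary prediction, positive -> 1 and
    # negatives -> 0, so raw similarities saturate instead of growing freely.
    sigmoid_labels = tf.concat(
        [tf.ones_like(sim_pos), tf.zeros_like(sim_neg)], axis=-1
    )
    sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=sigmoid_labels, logits=logits
    )

    # Average over the batch, as the layer does after optional loss scaling.
    return tf.reduce_mean(softmax_loss + tf.reduce_mean(sigmoid_loss, axis=-1))
```

Dropping the sigmoid term recovers the old behaviour, which is what setting `constrain_similarities=False` does.
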
From 1d5527a33fccac83794d8c73dedb5f7026ddfd4b Mon Sep 17 00:00:00 2001 From: Daksh Date: Wed, 13 Jan 2021 00:40:03 +0100 Subject: [PATCH 09/44] remove sim_neg_ii to run experiments --- rasa/nlu/classifiers/diet_classifier.py | 1 + rasa/nlu/test.py | 14 ++++++++++++++ rasa/utils/tensorflow/layers.py | 13 ++++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index b4931627620f..b82675866168 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1020,6 +1020,7 @@ def load( meta = train_utils.update_similarity_type(meta) meta = train_utils.update_loss_type(meta) + # meta[RELATIVE_CONFIDENCE] = True model = cls._load_model( entity_tag_specs, diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index 837aec238855..e7167bcdfb45 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -332,6 +332,20 @@ def plot_attribute_confidences( if getattr(r, target_key) != getattr(r, prediction_key) ] + # import matplotlib.pyplot as plt + # + # plt.gcf().clear() + # + # fig = plt.hist(pos_hist) + # plt.title("Positive_sims") + # plt.savefig(f"{hist_filename.split('.')[0]}_pos.png") + # + # plt.gcf().clear() + # + # fig = plt.hist(neg_hist) + # plt.title("Negative_sims") + # plt.savefig(f"{hist_filename.split('.')[0]}_neg.png") + plot_utils.plot_histogram([pos_hist, neg_hist], title, hist_filename) diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 18ad1ec38378..9d138d29e327 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -710,6 +710,12 @@ def _get_negs( return neg_embeds, bad_negs + def _compute_vector_length(self, embedding): + norm = tf.norm(embedding, axis=-1) + mean_norm = tf.reduce_mean(norm) + std_norm = tf.math.reduce_std(norm) + # tf.print("Norm", mean_norm, std_norm) + def _sample_negatives( self, inputs_embed: tf.Tensor, @@ -720,6 +726,10 @@ def _sample_negatives( ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: """Sample negative examples.""" + # self._compute_vector_length(inputs_embed) + # tf.print('---------------') + # self._compute_vector_length(labels_embed) + # tf.print('===============s') pos_inputs_embed = tf.expand_dims(inputs_embed, axis=-2) pos_labels_embed = tf.expand_dims(labels_embed, axis=-2) @@ -758,6 +768,7 @@ def confidence_from_sim(self, sim: tf.Tensor, similarity_type: Text) -> tf.Tenso Returns: Confidences corresponding to each similarity value. """ + # return sim if similarity_type == COSINE: # clip negative values to zero return tf.nn.relu(sim) @@ -905,7 +916,7 @@ def _loss_cross_entropy( # Constrain similarity values in a range by applying sigmoid # on them individually so that they saturate at extreme values. 
sigmoid_logits = tf.concat( - [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 + [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_li], axis=-1 ) sigmoid_labels = tf.concat( From 763c2cd23d3649fbffd9eea2bb08b4d05a5c7638 Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 25 Jan 2021 13:04:06 +0100 Subject: [PATCH 10/44] revert back experimental change --- rasa/nlu/classifiers/diet_classifier.py | 1 - rasa/nlu/test.py | 14 -------------- rasa/utils/tensorflow/layers.py | 13 +------------ 3 files changed, 1 insertion(+), 27 deletions(-) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index b82675866168..b4931627620f 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1020,7 +1020,6 @@ def load( meta = train_utils.update_similarity_type(meta) meta = train_utils.update_loss_type(meta) - # meta[RELATIVE_CONFIDENCE] = True model = cls._load_model( entity_tag_specs, diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index e7167bcdfb45..837aec238855 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -332,20 +332,6 @@ def plot_attribute_confidences( if getattr(r, target_key) != getattr(r, prediction_key) ] - # import matplotlib.pyplot as plt - # - # plt.gcf().clear() - # - # fig = plt.hist(pos_hist) - # plt.title("Positive_sims") - # plt.savefig(f"{hist_filename.split('.')[0]}_pos.png") - # - # plt.gcf().clear() - # - # fig = plt.hist(neg_hist) - # plt.title("Negative_sims") - # plt.savefig(f"{hist_filename.split('.')[0]}_neg.png") - plot_utils.plot_histogram([pos_hist, neg_hist], title, hist_filename) diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 9d138d29e327..18ad1ec38378 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -710,12 +710,6 @@ def _get_negs( return neg_embeds, bad_negs - def _compute_vector_length(self, embedding): - norm = tf.norm(embedding, axis=-1) - mean_norm = tf.reduce_mean(norm) - std_norm = tf.math.reduce_std(norm) - # tf.print("Norm", mean_norm, std_norm) - def _sample_negatives( self, inputs_embed: tf.Tensor, @@ -726,10 +720,6 @@ def _sample_negatives( ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: """Sample negative examples.""" - # self._compute_vector_length(inputs_embed) - # tf.print('---------------') - # self._compute_vector_length(labels_embed) - # tf.print('===============s') pos_inputs_embed = tf.expand_dims(inputs_embed, axis=-2) pos_labels_embed = tf.expand_dims(labels_embed, axis=-2) @@ -768,7 +758,6 @@ def confidence_from_sim(self, sim: tf.Tensor, similarity_type: Text) -> tf.Tenso Returns: Confidences corresponding to each similarity value. """ - # return sim if similarity_type == COSINE: # clip negative values to zero return tf.nn.relu(sim) @@ -916,7 +905,7 @@ def _loss_cross_entropy( # Constrain similarity values in a range by applying sigmoid # on them individually so that they saturate at extreme values. sigmoid_logits = tf.concat( - [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_li], axis=-1 + [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 ) sigmoid_labels = tf.concat( From 2b9d532358f4ef3f3b6a81d43ab0b3dfea314134 Mon Sep 17 00:00:00 2001 From: Daksh Date: Sun, 31 Jan 2021 18:27:46 +0100 Subject: [PATCH 11/44] update similarity computation during prediction, to be tested. 
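
In terms of behaviour, the change this commit makes at prediction time can be summarised by the following sketch. The helper name and its standalone form are illustrative; inside the layer this logic lives next to `sim()` and reads the configured `model_confidence`.

```python
import tensorflow as tf


def confidence_from_embeddings(
    input_embed: tf.Tensor, label_embed: tf.Tensor, model_confidence: str = "softmax"
) -> tf.Tensor:
    """Sketch of the three `model_confidence` modes."""
    if model_confidence == "cosine":
        # Normalize to unit vectors so the dot product lands in [-1, 1].
        input_embed = tf.nn.l2_normalize(input_embed, axis=-1)
        label_embed = tf.nn.l2_normalize(label_embed, axis=-1)

    # Dot-product similarity, broadcast over all candidate labels.
    similarities = tf.reduce_sum(input_embed * label_embed, axis=-1)

    if model_confidence == "softmax":
        # Relative confidences over all labels, summing to 1.
        return tf.nn.softmax(similarities)
    # "cosine" returns values in [-1, 1]; "inner" returns unbounded values.
    return similarities
```

Note that in the diff below, the re-normalization applied after `ranking_length` filtering is kept only for the softmax case; cosine and inner scores are reported as they are.
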
--- rasa/core/policies/ted_policy.py | 12 +-- rasa/nlu/classifiers/diet_classifier.py | 21 +++-- rasa/nlu/selectors/response_selector.py | 12 ++- rasa/utils/plotting.py | 7 +- rasa/utils/tensorflow/constants.py | 2 +- rasa/utils/tensorflow/layers.py | 90 ++++++++++--------- rasa/utils/tensorflow/models.py | 4 +- rasa/utils/train_utils.py | 26 ++---- tests/nlu/classifiers/test_diet_classifier.py | 4 +- tests/utils/test_train_utils.py | 2 +- 10 files changed, 85 insertions(+), 95 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 8e67c4575219..4b2ee7ed8e32 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -103,7 +103,8 @@ FEATURIZERS, ENTITY_RECOGNITION, CONSTRAIN_SIMILARITIES, - RELATIVE_CONFIDENCE, + MODEL_CONFIDENCE, + SOFTMAX, ) from rasa.shared.core.events import EntitiesAdded, Event from rasa.shared.nlu.training_data.message import Message @@ -281,7 +282,7 @@ class TEDPolicy(Policy): # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, # Return softmax based probabilities during prediction. - RELATIVE_CONFIDENCE: True, + MODEL_CONFIDENCE: SOFTMAX, # Split entities by comma, this makes sense e.g. for a list of # ingredients in a recipe, but it doesn't make sense for the parts of # an address @@ -342,7 +343,6 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self.defaults, new_config ) rasa.utils.train_utils._check_confidence_setting(self.config) - rasa.utils.train_utils._check_similarity_confidence_setting(self.config) rasa.utils.train_utils._check_similarity_loss_setting(self.config) self.config = rasa.utils.train_utils.update_loss_type(self.config) self.config = rasa.utils.train_utils.update_similarity_type(self.config) @@ -632,14 +632,16 @@ def predict_action_probabilities( confidence, is_e2e_prediction = self._pick_confidence(confidences, similarities) if self.config[LOSS_TYPE] == CROSS_ENTROPY and self.config[RANKING_LENGTH] > 0: - confidence = rasa.utils.train_utils.sort_and_rank( + confidence = rasa.utils.train_utils.filter_top_k( confidence, self.config[RANKING_LENGTH] ) if ( self.config[SIMILARITY_TYPE] == INNER - and self.config[RELATIVE_CONFIDENCE] + and self.config[MODEL_CONFIDENCE] == SOFTMAX ): + # TODO: This should be removed in 3.0 when softmax as + # model confidence is completely deprecated. # Normalize the values if returned probabilities are from softmax. confidence = rasa.utils.train_utils.normalize(confidence) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index b4931627620f..3b909a469e1f 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -97,7 +97,8 @@ DENSE_DIMENSION, MASK, CONSTRAIN_SIMILARITIES, - RELATIVE_CONFIDENCE, + MODEL_CONFIDENCE, + SOFTMAX, ) logger = logging.getLogger(__name__) @@ -258,8 +259,8 @@ def required_components(cls) -> List[Type[Component]]: # If 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, - # Return softmax based probabilities during prediction. - RELATIVE_CONFIDENCE: True, + # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. 
+ MODEL_CONFIDENCE: SOFTMAX, } # init helpers @@ -300,7 +301,6 @@ def _check_config_parameters(self) -> None: self._check_share_hidden_layers_sizes() train_utils._check_confidence_setting(self.component_config) - train_utils._check_similarity_confidence_setting(self.component_config) train_utils._check_similarity_loss_setting(self.component_config) self.component_config = train_utils.update_loss_type(self.component_config) @@ -868,15 +868,17 @@ def _predict_label( self.component_config[LOSS_TYPE] == CROSS_ENTROPY and self.component_config[RANKING_LENGTH] > 0 ): - message_sim = train_utils.sort_and_rank( + message_sim = train_utils.filter_top_k( message_sim, self.component_config[RANKING_LENGTH] ) if ( self.component_config[SIMILARITY_TYPE] == INNER - and self.component_config[RELATIVE_CONFIDENCE] + and self.component_config[MODEL_CONFIDENCE] == SOFTMAX ): - # Normalize the values if returned probabilities are from + # TODO: This should be removed in 3.0 when softmax as + # model confidence is completely deprecated. + # Normalize the values if returned confidences are from # softmax(hence relative to each other). message_sim = train_utils.normalize(message_sim) @@ -1664,12 +1666,9 @@ def _batch_predict_intents( sentence_vector = self._last_token(text_transformed, sequence_lengths) sentence_vector_embed = self._tf_layers[f"embed.{TEXT}"](sentence_vector) - sim_all = self._tf_layers[f"loss.{LABEL}"].sim( + scores = self._tf_layers[f"loss.{LABEL}"]._confidence_from_embeddings( sentence_vector_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :], ) - scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( - sim_all, self.config[SIMILARITY_TYPE] - ) return {"i_scores": scores} diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index fa065eb7cdb7..841e66465628 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -74,7 +74,8 @@ CHECKPOINT_MODEL, DENSE_DIMENSION, CONSTRAIN_SIMILARITIES, - RELATIVE_CONFIDENCE, + MODEL_CONFIDENCE, + SOFTMAX, ) from rasa.nlu.constants import ( RESPONSE_SELECTOR_PROPERTY_NAME, @@ -235,8 +236,8 @@ def required_components(cls) -> List[Type[Component]]: # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, - # Return softmax based probabilities during prediction. - RELATIVE_CONFIDENCE: True, + # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. + MODEL_CONFIDENCE: SOFTMAX, } def __init__( @@ -749,13 +750,10 @@ def batch_predict( sentence_vector = self._last_token(text_transformed, sequence_lengths_text) sentence_vector_embed = self._tf_layers[f"embed.{TEXT}"](sentence_vector) - sim_all = self._tf_layers[f"loss.{LABEL}"].sim( + scores = self._tf_layers[f"loss.{LABEL}"]._confidence_from_embeddings( sentence_vector_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :], ) - scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( - sim_all, self.config[SIMILARITY_TYPE] - ) out["i_scores"] = scores return out diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py index 7eba3d6d0f7f..2de1769b3fd0 100644 --- a/rasa/utils/plotting.py +++ b/rasa/utils/plotting.py @@ -133,7 +133,8 @@ def plot_histogram( # Wine-ish colour for the confidences of hits. # Blue-ish colour for the confidences of misses. 
colors = ["#009292", "#920000"] - bins = [0.05 * i for i in range(1, 21)] + bins = [0.025 * i for i in range(1, 42)] + # bins = [1 * i for i in range(1, 31)] binned_data_sets = [np.histogram(d, bins=bins)[0] for d in hist_data] @@ -172,8 +173,8 @@ def plot_histogram( ) axes[1].set(title="Wrong") - axes[0].set(yticks=bins, xlim=(0, max_xlims[0]), ylim=(min_ylim, 1.0)) - axes[1].set(yticks=bins, xlim=(0, max_xlims[1]), ylim=(min_ylim, 1.0)) + # axes[0].set(yticks=bins, xlim=(0, max_xlims[0]), ylim=(min_ylim, 1.0)) + # axes[1].set(yticks=bins, xlim=(0, max_xlims[1]), ylim=(min_ylim, 1.0)) axes[0].invert_xaxis() axes[0].yaxis.tick_right() diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index f8371d5c6975..d43c85066b9e 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -53,7 +53,7 @@ DENSE_INPUT_DROPOUT = "use_dense_input_dropout" RANKING_LENGTH = "ranking_length" -RELATIVE_CONFIDENCE = "relative_confidence" +MODEL_CONFIDENCE = "model_confidence" BILOU_FLAG = "BILOU_flag" diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 18ad1ec38378..051353110fb3 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -275,13 +275,6 @@ def call( class Embed(tf.keras.layers.Layer): """Dense embedding layer. - Arguments: - embed_dim: Positive integer, dimensionality of the output space. - reg_lambda: Float; regularization factor. - layer_name_suffix: Text added to the name of the layers. - similarity_type: Optional type of similarity measure to use, - either 'cosine' or 'inner'. - Input shape: N-D tensor with shape: `(batch_size, ..., input_dim)`. The most common situation would be @@ -294,20 +287,16 @@ class Embed(tf.keras.layers.Layer): """ def __init__( - self, - embed_dim: int, - reg_lambda: float, - layer_name_suffix: Text, - similarity_type: Optional[Text] = None, + self, embed_dim: int, reg_lambda: float, layer_name_suffix: Text, ) -> None: - super().__init__(name=f"embed_{layer_name_suffix}") + """Initialize layer. - self.similarity_type = similarity_type - if self.similarity_type and self.similarity_type not in {COSINE, INNER}: - raise ValueError( - f"Wrong similarity type '{self.similarity_type}', " - f"should be '{COSINE}' or '{INNER}'." - ) + Args: + embed_dim: Dimensionality of the output space. + reg_lambda: Regularization factor. + layer_name_suffix: Text added to the name of the layers. + """ + super().__init__(name=f"embed_{layer_name_suffix}") regularizer = tf.keras.regularizers.l2(reg_lambda) self._dense = tf.keras.layers.Dense( @@ -319,10 +308,8 @@ def __init__( # noinspection PyMethodOverriding def call(self, x: tf.Tensor) -> tf.Tensor: + """Apply dense layer.""" x = self._dense(x) - if self.similarity_type == COSINE: - x = tf.nn.l2_normalize(x, axis=-1) - return x @@ -562,8 +549,9 @@ def __init__( name: Optional[Text] = None, parallel_iterations: int = 1000, same_sampling: bool = False, + similarity_type: Optional[Text] = None, constrain_similarities: bool = True, - relative_confidence: bool = True, + model_confidence: bool = True, ) -> None: """Declare instance variables with default values. @@ -589,12 +577,13 @@ def __init__( to run in parallel. same_sampling: Boolean, if 'True' sample same negative labels for the whole batch. + similarity_type: Similarity measure to use, either 'cosine' or 'inner'. 
constrain_similarities: Boolean, if 'True' applies sigmoid on all similarity terms and adds to the loss function to ensure that similarity values are approximately bounded. Used inside _loss_cross_entropy() only. - relative_confidence: Boolean, if 'True' confidence is calculated by applying - softmax over similarities, else sigmoid is applied on individual similarities. + model_confidence: Model confidence to be returned during inference. + Possible values - softmax, cosine, inner. """ super().__init__(name=name) self.num_neg = num_neg @@ -607,7 +596,13 @@ def __init__( self.parallel_iterations = parallel_iterations self.same_sampling = same_sampling self.constrain_similarities = constrain_similarities - self.relative_confidence = relative_confidence + self.model_confidence = model_confidence + self.similarity_type = similarity_type + if self.similarity_type and self.similarity_type not in {COSINE, INNER}: + raise ValueError( + f"Wrong similarity type '{self.similarity_type}', " + f"should be '{COSINE}' or '{INNER}'." + ) @staticmethod def _make_flat(x: tf.Tensor) -> tf.Tensor: @@ -738,35 +733,44 @@ def _sample_negatives( labels_bad_negs, ) - @staticmethod - def sim(a: tf.Tensor, b: tf.Tensor, mask: Optional[tf.Tensor] = None) -> tf.Tensor: + def sim( + self, a: tf.Tensor, b: tf.Tensor, mask: Optional[tf.Tensor] = None + ) -> tf.Tensor: """Calculate similarity between given tensors.""" - + if self.similarity_type == COSINE: + a = tf.nn.l2_normalize(a, axis=-1) + b = tf.nn.l2_normalize(b, axis=-1) sim = tf.reduce_sum(a * b, axis=-1) if mask is not None: sim *= tf.expand_dims(mask, 2) return sim - def confidence_from_sim(self, sim: tf.Tensor, similarity_type: Text) -> tf.Tensor: - """Computes model confidence/probability from computed similarities. + def _confidence_from_embeddings( + self, input_embeddings: tf.Tensor, label_embeddings: tf.Tensor + ) -> tf.Tensor: + """Computes model's prediction confidences from input and label embeddings. + + First compute the similarity from embeddings and then apply an activation + function as needed. Args: - sim: Computed similarities - similarity_type: Similarity function to use - COSINE, INNER, AUTO. + input_embeddings: Embeddings of input + label_embeddings: Embeddings of labels Returns: - Confidences corresponding to each similarity value. + model confidence during prediction. """ - if similarity_type == COSINE: - # clip negative values to zero - return tf.nn.relu(sim) - elif self.relative_confidence: - # normalize result to [0, 1] with softmax - return tf.nn.softmax(sim) - - # In other cases convert each individual similarity to probability - return tf.nn.sigmoid(sim) + # If model's prediction confidence is configured to be cosine similarity, + # then normalize embeddings to unit vectors. 
+ if self.model_confidence == COSINE or self.similarity_type == COSINE: + input_embeddings = tf.nn.l2_normalize(input_embeddings, axis=-1) + label_embeddings = tf.nn.l2_normalize(label_embeddings, axis=-1) + + similarities = self.sim(input_embeddings, label_embeddings) + if self.model_confidence == SOFTMAX: + return tf.nn.softmax(similarities) + return similarities def _train_sim( self, diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 1abe040732ee..fea4cca91b31 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -56,7 +56,7 @@ DROP_RATE_ATTENTION, SCALE_LOSS, CONSTRAIN_SIMILARITIES, - RELATIVE_CONFIDENCE, + MODEL_CONFIDENCE, ) from rasa.utils.tensorflow import layers from rasa.utils.tensorflow.transformer import TransformerEncoder @@ -793,7 +793,7 @@ def _prepare_dot_product_loss( # set to 1 to get deterministic behaviour parallel_iterations=1 if self.random_seed is not None else 1000, constrain_similarities=self.config[CONSTRAIN_SIMILARITIES], - relative_confidence=self.config[RELATIVE_CONFIDENCE], + model_confidence=self.config[MODEL_CONFIDENCE], ) def _prepare_sparse_dense_dropout_layers( diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index e75407b734f7..3dd97c02ddf4 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -24,7 +24,7 @@ NUM_TRANSFORMER_LAYERS, DENSE_DIMENSION, CONSTRAIN_SIMILARITIES, - RELATIVE_CONFIDENCE, + MODEL_CONFIDENCE, ) from rasa.shared.nlu.constants import ( ACTION_NAME, @@ -57,8 +57,8 @@ def normalize(values: np.ndarray) -> np.ndarray: return new_values -def sort_and_rank(values: np.ndarray, ranking_length: Optional[int] = 0) -> np.ndarray: - """Sorts the values in descending order and keep only top `ranking_length` values. +def filter_top_k(values: np.ndarray, ranking_length: Optional[int] = 0) -> np.ndarray: + """Sorts the values in descending order and keeps only top `ranking_length` values. Other values will be set to 0. Args: @@ -386,29 +386,15 @@ def override_defaults( def _check_confidence_setting(component_config) -> None: - if component_config[RELATIVE_CONFIDENCE]: + if component_config[MODEL_CONFIDENCE] == SOFTMAX: rasa.shared.utils.io.raise_warning( - f"{RELATIVE_CONFIDENCE} is set to `True`. It is recommended " - f"to set it to `False`. It will be set to `False` by default " + f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended " + f"to set it to `cosine`. It will be set to `cosine` by default " f"Rasa Open Source 3.0 onwards.", category=UserWarning, ) -def _check_similarity_confidence_setting(component_config) -> None: - if ( - not component_config[CONSTRAIN_SIMILARITIES] - and not component_config[RELATIVE_CONFIDENCE] - ): - raise ValueError( - f"If {CONSTRAIN_SIMILARITIES} is set to False, " - f"{RELATIVE_CONFIDENCE} cannot be set to False as " - f"similarities need to be constrained during training " - f"time as well in order to correctly compute confidence values " - f"for each label at inference time." 
- ) - - def _check_similarity_loss_setting(component_config) -> None: if ( component_config[SIMILARITY_TYPE] == COSINE diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py index be5f338b0f22..f6348f91c24e 100644 --- a/tests/nlu/classifiers/test_diet_classifier.py +++ b/tests/nlu/classifiers/test_diet_classifier.py @@ -30,7 +30,7 @@ BILOU_FLAG, ENTITY_RECOGNITION, INTENT_CLASSIFICATION, - RELATIVE_CONFIDENCE, + MODEL_CONFIDENCE, ) from rasa.nlu.components import ComponentBuilder from rasa.nlu.classifiers.diet_classifier import DIETClassifier @@ -315,7 +315,7 @@ async def test_softmax_normalization( @pytest.mark.parametrize( "classifier_params, output_length", - [({RANDOM_SEED: 42, EPOCHS: 1, RELATIVE_CONFIDENCE: False}, LABEL_RANKING_LENGTH)], + [({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: False}, LABEL_RANKING_LENGTH)], ) async def test_softmax_with_absolute_confidence( component_builder, diff --git a/tests/utils/test_train_utils.py b/tests/utils/test_train_utils.py index 974966c4ebc0..548922e0d0ba 100644 --- a/tests/utils/test_train_utils.py +++ b/tests/utils/test_train_utils.py @@ -51,7 +51,7 @@ def test_normalize(): def test_sort_and_rank( input_values: List[float], ranking_length: int, output_values: List[float] ): - ranked_values = train_utils.sort_and_rank(np.array(input_values), ranking_length) + ranked_values = train_utils.filter_top_k(np.array(input_values), ranking_length) assert np.array_equal(ranked_values, output_values) From 28e8c2687f5b251b6fbac6f8599bdd453b988046 Mon Sep 17 00:00:00 2001 From: Daksh Date: Sun, 31 Jan 2021 22:01:44 +0100 Subject: [PATCH 12/44] update docs, test various options --- changelog/7616.improvement.md | 9 +++- docs/docs/components.mdx | 48 +++++++++++-------- docs/docs/migration-guide.mdx | 10 ++-- docs/docs/policies.mdx | 18 ++++--- rasa/core/policies/ted_policy.py | 2 +- rasa/utils/plotting.py | 36 +++++++++----- rasa/utils/tensorflow/layers.py | 8 ++-- rasa/utils/tensorflow/models.py | 2 +- rasa/utils/train_utils.py | 2 +- tests/nlu/classifiers/test_diet_classifier.py | 34 +++++++++---- 10 files changed, 110 insertions(+), 59 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index c1cdb8c8c966..6462f338d0db 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -2,7 +2,14 @@ Added sigmoid cross-entropy loss on all similarity values to constrain them to a This affects the default behaviour of the loss function(`loss_type=cross_entropy`) inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. If you notice a degradation in performance, set `constrain_similarities=False` in the respective ML component. +You may need to tune fallback confidence thresholds to adapt to this change. Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. -Also, added an option `relative_confidence` to each ML component. Contrary to the default behaviour, if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 but they will not sum up to 1. It is also recommended to set `relative_confidence=False` as it will be made default in Rasa Open Source 3.0. You may need to tune fallback confidence thresholds after making this change. \ No newline at end of file +Also, added an option `model_confidence` to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values - +1. 
`softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1. +2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label is in the range `[-1,1]`. +3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label in in an unbounded range. + +The default value is `softmax`, but we recommend using `cosine` as that will be the default value, Rasa Open Source 3.0 onwards. +The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx index 3a447293f1ed..b1cf35d4d99c 100644 --- a/docs/docs/components.mdx +++ b/docs/docs/components.mdx @@ -1620,13 +1620,19 @@ However, additional parameters exist that can be adapted. | | | it to the loss function to ensure that similarity values are | | | | approximately bounded. Used only when `loss_type=softmax` | +---------------------------------+------------------+--------------------------------------------------------------+ -| relative_confidence | True | If `True`, applies softmax on all similarity values for pairs| -| | | of input and labels. This means that output confidences | -| | | will always add up to 1. | -| | | If `False`, applies sigmoid on all similarity values for | -| | | pairs of input and labels. This means that confidence for | -| | | each label will be between 0 and 1 but all of them won't add | -| | | up to 1. | +| model_confidence | "softmax" | Affects how model's confidence for each intent | +| | | is computed. It can take three values - | +| | | 1. `softmax` - Similarities between input and intent | +| | | embeddings are post-processed with a softmax function, | +| | | as a result of which confidence for all intents sum up to 1. | +| | | 2. `cosine` - Cosine similarity between input and intent | +| | | embeddings. Confidence for each intent is in the | +| | | range `[-1,1]`. | +| | | 3. `inner` - Dot product similarity between input and intent | +| | | embeddings. Confidence for each intent is in an unbounded | +| | | range. | +| | | This parameter does not affect the confidence for entity | +| | | prediction. | +---------------------------------+------------------+--------------------------------------------------------------+ ``` @@ -2821,18 +2827,22 @@ However, additional parameters exist that can be adapted. | | | Requires `evaluate_on_number_of_examples > 0` and | | | | `evaluate_every_number_of_epochs > 0` | +---------------------------------+-------------------+--------------------------------------------------------------+ -| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | -| | | it to the loss function to ensure that similarity values are | -| | | approximately bounded. Used only when `loss_type=softmax` | -+---------------------------------+------------------+--------------------------------------------------------------+ -| relative_confidence | True | If `True`, applies softmax on all similarity values for pairs| -| | | of input and labels. This means that output confidences | -| | | will always add up to 1. | -| | | If `False`, applies sigmoid on all similarity values for | -| | | pairs of input and labels. This means that confidence for | -| | | each label will be between 0 and 1 but all of them won't add | -| | | up to 1. 
| -+---------------------------------+------------------+--------------------------------------------------------------+ +| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | +| | | it to the loss function to ensure that similarity values are | +| | | approximately bounded. Used only when `loss_type=softmax` | ++---------------------------------+------------------+---------------------------------------------------------------+ +| model_confidence | "softmax" | Affects how model's confidence for each response label | +| | | is computed. It can take three values - | +| | | 1. `softmax` - Similarities between input and response label | +| | | embeddings are post-processed with a softmax function, | +| | | as a result of which confidence for all labels sum up to 1. | +| | | 2. `cosine` - Cosine similarity between input and response | +| | | label embeddings. Confidence for each label is in the | +| | | range `[-1,1]`. | +| | | 3. `inner` - Dot product similarity between input and response| +| | | label embeddings. Confidence for each label is in an | +| | | unbounded range. | ++---------------------------------+------------------+---------------------------------------------------------------+ ``` :::note diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 2c34fdc904a5..00b4a7940469 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -20,11 +20,13 @@ components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include - The default loss function (`loss_type=cross_entropy`) adds a sigmoid cross-entropy loss of all similarity values to constrain them to an approximate range. If you notice a degradation in performance, set `constrain_similarities=False` in the respective ML component. -- Added an option `relative_confidence` to each ML component. Contrary to the default behaviour, -if `relative_confidence` is set to `False`, the confidences for each label will be between 0 and 1 -but they will not sum up to 1. It is also recommended to set `relative_confidence=False` as it will be made -default in Rasa Open Source 3.0. You may need to tune fallback confidence thresholds after making this change. +Also, a new option `model_confidence` has been added to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values - +1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1. +2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label is in the range `[-1,1]`. +3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label in in an unbounded range. +The default value is `softmax`, but we recommend using `cosine` as that will be the default value, Rasa Open Source 3.0 onwards. +The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. ## Rasa 2.1 to Rasa 2.2 diff --git a/docs/docs/policies.mdx b/docs/docs/policies.mdx index 26a855e9a02f..5f6939a75b7f 100644 --- a/docs/docs/policies.mdx +++ b/docs/docs/policies.mdx @@ -345,13 +345,17 @@ However, additional parameters exist that can be adapted. | | | it to the loss function to ensure that similarity values are | | | | approximately bounded. 
Used only when `loss_type=softmax` | +---------------------------------------+------------------------+--------------------------------------------------------------+ -| relative_confidence | True | If `True`, applies softmax on all similarity values for pairs| -| | | of input and labels. This means that output confidences | -| | | will always add up to 1. | -| | | If `False`, applies sigmoid on all similarity values for | -| | | pairs of input and labels. This means that confidence for | -| | | each label will be between 0 and 1 but all of them won't add | -| | | up to 1. | +| model_confidence | "softmax" | Affects how model's confidence for each action | +| | | is computed. It can take three values - | +| | | 1. `softmax` - Similarities between input and action | +| | | embeddings are post-processed with a softmax function, | +| | | as a result of which confidence for all labels sum up to 1. | +| | | 2. `cosine` - Cosine similarity between input and action | +| | | embeddings. Confidence for each label is in the | +| | | range `[-1,1]`. | +| | | 3. `inner` - Dot product similarity between input and action | +| | | embeddings. Confidence for each label is in an | +| | | unbounded range. | +---------------------------------------+------------------------+--------------------------------------------------------------+ | split_entities_by_comma | True | Splits a list of extracted entities by comma to treat each | | | | one of them as a single entity. Can either be `True`/`False` | diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 4046a086d0d9..a51328824509 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -283,7 +283,7 @@ class TEDPolicy(Policy): # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: True, - # Return softmax based probabilities during prediction. + # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. MODEL_CONFIDENCE: SOFTMAX, # Split entities by comma, this makes sense e.g. for a list of # ingredients in a recipe, but it doesn't make sense for the parts of diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py index 2de1769b3fd0..c670059fbe40 100644 --- a/rasa/utils/plotting.py +++ b/rasa/utils/plotting.py @@ -133,22 +133,32 @@ def plot_histogram( # Wine-ish colour for the confidences of hits. # Blue-ish colour for the confidences of misses. 
colors = ["#009292", "#920000"] - bins = [0.025 * i for i in range(1, 42)] - # bins = [1 * i for i in range(1, 31)] + n_bins = 25 + max_value = max(max(hist_data[0]), max(hist_data[1])) + min_value = min(min(hist_data[0]), min(hist_data[1])) + bin_width = (max_value - min_value) / n_bins + bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)] binned_data_sets = [np.histogram(d, bins=bins)[0] for d in hist_data] max_xlims = [max(binned_data_set) for binned_data_set in binned_data_sets] max_xlims = [xlim + np.ceil(0.25 * xlim) for xlim in max_xlims] # padding - min_ylim = bins[ - min( - [ - (binned_data_set != 0).argmax(axis=0) - for binned_data_set in binned_data_sets - ] - ) - ] + min_ylim = ( + bins[ + min( + [ + (binned_data_set != 0).argmax(axis=0) + for binned_data_set in binned_data_sets + ] + ) + ] + - bin_width + ) + + max_ylim = max(bins) + bin_width + + yticks = [float("{:.2f}".format(x)) for x in bins] centers = 0.5 * (0.05 + (bins + np.roll(bins, 0))[:-1]) heights = 0.75 * np.diff(bins) @@ -173,14 +183,14 @@ def plot_histogram( ) axes[1].set(title="Wrong") - # axes[0].set(yticks=bins, xlim=(0, max_xlims[0]), ylim=(min_ylim, 1.0)) - # axes[1].set(yticks=bins, xlim=(0, max_xlims[1]), ylim=(min_ylim, 1.0)) + axes[0].set(yticks=yticks, xlim=(0, max_xlims[0]), ylim=(min_ylim, max_ylim)) + axes[1].set(yticks=yticks, xlim=(0, max_xlims[1]), ylim=(min_ylim, max_ylim)) axes[0].invert_xaxis() axes[0].yaxis.tick_right() fig.subplots_adjust( - wspace=0.14 + wspace=0.17 ) # get the graphs exactly far enough apart for yaxis labels fig.suptitle(title, fontsize="x-large", fontweight="bold") diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 95e02a7cfd8b..555de4eba27a 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -551,7 +551,7 @@ def __init__( same_sampling: bool = False, similarity_type: Optional[Text] = None, constrain_similarities: bool = True, - model_confidence: bool = True, + model_confidence: Text = SOFTMAX, ) -> None: """Declare instance variables with default values. @@ -716,18 +716,18 @@ def _confidence_from_embeddings( """Computes model's prediction confidences from input and label embeddings. First compute the similarity from embeddings and then apply an activation - function as needed. + function if needed. Args: input_embeddings: Embeddings of input label_embeddings: Embeddings of labels Returns: - model confidence during prediction. + model's prediction confidence """ # If model's prediction confidence is configured to be cosine similarity, # then normalize embeddings to unit vectors. 
-        if self.model_confidence == COSINE or self.similarity_type == COSINE:
+        if self.model_confidence == COSINE:
             input_embeddings = tf.nn.l2_normalize(input_embeddings, axis=-1)
             label_embeddings = tf.nn.l2_normalize(label_embeddings, axis=-1)
 
diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index aa459ca26c9e..cfa5ad025333 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -732,7 +732,6 @@ def _prepare_embed_layers(self, name: Text, prefix: Text = "embed") -> None:
             self.config[EMBEDDING_DIMENSION],
             self.config[REGULARIZATION_CONSTANT],
             name,
-            self.config[SIMILARITY_TYPE],
         )
 
     def _prepare_ffnn_layer(
@@ -792,6 +791,7 @@ def _prepare_dot_product_loss(
             scale_loss,
             # set to 1 to get deterministic behaviour
             parallel_iterations=1 if self.random_seed is not None else 1000,
+            similarity_type=self.config[SIMILARITY_TYPE],
             constrain_similarities=self.config[CONSTRAIN_SIMILARITIES],
             model_confidence=self.config[MODEL_CONFIDENCE],
         )
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 3dd97c02ddf4..2d822ce859a7 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -389,7 +389,7 @@ def _check_confidence_setting(component_config) -> None:
     if component_config[MODEL_CONFIDENCE] == SOFTMAX:
         rasa.shared.utils.io.raise_warning(
             f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended "
-            f"to set it to `cosine`. It will be set to `cosine` by default "
+            f"to set it to `cosine`. It will be set to `cosine` by default, "
             f"Rasa Open Source 3.0 onwards.",
             category=UserWarning,
         )
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index eb520804b798..2b2b4b6e2456 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -370,14 +370,30 @@ async def test_softmax_normalization(
 
 
 @pytest.mark.parametrize(
-    "classifier_params, output_length",
-    [({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: False}, LABEL_RANKING_LENGTH)],
+    "classifier_params, prediction_min, prediction_max, output_length",
+    [
+        # `parametrize` takes a single list of cases; one per model confidence.
+        (
+            {RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "cosine"},
+            -1,
+            1,
+            LABEL_RANKING_LENGTH,
+        ),
+        (
+            {RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "inner"},
+            -1e9,
+            1e9,
+            LABEL_RANKING_LENGTH,
+        ),
+    ],
 )
-async def test_softmax_with_absolute_confidence(
-    component_builder,
-    tmp_path,
-    classifier_params,
-    output_length,
+async def test_cross_entropy_without_normalization(
+    component_builder: ComponentBuilder,
+    tmp_path: Path,
+    classifier_params: Dict[Text, Any],
+    prediction_min: float,
+    prediction_max: float,
+    output_length: int,
     monkeypatch: MonkeyPatch,
 ):
     pipeline = as_pipeline(
@@ -408,7 +424,8 @@ async def test_softmax_with_absolute_confidence(
 
     # check each confidence is in range
     confidence_in_range = [
-        0.0 <= confidence <= 1.0 for confidence in intent_confidences
+        prediction_min <= confidence <= prediction_max
+        for confidence in intent_confidences
     ]
     assert all(confidence_in_range)
 
From 7eeb251224c7389005d1d575251526c297b23ef9 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Sun, 31 Jan 2021 22:04:03 +0100
Subject: [PATCH 13/44] assertive

---
 changelog/7616.improvement.md | 2 +-
 docs/docs/migration-guide.mdx | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md
index 6462f338d0db..8b6d23e06483 100644
--- a/changelog/7616.improvement.md
+++ b/changelog/7616.improvement.md
@@ -2,7 +2,7 @@ Added sigmoid
cross-entropy loss on all similarity values to constrain them to a This affects the default behaviour of the loss function(`loss_type=cross_entropy`) inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. If you notice a degradation in performance, set `constrain_similarities=False` in the respective ML component. -You may need to tune fallback confidence thresholds to adapt to this change. +You should tune fallback confidence thresholds to adapt to this change. Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 00b4a7940469..71ae99a4201e 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -28,6 +28,8 @@ Also, a new option `model_confidence` has been added to each ML component. It af The default value is `softmax`, but we recommend using `cosine` as that will be the default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. +You should tune fallback confidence thresholds to adapt to these changes. + ## Rasa 2.1 to Rasa 2.2 ### General From 0d175d46e2de491c9710d6712ee077c8b349d12b Mon Sep 17 00:00:00 2001 From: Daksh Date: Sun, 31 Jan 2021 23:23:02 +0100 Subject: [PATCH 14/44] fix plotting --- rasa/utils/plotting.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py index c670059fbe40..195060c631ee 100644 --- a/rasa/utils/plotting.py +++ b/rasa/utils/plotting.py @@ -134,8 +134,12 @@ def plot_histogram( # Blue-ish colour for the confidences of misses. colors = ["#009292", "#920000"] n_bins = 25 - max_value = max(max(hist_data[0]), max(hist_data[1])) - min_value = min(min(hist_data[0]), min(hist_data[1])) + max_value = max( + [max(hist_data[0], default=0), max(hist_data[1], default=0)], default=0 + ) + min_value = min( + [min(hist_data[0], default=0), min(hist_data[1], default=0)], default=0 + ) bin_width = (max_value - min_value) / n_bins bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)] From af82d2126df4c4aa17cfefbcbfe06457947ead7c Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 1 Feb 2021 13:44:08 +0100 Subject: [PATCH 15/44] fix ted, add line to migration --- docs/docs/migration-guide.mdx | 2 ++ rasa/core/policies/ted_policy.py | 7 +++--- rasa/nlu/classifiers/diet_classifier.py | 4 +++- rasa/nlu/selectors/response_selector.py | 4 +++- rasa/utils/tensorflow/layers.py | 29 +++++++++++++++---------- rasa/utils/train_utils.py | 7 +++--- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 71ae99a4201e..131083686334 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -29,6 +29,8 @@ The default value is `softmax`, but we recommend using `cosine` as that will be The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. You should tune fallback confidence thresholds to adapt to these changes. +To maintain the behaviour of older minor versions of Rasa Open Source 2.x, set `constrain_similarities=False` +and `model_confidence=softmax` to the respective ML component. 
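
To make the threshold advice above concrete, here is a small numeric illustration (the similarity values are invented) of the scales the three `model_confidence` modes produce:

```python
import numpy as np

# Invented raw dot-product similarities for three candidate labels.
inner = np.array([4.2, 1.3, 0.4])  # model_confidence=inner: unbounded

softmax = np.exp(inner) / np.exp(inner).sum()  # model_confidence=softmax
print(softmax.round(2))  # [0.93 0.05 0.02], sums to 1

cosine = np.array([0.62, 0.18, 0.05])  # model_confidence=cosine: each in [-1, 1]

# A fallback threshold such as 0.7, tuned against softmax confidences,
# would trigger on nearly every turn against these cosine scores,
# which is why thresholds need re-tuning after switching modes.
```
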
## Rasa 2.1 to Rasa 2.2 diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index a51328824509..cab3fe4e6367 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -1727,15 +1727,14 @@ def batch_predict( ) = self._embed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) - sim_all = self._tf_layers[f"loss.{LABEL}"].sim( + sim_all, scores = self._tf_layers[ + f"loss.{LABEL}" + ]._similarity_confidence_from_embeddings( dialogue_embed[:, :, tf.newaxis, :], self.all_labels_embed[tf.newaxis, tf.newaxis, :, :], dialogue_mask, ) - scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( - sim_all, self.config[SIMILARITY_TYPE] - ) predictions = { "action_scores": scores, "similarities": sim_all, diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index dc21066dfa94..c3ee0ec93fe5 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -1683,7 +1683,9 @@ def _batch_predict_intents( sentence_vector = self._last_token(text_transformed, sequence_lengths) sentence_vector_embed = self._tf_layers[f"embed.{TEXT}"](sentence_vector) - scores = self._tf_layers[f"loss.{LABEL}"]._confidence_from_embeddings( + _, scores = self._tf_layers[ + f"loss.{LABEL}" + ]._similarity_confidence_from_embeddings( sentence_vector_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :], ) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index 6ae739215a85..5bd814fb3877 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -763,7 +763,9 @@ def batch_predict( sentence_vector = self._last_token(text_transformed, sequence_lengths_text) sentence_vector_embed = self._tf_layers[f"embed.{TEXT}"](sentence_vector) - scores = self._tf_layers[f"loss.{LABEL}"]._confidence_from_embeddings( + _, scores = self._tf_layers[ + f"loss.{LABEL}" + ]._similarity_confidence_from_embeddings( sentence_vector_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :], ) diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 555de4eba27a..53e0d9179835 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -535,7 +535,7 @@ def f1_score( class DotProductLoss(tf.keras.layers.Layer): - """Dot-product loss layer""" + """Dot-product loss layer.""" def __init__( self, @@ -710,20 +710,24 @@ def sim( return sim - def _confidence_from_embeddings( - self, input_embeddings: tf.Tensor, label_embeddings: tf.Tensor - ) -> tf.Tensor: - """Computes model's prediction confidences from input and label embeddings. + def _similarity_confidence_from_embeddings( + self, + input_embeddings: tf.Tensor, + label_embeddings: tf.Tensor, + mask: Optional[tf.Tensor] = None, + ) -> Tuple[tf.Tensor, tf.Tensor]: + """Computes similarity between input and label embeddings and model's confidence. First compute the similarity from embeddings and then apply an activation - function if needed. + function if needed to get the confidence. Args: - input_embeddings: Embeddings of input - label_embeddings: Embeddings of labels + input_embeddings: Embeddings of input. + label_embeddings: Embeddings of labels. + mask: Mask over input and output sequence. Returns: - model's prediction confidence + similarity between input and label embeddings and model's prediction confidence for each label. 
""" # If model's prediction confidence is configured to be cosine similarity, # then normalize embeddings to unit vectors. @@ -731,10 +735,11 @@ def _confidence_from_embeddings( input_embeddings = tf.nn.l2_normalize(input_embeddings, axis=-1) label_embeddings = tf.nn.l2_normalize(label_embeddings, axis=-1) - similarities = self.sim(input_embeddings, label_embeddings) + similarities = self.sim(input_embeddings, label_embeddings, mask) + confidences = similarities if self.model_confidence == SOFTMAX: - return tf.nn.softmax(similarities) - return similarities + confidences = tf.nn.softmax(similarities) + return similarities, confidences def _train_sim( self, diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index 2d822ce859a7..99058492c07c 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -94,12 +94,13 @@ def update_similarity_type(config: Dict[Text, Any]) -> Dict[Text, Any]: def update_loss_type(config: Dict[Text, Any]) -> Dict[Text, Any]: - """ - If LOSS_TYPE is set to 'softmax', update it to 'cross_entropy' since former is deprecated. + """If LOSS_TYPE is set to 'softmax', update it to 'cross_entropy' since former is deprecated. + Args: config: model configuration - Returns: updated model configuration + Returns: + updated model configuration """ # TODO: Completely deprecate this with 3.0 if config.get(LOSS_TYPE) == SOFTMAX: From d11ab35e138b73cde188dff6fdadff1997076d19 Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 1 Feb 2021 18:17:57 +0100 Subject: [PATCH 16/44] dummy change to trigger tests --- changelog/7616.improvement.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index 8b6d23e06483..774622ba7c90 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -11,5 +11,4 @@ Also, added an option `model_confidence` to each ML component. It affects how mo 2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label is in the range `[-1,1]`. 3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label in in an unbounded range. -The default value is `softmax`, but we recommend using `cosine` as that will be the default value, Rasa Open Source 3.0 onwards. -The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. +The default value is `softmax`, but we recommend using `cosine` as that will be the default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. 
From 4cf0750f31cab94facac144cb68e14cfe5cc2d02 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Wed, 3 Feb 2021 18:46:49 +0100
Subject: [PATCH 17/44] add changes for autoconfig, defaults

---
 changelog/7616.improvement.md                 | 18 ++++---
 .../config_empty_en_after_dumping.yml         |  6 +++
 .../config_empty_en_after_dumping_core.yml    |  2 +
 .../config_empty_en_after_dumping_nlu.yml     |  4 ++
 .../config_empty_fr_after_dumping.yml         |  6 +++
 .../config_with_comments_after_dumping.yml    |  2 +
 docs/docs/components.mdx                      | 52 ++++++++++---------
 docs/docs/migration-guide.mdx                 | 17 +++---
 docs/docs/policies.mdx                        | 10 ++--
 rasa/core/policies/ted_policy.py              |  8 +--
 rasa/nlu/classifiers/diet_classifier.py       |  5 +-
 rasa/nlu/selectors/response_selector.py       |  4 +-
 rasa/shared/importers/default_config.yml      |  6 +++
 rasa/utils/train_utils.py                     | 13 +++++
 14 files changed, 100 insertions(+), 53 deletions(-)

diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md
index 774622ba7c90..91edd05a876b 100644
--- a/changelog/7616.improvement.md
+++ b/changelog/7616.improvement.md
@@ -1,14 +1,16 @@
-Added sigmoid cross-entropy loss on all similarity values to constrain them to an approximate range in `DotProductLoss`.
+Added an option `constrain_similarities` which adds a sigmoid cross-entropy loss on all similarity values to constrain them to an approximate range in `DotProductLoss`.
 
-This affects the default behaviour of the loss function (`loss_type=cross_entropy`) inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`.
-If you notice a degradation in performance, set `constrain_similarities=False` in the respective ML component.
-You should tune fallback confidence thresholds to adapt to this change.
+This affects the behaviour of the loss function (`loss_type=cross_entropy`) inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`.
+By default, the parameter is set to `False`, but users are encouraged to set it to `True` and re-train their assistants, as it will be set to `True` by default, Rasa Open Source 3.0 onwards.
+Once you re-train your assistant with this option set to `True`, you should also tune fallback confidence thresholds.
 
 Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead.
 
 Also, added an option `model_confidence` to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values -
 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1.
-2. `cosine` - Cosine similarity between input and label embeddings. Confidence for each label is in the range `[-1,1]`.
-3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label is in an unbounded range.
+2. `cosine` - Cosine similarity between input and label embeddings. Confidence for each label will be in the range `[-1,1]`.
+3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range.
 
-The default value is `softmax`, but we recommend using `cosine` as that will be the default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
+The default value is `softmax`, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards.
The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. +The default value is `softmax`, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. + +The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration. \ No newline at end of file diff --git a/data/test_config/config_empty_en_after_dumping.yml b/data/test_config/config_empty_en_after_dumping.yml index 20507a3944af..79c21d70c4a7 100644 --- a/data/test_config/config_empty_en_after_dumping.yml +++ b/data/test_config/config_empty_en_after_dumping.yml @@ -13,9 +13,13 @@ pipeline: # max_ngram: 4 # - name: DIETClassifier # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: EntitySynonymMapper # - name: ResponseSelector # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: FallbackClassifier # threshold: 0.3 # ambiguity_threshold: 0.1 @@ -27,4 +31,6 @@ policies: # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy diff --git a/data/test_config/config_empty_en_after_dumping_core.yml b/data/test_config/config_empty_en_after_dumping_core.yml index 1488270ddf39..adb3c2a0af55 100644 --- a/data/test_config/config_empty_en_after_dumping_core.yml +++ b/data/test_config/config_empty_en_after_dumping_core.yml @@ -8,4 +8,6 @@ policies: # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy diff --git a/data/test_config/config_empty_en_after_dumping_nlu.yml b/data/test_config/config_empty_en_after_dumping_nlu.yml index a4cb5077bf58..8249b17a0e11 100644 --- a/data/test_config/config_empty_en_after_dumping_nlu.yml +++ b/data/test_config/config_empty_en_after_dumping_nlu.yml @@ -13,9 +13,13 @@ pipeline: # max_ngram: 4 # - name: DIETClassifier # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: EntitySynonymMapper # - name: ResponseSelector # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: FallbackClassifier # threshold: 0.3 # ambiguity_threshold: 0.1 diff --git a/data/test_config/config_empty_fr_after_dumping.yml b/data/test_config/config_empty_fr_after_dumping.yml index 8148c3ebee68..a2ea89f4bf0a 100644 --- a/data/test_config/config_empty_fr_after_dumping.yml +++ b/data/test_config/config_empty_fr_after_dumping.yml @@ -13,9 +13,13 @@ pipeline: # max_ngram: 4 # - name: DIETClassifier # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: EntitySynonymMapper # - name: ResponseSelector # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: FallbackClassifier # threshold: 0.3 # ambiguity_threshold: 0.1 @@ -27,4 +31,6 @@ policies: # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy diff --git a/data/test_config/config_with_comments_after_dumping.yml b/data/test_config/config_with_comments_after_dumping.yml index 16b6129d18f9..ef0743f894de 100644 --- a/data/test_config/config_with_comments_after_dumping.yml +++ b/data/test_config/config_with_comments_after_dumping.yml @@ -27,6 
+27,8 @@ policies: # even here # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy # comments everywhere diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx index 747285d3e063..5e36a9c61dca 100644 --- a/docs/docs/components.mdx +++ b/docs/docs/components.mdx @@ -1531,10 +1531,12 @@ However, additional parameters exist that can be adapted. | similarity_type | "auto" | Type of similarity measure to use, either 'auto' or 'cosine' | | | | or 'inner'. | +---------------------------------+------------------+--------------------------------------------------------------+ -| loss_type | "softmax" | The type of the loss function, either 'softmax' or 'margin'. | +| loss_type | "cross_entropy" | The type of the loss function, either 'cross_entropy' | +| | | or 'margin'. | +---------------------------------+------------------+--------------------------------------------------------------+ -| ranking_length | 10 | Number of top actions to normalize scores for loss type | -| | | 'softmax'. Set to 0 to turn off normalization. | +| ranking_length | 10 | Number of top intents to normalize scores for. Applicable | +| | | with loss type 'cross_entropy'. Set to 0 to disable | +| | | normalization. | +---------------------------------+------------------+--------------------------------------------------------------+ | maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make | | | | embedding vectors for correct labels. | @@ -1616,9 +1618,9 @@ However, additional parameters exist that can be adapted. | | | ... | | | | ``` | +---------------------------------+------------------+--------------------------------------------------------------+ -| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | +| constrain_similarities | False | If `True`, applies sigmoid on all similarity terms and adds | | | | it to the loss function to ensure that similarity values are | -| | | approximately bounded. Used only when `loss_type=softmax` | +| | | approximately bounded. Used only if `loss_type=cross_entropy`| +---------------------------------+------------------+--------------------------------------------------------------+ | model_confidence | "softmax" | Affects how model's confidence for each intent | | | | is computed. It can take three values - | @@ -2760,10 +2762,12 @@ However, additional parameters exist that can be adapted. | similarity_type | "auto" | Type of similarity measure to use, either 'auto' or 'cosine' | | | | or 'inner'. | +---------------------------------+-------------------+--------------------------------------------------------------+ -| loss_type | "softmax" | The type of the loss function, either 'softmax' or 'margin'. | +| loss_type | "cross_entropy" | The type of the loss function, either 'cross_entropy' | +| | | or 'margin'. | +---------------------------------+-------------------+--------------------------------------------------------------+ -| ranking_length | 10 | Number of top actions to normalize scores for loss type | -| | | 'softmax'. Set to 0 to turn off normalization. | +| ranking_length | 10 | Number of top responses to normalize scores for. Applicable | +| | | with loss type 'cross_entropy'. Set to 0 to disable | +| | | normalization. 
| +---------------------------------+-------------------+--------------------------------------------------------------+ | maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make | | | | embedding vectors for correct labels. | @@ -2832,22 +2836,22 @@ However, additional parameters exist that can be adapted. | | | Requires `evaluate_on_number_of_examples > 0` and | | | | `evaluate_every_number_of_epochs > 0` | +---------------------------------+-------------------+--------------------------------------------------------------+ -| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | -| | | it to the loss function to ensure that similarity values are | -| | | approximately bounded. Used only when `loss_type=softmax` | -+---------------------------------+------------------+---------------------------------------------------------------+ -| model_confidence | "softmax" | Affects how model's confidence for each response label | -| | | is computed. It can take three values - | -| | | 1. `softmax` - Similarities between input and response label | -| | | embeddings are post-processed with a softmax function, | -| | | as a result of which confidence for all labels sum up to 1. | -| | | 2. `cosine` - Cosine similarity between input and response | -| | | label embeddings. Confidence for each label is in the | -| | | range `[-1,1]`. | -| | | 3. `inner` - Dot product similarity between input and response| -| | | label embeddings. Confidence for each label is in an | -| | | unbounded range. | -+---------------------------------+------------------+---------------------------------------------------------------+ +| constrain_similarities | False | If `True`, applies sigmoid on all similarity terms and adds | +| | | it to the loss function to ensure that similarity values are | +| | | approximately bounded. Used only if `loss_type=cross_entropy`| ++---------------------------------+-------------------+--------------------------------------------------------------+ +| model_confidence | "softmax" | Affects how model's confidence for each response label | +| | | is computed. It can take three values - | +| | | 1. `softmax` - Similarities between input and response label | +| | | embeddings are post-processed with a softmax function, | +| | | as a result of which confidence for all labels sum up to 1. | +| | | 2. `cosine` - Cosine similarity between input and response | +| | | label embeddings. Confidence for each label is in the | +| | | range `[-1,1]`. | +| | | 3. `inner` - Dot product similarity between input and | +| | | response label embeddings. Confidence for each label is in an| +| | | unbounded range. | ++---------------------------------+-------------------+--------------------------------------------------------------+ ``` :::note diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 131083686334..4b908a1193c5 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -14,23 +14,20 @@ how you can migrate from one version to another. ### Machine Learning Components -Few changes have been made to the default loss function inside machine learning (ML) +Few changes have been made to the loss function inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include: -- Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. 
-- The default loss function (`loss_type=cross_entropy`) adds a sigmoid cross-entropy loss of all similarity values to constrain -them to an approximate range. If you notice a degradation in performance, set `constrain_similarities=False` -in the respective ML component. +1. Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. +2. The default loss function (`loss_type=cross_entropy`) adds an optional sigmoid cross-entropy loss of all similarity values to constrain +them to an approximate range. You can turn on this option by setting `constrain_similarities=True`. Also, a new option `model_confidence` has been added to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values - 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1. -2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label is in the range `[-1,1]`. -3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label in in an unbounded range. -The default value is `softmax`, but we recommend using `cosine` as that will be the default value, Rasa Open Source 3.0 onwards. +2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label will be in the range `[-1,1]`. +3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range. +The default value is `softmax`, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. You should tune fallback confidence thresholds to adapt to these changes. -To maintain the behaviour of older minor versions of Rasa Open Source 2.x, set `constrain_similarities=False` -and `model_confidence=softmax` to the respective ML component. ## Rasa 2.1 to Rasa 2.2 diff --git a/docs/docs/policies.mdx b/docs/docs/policies.mdx index ec6efa60c7e3..6255b3219aec 100644 --- a/docs/docs/policies.mdx +++ b/docs/docs/policies.mdx @@ -265,10 +265,12 @@ However, additional parameters exist that can be adapted. | similarity_type | "auto" | Type of similarity measure to use, either 'auto' or 'cosine' | | | | or 'inner'. | +---------------------------------------+------------------------+--------------------------------------------------------------+ -| loss_type | "softmax" | The type of the loss function, either 'softmax' or 'margin'. | +| loss_type | "cross_entropy" | The type of the loss function, either 'cross_entropy' | +| | | or 'margin'. | +---------------------------------------+------------------------+--------------------------------------------------------------+ -| ranking_length | 10 | Number of top actions to normalize scores for loss type | -| | | 'softmax'. Set to 0 to turn off normalization. | +| ranking_length | 10 | Number of top responses to normalize scores for. Applicable | +| | | with loss type 'cross_entropy'. Set to 0 to disable | +| | | normalization. | +---------------------------------------+------------------------+--------------------------------------------------------------+ | maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make | | | | embedding vectors for correct labels. 
| @@ -341,7 +343,7 @@ However, additional parameters exist that can be adapted. | entity_recognition | True | If 'True' entity recognition is trained and entities are | | | | extracted. | +---------------------------------------+------------------------+--------------------------------------------------------------+ -| constrain_similarities | True | If `True`, applies sigmoid on all similarity terms and adds | +| constrain_similarities | False | If `True`, applies sigmoid on all similarity terms and adds | | | | it to the loss function to ensure that similarity values are | | | | approximately bounded. Used only when `loss_type=softmax` | +---------------------------------------+------------------------+--------------------------------------------------------------+ diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 0d61d25997b1..b7d3d2b0f540 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -215,9 +215,9 @@ class TEDPolicy(Policy): NUM_NEG: 20, # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'. SIMILARITY_TYPE: AUTO, - # The type of the loss function, either 'softmax' or 'margin'. + # The type of the loss function, either 'cross_entropy' or 'margin'. LOSS_TYPE: CROSS_ENTROPY, - # Number of top actions to normalize scores for loss type 'softmax'. + # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy'. # Set to 0 to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors @@ -282,7 +282,7 @@ class TEDPolicy(Policy): ENTITY_RECOGNITION: True, # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. - CONSTRAIN_SIMILARITIES: True, + CONSTRAIN_SIMILARITIES: False, # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. MODEL_CONFIDENCE: SOFTMAX, # 'BILOU_flag' determines whether to use BILOU tagging or not. @@ -349,6 +349,8 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self.config = rasa.utils.train_utils.override_defaults( self.defaults, new_config ) + + rasa.utils.train_utils._check_loss_setting(self.config) rasa.utils.train_utils._check_confidence_setting(self.config) rasa.utils.train_utils._check_similarity_loss_setting(self.config) self.config = rasa.utils.train_utils.update_loss_type(self.config) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 33eda2b13fcf..4d4b5f3b97d9 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -181,7 +181,7 @@ def required_components(cls) -> List[Type[Component]]: SIMILARITY_TYPE: AUTO, # The type of the loss function, either 'cross_entropy' or 'margin'. LOSS_TYPE: CROSS_ENTROPY, - # Number of top actions to normalize scores for loss type 'softmax'. + # Number of top intents to normalize scores for. Applicable with loss type 'cross_entropy'. # Set to 0 to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors @@ -251,7 +251,7 @@ def required_components(cls) -> List[Type[Component]]: SPLIT_ENTITIES_BY_COMMA: True, # If 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. 
- CONSTRAIN_SIMILARITIES: True, + CONSTRAIN_SIMILARITIES: False, # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. MODEL_CONFIDENCE: SOFTMAX, } @@ -293,6 +293,7 @@ def _check_config_parameters(self) -> None: self._check_masked_lm() self._check_share_hidden_layers_sizes() + train_utils._check_loss_setting(self.component_config) train_utils._check_confidence_setting(self.component_config) train_utils._check_similarity_loss_setting(self.component_config) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index a10cf24437a6..e769c371fc67 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -176,7 +176,7 @@ def required_components(cls) -> List[Type[Component]]: SIMILARITY_TYPE: AUTO, # The type of the loss function, either 'cross_entropy' or 'margin'. LOSS_TYPE: CROSS_ENTROPY, - # Number of top actions to normalize scores for loss type 'softmax'. + # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy'. # Set to 0 to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors @@ -237,7 +237,7 @@ def required_components(cls) -> List[Type[Component]]: CHECKPOINT_MODEL: False, # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. - CONSTRAIN_SIMILARITIES: True, + CONSTRAIN_SIMILARITIES: False, # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. MODEL_CONFIDENCE: SOFTMAX, } diff --git a/rasa/shared/importers/default_config.yml b/rasa/shared/importers/default_config.yml index 95c9716b0d4e..63d10d9249ab 100644 --- a/rasa/shared/importers/default_config.yml +++ b/rasa/shared/importers/default_config.yml @@ -13,9 +13,13 @@ pipeline: max_ngram: 4 - name: DIETClassifier epochs: 100 + constrain_similarities: true + model_confidence: cosine - name: EntitySynonymMapper - name: ResponseSelector epochs: 100 + constrain_similarities: true + model_confidence: cosine - name: FallbackClassifier threshold: 0.3 ambiguity_threshold: 0.1 @@ -27,4 +31,6 @@ policies: - name: TEDPolicy max_history: 5 epochs: 100 + constrain_similarities: true + model_confidence: cosine - name: RulePolicy diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index e6f09bf74f8b..253bc7bb07c4 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -396,6 +396,19 @@ def _check_confidence_setting(component_config) -> None: ) +def _check_loss_setting(component_config) -> None: + if not component_config[CONSTRAIN_SIMILARITIES] and component_config[LOSS_TYPE] in [ + SOFTMAX, + CROSS_ENTROPY, + ]: + rasa.shared.utils.io.raise_warning( + f"{CONSTRAIN_SIMILARITIES} is set to `False`. It is recommended " + f"to set it to `True` when using cross-entropy loss. 
It will be set to `True` by default, " + f"Rasa Open Source 3.0 onwards.", + category=UserWarning, + ) + + def _check_similarity_loss_setting(component_config) -> None: if ( component_config[SIMILARITY_TYPE] == COSINE From 827dc2bcb104b94a97811971b8a13d71629fe889 Mon Sep 17 00:00:00 2001 From: Daksh Date: Thu, 4 Feb 2021 12:35:19 +0100 Subject: [PATCH 18/44] fix test --- tests/nlu/classifiers/test_diet_classifier.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py index 2b2b4b6e2456..bb0c2aa13cb0 100644 --- a/tests/nlu/classifiers/test_diet_classifier.py +++ b/tests/nlu/classifiers/test_diet_classifier.py @@ -377,15 +377,13 @@ async def test_softmax_normalization( -1, 1, LABEL_RANKING_LENGTH, - ) - ], - [ + ), ( {RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "inner"}, -1e9, 1e9, LABEL_RANKING_LENGTH, - ) + ), ], ) async def test_cross_entropy_without_normalization( From 6e44c2fc2cdc1c44af6b1554854cb328c188a21c Mon Sep 17 00:00:00 2001 From: Daksh Varshneya Date: Fri, 5 Feb 2021 13:20:37 +0100 Subject: [PATCH 19/44] Apply suggestions from code review Co-authored-by: Tobias Wochinger Co-authored-by: Vladimir Vlasov --- docs/docs/migration-guide.mdx | 2 +- rasa/core/policies/ted_policy.py | 2 +- rasa/utils/tensorflow/layers.py | 4 ++-- rasa/utils/train_utils.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 4b908a1193c5..15fbaca57dc7 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -16,7 +16,7 @@ how you can migrate from one version to another. Few changes have been made to the loss function inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include: -1. Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. +1. Configuration option `loss_type=softmax` is now deprecated and will be removed in Rasa Open Source 3.0. Use `loss_type=cross_entropy` instead. 2. The default loss function (`loss_type=cross_entropy`) adds an optional sigmoid cross-entropy loss of all similarity values to constrain them to an approximate range. You can turn on this option by setting `constrain_similarities=True`. diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 9b567aad47aa..19eb4eca337d 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -283,7 +283,7 @@ class TEDPolicy(Policy): # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: False, - # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. + # Model confidence to be returned during inference. Possible values - 'softmax', 'cosine', 'inner'. MODEL_CONFIDENCE: SOFTMAX, # 'BILOU_flag' determines whether to use BILOU tagging or not. 
# If set to 'True' labelling is more rigorous, however more diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 53e0d9179835..ac2649ee60da 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -287,7 +287,7 @@ class Embed(tf.keras.layers.Layer): """ def __init__( - self, embed_dim: int, reg_lambda: float, layer_name_suffix: Text, + self, embed_dim: int, reg_lambda: float, layer_name_suffix: Text ) -> None: """Initialize layer. @@ -852,7 +852,7 @@ def _loss_cross_entropy( sim_neg_li: tf.Tensor, mask: Optional[tf.Tensor], ) -> tf.Tensor: - """Define cross entropy loss.""" + """Defines cross entropy loss.""" # Similarity terms between input and label should be optimized relative # to each other and hence use them as logits for softmax term softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1) diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index 253bc7bb07c4..9c64b8d34b09 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -392,7 +392,7 @@ def _check_confidence_setting(component_config) -> None: f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended " f"to set it to `cosine`. It will be set to `cosine` by default, " f"Rasa Open Source 3.0 onwards.", - category=UserWarning, + category=FutureWarning, ) @@ -405,7 +405,7 @@ def _check_loss_setting(component_config) -> None: f"{CONSTRAIN_SIMILARITIES} is set to `False`. It is recommended " f"to set it to `True` when using cross-entropy loss. It will be set to `True` by default, " f"Rasa Open Source 3.0 onwards.", - category=UserWarning, + category=FutureWarning, ) @@ -423,7 +423,7 @@ def _check_similarity_loss_setting(component_config) -> None: f"Ideally use `{SIMILARITY_TYPE}={INNER}`" f" and `{LOSS_TYPE}={CROSS_ENTROPY}` or" f"`{SIMILARITY_TYPE}={COSINE}` and `{LOSS_TYPE}={MARGIN}`.", - category=UserWarning, + category=FutureWarning, ) From f5d26e72970e4cfbe30f81dfe948072d0e831f55 Mon Sep 17 00:00:00 2001 From: Daksh Date: Fri, 5 Feb 2021 13:21:53 +0100 Subject: [PATCH 20/44] remove parallel iter and complex op --- changelog/7616.improvement.md | 4 ++-- rasa/core/policies/ted_policy.py | 2 +- rasa/utils/tensorflow/layers.py | 14 ++++---------- rasa/utils/tensorflow/models.py | 2 -- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index 91edd05a876b..6020823cbd49 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -1,12 +1,12 @@ Added an option `constrain_similarities` which adds sigmoid cross-entropy loss on all similarity values to constrain them to an approximate range in `DotProductLoss`. -This affects the behaviour of the loss function(`loss_type=cross_entropy`) inside machine learning (ML) components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. +This affects the behaviour of the loss function(`loss_type=cross_entropy`) inside machine learning (ML) components - [DIETClassifier](components.mdx#dietclassifier), [ResponseSelector](components.mdx#dietclassifier) and [TEDPolicy](policies.mdx#ted-policy). By default, the parameter is set to `False` but users are encouraged to set it to `True` and re-train their assistants as it will be set to `True` by default, Rasa Open Source 3.0 onwards. Once you re-train your assistant with this option set to `True`, you should also tune fallback confidence thresholds. Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. 
-Also, added an option `model_confidence` to each ML component. It affects how m mponent. It affects how model's confidence for each label is computed during inference. It can take three values - +Also, added an option `model_confidence` to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values - 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1. 2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label will be in the range `[-1,1]`. 3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range. diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 9b567aad47aa..dea768b2189f 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -283,7 +283,7 @@ class TEDPolicy(Policy): # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. CONSTRAIN_SIMILARITIES: False, - # Model confidence to be returned during inference. Possible values - softmax, cosine, inner. + # Model confidence to be returned during inference. Possible values - 'softmax', 'cosine' and 'inner'. MODEL_CONFIDENCE: SOFTMAX, # 'BILOU_flag' determines whether to use BILOU tagging or not. # If set to 'True' labelling is more rigorous, however more diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 53e0d9179835..04683c7c96a8 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -287,7 +287,7 @@ class Embed(tf.keras.layers.Layer): """ def __init__( - self, embed_dim: int, reg_lambda: float, layer_name_suffix: Text, + self, embed_dim: int, reg_lambda: float, layer_name_suffix: Text ) -> None: """Initialize layer. @@ -547,7 +547,6 @@ def __init__( neg_lambda: float, scale_loss: bool, name: Optional[Text] = None, - parallel_iterations: int = 1000, same_sampling: bool = False, similarity_type: Optional[Text] = None, constrain_similarities: bool = True, @@ -573,8 +572,6 @@ def __init__( scale_loss: Boolean, if 'True' scale loss inverse proportionally to the confidence of the correct prediction. name: Optional name of the layer. - parallel_iterations: Positive integer, the number of iterations allowed - to run in parallel. same_sampling: Boolean, if 'True' sample same negative labels for the whole batch. similarity_type: Similarity measure to use, either 'cosine' or 'inner'. 
@@ -593,7 +590,6 @@ def __init__( self.use_max_sim_neg = use_max_sim_neg self.neg_lambda = neg_lambda self.scale_loss = scale_loss - self.parallel_iterations = parallel_iterations self.same_sampling = same_sampling self.constrain_similarities = constrain_similarities self.model_confidence = model_confidence @@ -852,7 +848,7 @@ def _loss_cross_entropy( sim_neg_li: tf.Tensor, mask: Optional[tf.Tensor], ) -> tf.Tensor: - """Define cross entropy loss.""" + """Defines cross entropy loss.""" # Similarity terms between input and label should be optimized relative # to each other and hence use them as logits for softmax term softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1) @@ -883,10 +879,8 @@ def _loss_cross_entropy( sigmoid_labels = tf.concat( [ - tf.expand_dims( - tf.ones_like(sigmoid_logits[..., 0], tf.float32), -1 - ), - tf.zeros_like(sigmoid_logits[..., 1:], tf.float32), + tf.ones_like(sigmoid_logits[..., :1]), + tf.zeros_like(sigmoid_logits[..., 1:]), ], axis=-1, ) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index cfa5ad025333..5b4ae15c427f 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -789,8 +789,6 @@ def _prepare_dot_product_loss( self.config[USE_MAX_NEG_SIM], self.config[NEGATIVE_MARGIN_SCALE], scale_loss, - # set to 1 to get deterministic behaviour - parallel_iterations=1 if self.random_seed is not None else 1000, similarity_type=self.config[SIMILARITY_TYPE], constrain_similarities=self.config[CONSTRAIN_SIMILARITIES], model_confidence=self.config[MODEL_CONFIDENCE], From a5286eb044c94be2911d75c3ca1963e3fd2911bc Mon Sep 17 00:00:00 2001 From: Daksh Date: Fri, 5 Feb 2021 17:24:02 +0100 Subject: [PATCH 21/44] more review comments --- changelog/7616.improvement.md | 26 +++++++++++------- docs/docs/components.mdx | 8 +++--- docs/docs/migration-guide.mdx | 13 +++++++-- docs/docs/policies.mdx | 6 ++--- rasa/core/policies/ted_policy.py | 22 +++++++-------- rasa/nlu/classifiers/diet_classifier.py | 16 ++++------- rasa/utils/train_utils.py | 36 ++++++------------------- 7 files changed, 58 insertions(+), 69 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index 6020823cbd49..1c84680fad6a 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -1,16 +1,24 @@ -Added an option `constrain_similarities` which adds sigmoid cross-entropy loss on all similarity values to constrain them to an approximate range in `DotProductLoss`. +Added two new parameters `constrain_similarities` and `model_confidence` to machine learning (ML) components - [DIETClassifier](components.mdx#dietclassifier), [ResponseSelector](components.mdx#dietclassifier) and [TEDPolicy](policies.mdx#ted-policy). -This affects the behaviour of the loss function(`loss_type=cross_entropy`) inside machine learning (ML) components - [DIETClassifier](components.mdx#dietclassifier), [ResponseSelector](components.mdx#dietclassifier) and [TEDPolicy](policies.mdx#ted-policy). -By default, the parameter is set to `False` but users are encouraged to set it to `True` and re-train their assistants as it will be set to `True` by default, Rasa Open Source 3.0 onwards. -Once you re-train your assistant with this option set to `True`, you should also tune fallback confidence thresholds. +Setting `constrain_similarities=True` adds a sigmoid cross-entropy loss on all similarity values to restrict them to an approximate range in `DotProductLoss`. 
This should help the models to perform better on real world test sets. +By default, the parameter is set to `False` to preserve the old behaviour but users are encouraged to set it to `True` and re-train their assistants as it will be set to `True` by default, Rasa Open Source 3.0 onwards. -Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. - -Also, added an option `model_confidence` to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values - +Parameter `model_confidence` affects how model's confidence for each label is computed during inference. It can take three values - 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1. 2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label will be in the range `[-1,1]`. 3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range. -The default value is `softmax`, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. +Setting `model_confidence=cosine` should help users tune the fallback thresholds of their assistant better. The default value is `softmax` to preserve the old behaviour, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`. + +With both the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as - +``` +- name: DIETClassifier + model_confidence: cosine + constrain_similarities: True + ... +``` +Once the assistant is re-trained with the above configuration, users should also tune fallback confidence thresholds. + +Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. -The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration. \ No newline at end of file +The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration. The config would look like this - \ No newline at end of file diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx index 5e36a9c61dca..2025f126c127 100644 --- a/docs/docs/components.mdx +++ b/docs/docs/components.mdx @@ -1535,8 +1535,8 @@ However, additional parameters exist that can be adapted. | | | or 'margin'. | +---------------------------------+------------------+--------------------------------------------------------------+ | ranking_length | 10 | Number of top intents to normalize scores for. Applicable | -| | | with loss type 'cross_entropy'. Set to 0 to disable | -| | | normalization. | +| | | only with loss type 'cross_entropy' and 'softmax' | +| | | confidences. Set to 0 to disable normalization. 
|
+---------------------------------+------------------+--------------------------------------------------------------+
| maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make |
| | | embedding vectors for correct labels. |
@@ -2766,8 +2766,8 @@ However, additional parameters exist that can be adapted.
| | | or 'margin'. |
+---------------------------------+-------------------+--------------------------------------------------------------+
| ranking_length | 10 | Number of top responses to normalize scores for. Applicable |
-| | | with loss type 'cross_entropy'. Set to 0 to disable |
-| | | normalization. |
+| | | only with loss type 'cross_entropy' and 'softmax' |
+| | | confidences. Set to 0 to disable normalization. |
+---------------------------------+-------------------+--------------------------------------------------------------+
| maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make |
| | | embedding vectors for correct labels. |
diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx
index 15fbaca57dc7..9d5551e10410 100644
--- a/docs/docs/migration-guide.mdx
+++ b/docs/docs/migration-guide.mdx
@@ -18,7 +18,7 @@ Few changes have been made to the loss function inside machine learning (ML)
components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include:
1. Configuration option `loss_type=softmax` is now deprecated and will be removed in Rasa Open Source 3.0. Use `loss_type=cross_entropy` instead.
2. The default loss function (`loss_type=cross_entropy`) adds an optional sigmoid cross-entropy loss of all similarity values to constrain
-them to an approximate range. You can turn on this option by setting `constrain_similarities=True`.
+them to an approximate range. You can turn on this option by setting `constrain_similarities=True`. This should help the models to perform better on real world test sets.
Also, a new option `model_confidence` has been added to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values -
1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1.
@@ -27,7 +27,16 @@ Also, a new option `model_confidence` has been added to each ML component. It af
The default value is `softmax`, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards.
The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
-You should tune fallback confidence thresholds to adapt to these changes.
+With both the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as -
+```
+- name: DIETClassifier
+  model_confidence: cosine
+  constrain_similarities: True
+  ...
+```
+Once the assistant is re-trained with the above configuration, users should also tune fallback confidence thresholds.
+
+Configuration option `loss_type=softmax` is also deprecated in all ML components. Use `loss_type=cross_entropy` instead.
## Rasa 2.1 to Rasa 2.2
diff --git a/docs/docs/policies.mdx b/docs/docs/policies.mdx
index db2651335bb1..2b57bdaeb1b6 100644
--- a/docs/docs/policies.mdx
+++ b/docs/docs/policies.mdx
@@ -271,9 +271,9 @@ However, additional parameters exist that can be adapted.
| loss_type | "cross_entropy" | The type of the loss function, either 'cross_entropy' |
| | | or 'margin'.
| +---------------------------------------+------------------------+--------------------------------------------------------------+ -| ranking_length | 10 | Number of top responses to normalize scores for. Applicable | -| | | with loss type 'cross_entropy'. Set to 0 to disable | -| | | normalization. | +| ranking_length | 10 | Number of top actions to normalize scores for. Applicable | +| | | only with loss type 'cross_entropy' and 'softmax' | +| | | confidences. Set to 0 to disable normalization. | +---------------------------------------+------------------------+--------------------------------------------------------------+ | maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make | | | | embedding vectors for correct labels. | diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index a3811e611173..f00d838852b3 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -620,20 +620,18 @@ def predict_action_probabilities( # take correct prediction from batch confidence, is_e2e_prediction = self._pick_confidence(confidences, similarities) - if self.config[LOSS_TYPE] == CROSS_ENTROPY and self.config[RANKING_LENGTH] > 0: - confidence = rasa.utils.train_utils.filter_top_k( - confidence, self.config[RANKING_LENGTH] + if ( + self.config[LOSS_TYPE] == CROSS_ENTROPY + and self.config[RANKING_LENGTH] > 0 + and self.config[SIMILARITY_TYPE] == INNER + and self.config[MODEL_CONFIDENCE] == SOFTMAX + ): + # TODO: This should be removed in 3.0 when softmax as + # model confidence and normalization is completely deprecated. + confidences = rasa.utils.train_utils.normalize( + confidences, self.config[RANKING_LENGTH] ) - if ( - self.config[SIMILARITY_TYPE] == INNER - and self.config[MODEL_CONFIDENCE] == SOFTMAX - ): - # TODO: This should be removed in 3.0 when softmax as - # model confidence is completely deprecated. - # Normalize the values if returned probabilities are from softmax. - confidence = rasa.utils.train_utils.normalize(confidence) - optional_events = self._create_optional_event_for_entities( output, is_e2e_prediction, interpreter, tracker ) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 4d4b5f3b97d9..5e42078eaf5c 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -867,21 +867,15 @@ def _predict_label( if ( self.component_config[LOSS_TYPE] == CROSS_ENTROPY and self.component_config[RANKING_LENGTH] > 0 + and self.component_config[SIMILARITY_TYPE] == INNER + and self.component_config[MODEL_CONFIDENCE] == SOFTMAX ): - message_sim = train_utils.filter_top_k( + # TODO: This should be removed in 3.0 when softmax as + # model confidence and normalization is completely deprecated. + message_sim = train_utils.normalize( message_sim, self.component_config[RANKING_LENGTH] ) - if ( - self.component_config[SIMILARITY_TYPE] == INNER - and self.component_config[MODEL_CONFIDENCE] == SOFTMAX - ): - # TODO: This should be removed in 3.0 when softmax as - # model confidence is completely deprecated. - # Normalize the values if returned confidences are from - # softmax(hence relative to each other). 
- message_sim = train_utils.normalize(message_sim) - message_sim[::-1].sort() message_sim = message_sim.tolist() diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index 9c64b8d34b09..8728aeb2e363 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -40,38 +40,18 @@ from rasa.nlu.tokenizers.tokenizer import Token -def normalize(values: np.ndarray) -> np.ndarray: +def normalize(values: np.ndarray, ranking_length: Optional[int] = 0) -> np.ndarray: """Normalizes an array of positive numbers over the top `ranking_length` values. - - Args: - values: Values to normalize - - Returns: - Normalized values. - """ - new_values = values.copy() - - if np.sum(new_values) > 0: - new_values = new_values / np.sum(new_values) - - return new_values - - -def filter_top_k(values: np.ndarray, ranking_length: Optional[int] = 0) -> np.ndarray: - """Sorts the values in descending order and keeps only top `ranking_length` values. - Other values will be set to 0. - Args: - values: Values to sort and rank - ranking_length: number of values to maintain above 0. - - Returns: - Modified values. """ new_values = values.copy() # prevent mutation of the input if 0 < ranking_length < len(new_values): ranked = sorted(new_values, reverse=True) new_values[new_values < ranked[ranking_length - 1]] = 0 + + if np.sum(new_values) > 0: + new_values = new_values / np.sum(new_values) + return new_values @@ -386,7 +366,7 @@ def override_defaults( return config -def _check_confidence_setting(component_config) -> None: +def _check_confidence_setting(component_config: Dict[Text, Any]) -> None: if component_config[MODEL_CONFIDENCE] == SOFTMAX: rasa.shared.utils.io.raise_warning( f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended " @@ -396,7 +376,7 @@ def _check_confidence_setting(component_config) -> None: ) -def _check_loss_setting(component_config) -> None: +def _check_loss_setting(component_config: Dict[Text, Any]) -> None: if not component_config[CONSTRAIN_SIMILARITIES] and component_config[LOSS_TYPE] in [ SOFTMAX, CROSS_ENTROPY, @@ -409,7 +389,7 @@ def _check_loss_setting(component_config) -> None: ) -def _check_similarity_loss_setting(component_config) -> None: +def _check_similarity_loss_setting(component_config: Dict[Text, Any]) -> None: if ( component_config[SIMILARITY_TYPE] == COSINE and component_config[LOSS_TYPE] == CROSS_ENTROPY From bdadebfd01ca3fd01ec337f6156768b57052053f Mon Sep 17 00:00:00 2001 From: Daksh Date: Fri, 5 Feb 2021 17:47:11 +0100 Subject: [PATCH 22/44] fix tests --- changelog/7616.improvement.md | 4 ++-- rasa/core/policies/ted_policy.py | 4 ++-- rasa/nlu/classifiers/diet_classifier.py | 4 ++-- rasa/nlu/selectors/response_selector.py | 4 ++-- tests/utils/test_train_utils.py | 21 +++++++-------------- 5 files changed, 15 insertions(+), 22 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index 1c84680fad6a..63be53e61c5f 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -1,7 +1,7 @@ Added two new parameters `constrain_similarities` and `model_confidence` to machine learning (ML) components - [DIETClassifier](components.mdx#dietclassifier), [ResponseSelector](components.mdx#dietclassifier) and [TEDPolicy](policies.mdx#ted-policy). Setting `constrain_similarities=True` adds a sigmoid cross-entropy loss on all similarity values to restrict them to an approximate range in `DotProductLoss`. This should help the models to perform better on real world test sets. 
-By default, the parameter is set to `False` to preserve the old behaviour but users are encouraged to set it to `True` and re-train their assistants as it will be set to `True` by default, Rasa Open Source 3.0 onwards. +By default, the parameter is set to `False` to preserve the old behaviour, but users are encouraged to set it to `True` and re-train their assistants as it will be set to `True` by default, Rasa Open Source 3.0 onwards. Parameter `model_confidence` affects how model's confidence for each label is computed during inference. It can take three values - 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1. @@ -21,4 +21,4 @@ Once the assistant is re-trained with the above configuration, users should also Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead. -The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration. The config would look like this - \ No newline at end of file +The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration. \ No newline at end of file diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index f00d838852b3..e8081dd9ae0c 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -218,8 +218,8 @@ class TEDPolicy(Policy): SIMILARITY_TYPE: AUTO, # The type of the loss function, either 'cross_entropy' or 'margin'. LOSS_TYPE: CROSS_ENTROPY, - # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy'. - # Set to 0 to turn off normalization. + # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy' + # and 'softmax' confidences. Set to 0 to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors # for correct labels. diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 5e42078eaf5c..314d03fac680 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -181,8 +181,8 @@ def required_components(cls) -> List[Type[Component]]: SIMILARITY_TYPE: AUTO, # The type of the loss function, either 'cross_entropy' or 'margin'. LOSS_TYPE: CROSS_ENTROPY, - # Number of top intents to normalize scores for. Applicable with loss type 'cross_entropy'. - # Set to 0 to turn off normalization. + # Number of top intents to normalize scores for. Applicable with loss type 'cross_entropy' + # and 'softmax' confidences. Set to 0 to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors # for correct labels. diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index e769c371fc67..6f099f38df8d 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -176,8 +176,8 @@ def required_components(cls) -> List[Type[Component]]: SIMILARITY_TYPE: AUTO, # The type of the loss function, either 'cross_entropy' or 'margin'. LOSS_TYPE: CROSS_ENTROPY, - # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy'. 
- # Set to 0 to turn off normalization. + # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy' + # and 'softmax' confidences. Set to 0 to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors # for correct labels. diff --git a/tests/utils/test_train_utils.py b/tests/utils/test_train_utils.py index 548922e0d0ba..74dccd2ad5df 100644 --- a/tests/utils/test_train_utils.py +++ b/tests/utils/test_train_utils.py @@ -36,23 +36,16 @@ def test_align_token_features(): assert np.all(actual_features[0][4] == np.mean(token_features[0][5:10], axis=0)) -def test_normalize(): - input_values = [0.7, 0.1, 0.1] - normalized_values = train_utils.normalize(np.array(input_values)) - assert np.allclose( - normalized_values, np.array([0.77777778, 0.11111111, 0.11111111]), atol=1e-5 - ) - - @pytest.mark.parametrize( "input_values, ranking_length, output_values", - [([0.5, 0.8, 0.1], 2, [0.5, 0.8, 0.0]), ([0.5, 0.3, 0.9], 5, [0.5, 0.3, 0.9]),], + [ + ([0.2, 0.7, 0.1], 2, [0.2222222, 0.77777778, 0.0]), + ([0.1, 0.7, 0.1], 5, [0.11111111, 0.77777778, 0.11111111]), + ], ) -def test_sort_and_rank( - input_values: List[float], ranking_length: int, output_values: List[float] -): - ranked_values = train_utils.filter_top_k(np.array(input_values), ranking_length) - assert np.array_equal(ranked_values, output_values) +def test_normalize(input_values, ranking_length, output_values): + normalized_values = train_utils.normalize(np.array(input_values), ranking_length) + assert np.allclose(normalized_values, np.array(output_values), atol=1e-5) @pytest.mark.parametrize( From 3a3b0f3cd0b7835435a8f805566aa8a8e96e7a94 Mon Sep 17 00:00:00 2001 From: Daksh Date: Fri, 5 Feb 2021 19:10:33 +0100 Subject: [PATCH 23/44] add conditions --- rasa/core/policies/ted_policy.py | 16 +++++---------- rasa/nlu/classifiers/diet_classifier.py | 8 ++------ rasa/utils/train_utils.py | 26 +++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index e8081dd9ae0c..875fa79529f3 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -346,9 +346,8 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self.defaults, new_config ) - rasa.utils.train_utils._check_loss_setting(self.config) - rasa.utils.train_utils._check_confidence_setting(self.config) - rasa.utils.train_utils._check_similarity_loss_setting(self.config) + rasa.utils.train_utils.validate_configuration_settings(self.config) + self.config = rasa.utils.train_utils.update_loss_type(self.config) self.config = rasa.utils.train_utils.update_similarity_type(self.config) self.config = rasa.utils.train_utils.update_evaluation_parameters(self.config) @@ -620,16 +619,11 @@ def predict_action_probabilities( # take correct prediction from batch confidence, is_e2e_prediction = self._pick_confidence(confidences, similarities) - if ( - self.config[LOSS_TYPE] == CROSS_ENTROPY - and self.config[RANKING_LENGTH] > 0 - and self.config[SIMILARITY_TYPE] == INNER - and self.config[MODEL_CONFIDENCE] == SOFTMAX - ): + if self.config[RANKING_LENGTH] > 0 and self.config[MODEL_CONFIDENCE] == SOFTMAX: # TODO: This should be removed in 3.0 when softmax as # model confidence and normalization is completely deprecated. 
From 3a3b0f3cd0b7835435a8f805566aa8a8e96e7a94 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Fri, 5 Feb 2021 19:10:33 +0100
Subject: [PATCH 23/44] add conditions

---
 rasa/core/policies/ted_policy.py        | 16 +++++----------
 rasa/nlu/classifiers/diet_classifier.py |  8 ++------
 rasa/utils/train_utils.py               | 26 +++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index e8081dd9ae0c..875fa79529f3 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -346,9 +346,8 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None:
             self.defaults, new_config
         )
 
-        rasa.utils.train_utils._check_loss_setting(self.config)
-        rasa.utils.train_utils._check_confidence_setting(self.config)
-        rasa.utils.train_utils._check_similarity_loss_setting(self.config)
+        rasa.utils.train_utils.validate_configuration_settings(self.config)
+
         self.config = rasa.utils.train_utils.update_loss_type(self.config)
         self.config = rasa.utils.train_utils.update_similarity_type(self.config)
         self.config = rasa.utils.train_utils.update_evaluation_parameters(self.config)
@@ -620,16 +619,11 @@ def predict_action_probabilities(
         # take correct prediction from batch
         confidence, is_e2e_prediction = self._pick_confidence(confidences, similarities)
 
-        if (
-            self.config[LOSS_TYPE] == CROSS_ENTROPY
-            and self.config[RANKING_LENGTH] > 0
-            and self.config[SIMILARITY_TYPE] == INNER
-            and self.config[MODEL_CONFIDENCE] == SOFTMAX
-        ):
+        if self.config[RANKING_LENGTH] > 0 and self.config[MODEL_CONFIDENCE] == SOFTMAX:
             # TODO: This should be removed in 3.0 when softmax as
             # model confidence and normalization is completely deprecated.
-            confidences = rasa.utils.train_utils.normalize(
-                confidences, self.config[RANKING_LENGTH]
+            confidence = rasa.utils.train_utils.normalize(
+                confidence, self.config[RANKING_LENGTH]
             )
 
         optional_events = self._create_optional_event_for_entities(
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 314d03fac680..20f1f2348731 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -293,9 +293,7 @@ def _check_config_parameters(self) -> None:
         self._check_masked_lm()
         self._check_share_hidden_layers_sizes()
 
-        train_utils._check_loss_setting(self.component_config)
-        train_utils._check_confidence_setting(self.component_config)
-        train_utils._check_similarity_loss_setting(self.component_config)
+        train_utils.validate_configuration_settings(self.component_config)
 
         self.component_config = train_utils.update_loss_type(self.component_config)
 
@@ -865,9 +863,7 @@ def _predict_label(
         label_ids = message_sim.argsort()[::-1]
 
         if (
-            self.component_config[LOSS_TYPE] == CROSS_ENTROPY
-            and self.component_config[RANKING_LENGTH] > 0
-            and self.component_config[SIMILARITY_TYPE] == INNER
+            self.component_config[RANKING_LENGTH] > 0
             and self.component_config[MODEL_CONFIDENCE] == SOFTMAX
         ):
             # TODO: This should be removed in 3.0 when softmax as
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 8728aeb2e363..eee26f9002d2 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -34,6 +34,7 @@
 )
 from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS
 from rasa.core.constants import DIALOGUE
+from rasa.shared.exceptions import InvalidConfigException
 
 if TYPE_CHECKING:
     from rasa.nlu.extractors.extractor import EntityTagSpec
@@ -366,6 +367,17 @@ def override_defaults(
     return config
 
 
+def validate_configuration_settings(component_config: Dict[Text, Any]) -> None:
+    """Validates that the combination of parameters in the configuration is correctly set.
+
+    Args:
+        component_config: Configuration to validate.
+    """
+    _check_loss_setting(component_config)
+    _check_confidence_setting(component_config)
+    _check_similarity_loss_setting(component_config)
+
+
 def _check_confidence_setting(component_config: Dict[Text, Any]) -> None:
     if component_config[MODEL_CONFIDENCE] == SOFTMAX:
         rasa.shared.utils.io.raise_warning(
@@ -374,6 +386,20 @@ def _check_confidence_setting(component_config: Dict[Text, Any]) -> None:
             f"Rasa Open Source 3.0 onwards.",
             category=FutureWarning,
         )
+        if component_config[LOSS_TYPE] not in [SOFTMAX, CROSS_ENTROPY]:
+            raise InvalidConfigException(
+                f"{LOSS_TYPE}={component_config[LOSS_TYPE]} and "
+                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
+                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
+                f"only with {LOSS_TYPE}={CROSS_ENTROPY}."
+            )
+        if component_config[SIMILARITY_TYPE] != INNER:
+            raise InvalidConfigException(
+                f"{SIMILARITY_TYPE}={component_config[SIMILARITY_TYPE]} and "
+                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
+                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
+                f"only with {SIMILARITY_TYPE}={INNER}."
+            )
 
 
 def _check_loss_setting(component_config: Dict[Text, Any]) -> None:
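The new guards reject option combinations that cannot work together as early as possible. A condensed, standalone sketch of the `_check_confidence_setting` logic added above, with plain string keys instead of the constants module and a stand-in exception class:

```python
from typing import Any, Dict

class InvalidConfigException(Exception):
    """Stand-in for rasa.shared.exceptions.InvalidConfigException."""

def check_confidence_setting(config: Dict[str, Any]) -> None:
    # 'softmax' confidences are only meaningful for a cross-entropy loss
    # over inner-product similarities; anything else is rejected early.
    if config["model_confidence"] == "softmax":
        if config["loss_type"] not in ("softmax", "cross_entropy"):
            raise InvalidConfigException(
                "model_confidence=softmax requires loss_type=cross_entropy."
            )
        if config["similarity_type"] != "inner":
            raise InvalidConfigException(
                "model_confidence=softmax requires similarity_type=inner."
            )

try:
    check_confidence_setting(
        {"model_confidence": "softmax", "loss_type": "margin", "similarity_type": "inner"}
    )
except InvalidConfigException as e:
    print(f"rejected: {e}")
```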
From cf27ec4d953b53ea8e200d8eb401152878ad4d66 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Sun, 7 Feb 2021 17:48:10 +0100
Subject: [PATCH 24/44] add tests for diet and ted

---
 rasa/core/policies/ted_policy.py        |   2 +
 rasa/nlu/classifiers/diet_classifier.py |   4 ++
 rasa/utils/tensorflow/layers.py         |   5 +-
 rasa/utils/train_utils.py               |  24 ++++-
 tests/nlu/selectors/test_selectors.py   | 126 ++++++++++++++++++++++++
 5 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 875fa79529f3..f75f489c1bf3 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -346,6 +346,8 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None:
             self.defaults, new_config
         )
 
+        self.config = rasa.utils.train_utils.update_confidence_type(self.config)
+
         rasa.utils.train_utils.validate_configuration_settings(self.config)
 
         self.config = rasa.utils.train_utils.update_loss_type(self.config)
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 20f1f2348731..de140e6565e8 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -293,6 +293,10 @@ def _check_config_parameters(self) -> None:
         self._check_masked_lm()
         self._check_share_hidden_layers_sizes()
 
+        self.component_config = train_utils.update_confidence_type(
+            self.component_config
+        )
+
         train_utils.validate_configuration_settings(self.component_config)
 
         self.component_config = train_utils.update_loss_type(self.component_config)
diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py
index 04683c7c96a8..354e7430eadd 100644
--- a/rasa/utils/tensorflow/layers.py
+++ b/rasa/utils/tensorflow/layers.py
@@ -12,6 +12,7 @@
     INNER,
     CROSS_ENTROPY,
 )
+from rasa.shared.exceptions import RasaException
 
 logger = logging.getLogger(__name__)
 
@@ -595,7 +596,7 @@ def __init__(
         self.model_confidence = model_confidence
         self.similarity_type = similarity_type
         if self.similarity_type and self.similarity_type not in {COSINE, INNER}:
-            raise ValueError(
+            raise RasaException(
                 f"Wrong similarity type '{self.similarity_type}', "
                 f"should be '{COSINE}' or '{INNER}'."
             )
@@ -918,7 +919,7 @@ def _chosen_loss(self) -> Callable:
         elif self.loss_type == CROSS_ENTROPY:
             return self._loss_cross_entropy
         else:
-            raise ValueError(
+            raise RasaException(
                 f"Wrong loss type '{self.loss_type}', "
                 f"should be '{MARGIN}' or '{CROSS_ENTROPY}'"
             )
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index eee26f9002d2..1a31b1eb69fd 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -367,6 +367,28 @@ def override_defaults(
     return config
 
 
+def update_confidence_type(component_config: Dict[Text, Any]) -> Dict[Text, Any]:
+    """Set model confidence to cosine if margin loss is used.
+
+    Args:
+        component_config: model configuration
+
+    Returns:
+        updated model configuration
+    """
+    # TODO: Remove this once model_confidence is set to cosine by default.
+    if (
+        component_config[LOSS_TYPE] == MARGIN
+        and component_config[MODEL_CONFIDENCE] == SOFTMAX
+    ):
+        rasa.shared.utils.io.raise_warning(
+            f"Overriding defaults by setting {MODEL_CONFIDENCE} to "
+            f"{COSINE} as {LOSS_TYPE} is set to {MARGIN} in the configuration."
+        )
+        component_config[MODEL_CONFIDENCE] = COSINE
+    return component_config
+
+
 def validate_configuration_settings(component_config: Dict[Text, Any]) -> None:
     """Validates that the combination of parameters in the configuration is correctly set.
 
@@ -393,7 +415,7 @@ def _check_confidence_setting(component_config: Dict[Text, Any]) -> None:
             f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
             f"only with {LOSS_TYPE}={CROSS_ENTROPY}."
         )
-        if component_config[SIMILARITY_TYPE] != INNER:
+        if component_config[SIMILARITY_TYPE] not in [INNER, AUTO]:
             raise InvalidConfigException(
                 f"{SIMILARITY_TYPE}={component_config[SIMILARITY_TYPE]} and "
                 f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index 6d5c4aabea9d..c7b1748a8b21 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -3,6 +3,8 @@
 import pytest
 import numpy as np
 from typing import List, Dict, Text, Any
+from mock import Mock
+from _pytest.monkeypatch import MonkeyPatch
 
 import rasa.model
 from rasa.nlu import train
@@ -19,12 +21,20 @@
     EVAL_NUM_EPOCHS,
     EVAL_NUM_EXAMPLES,
     CHECKPOINT_MODEL,
+    MODEL_CONFIDENCE,
+    RANDOM_SEED,
+    RANKING_LENGTH,
+    LOSS_TYPE,
 )
+from rasa.utils import train_utils
+from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.shared.nlu.constants import TEXT
 from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.nlu.selectors.response_selector import ResponseSelector
 from rasa.shared.nlu.training_data.message import Message
 from rasa.shared.nlu.training_data.training_data import TrainingData
+from tests.nlu.classifiers.test_diet_classifier import as_pipeline
+from tests.conftest import DEFAULT_NLU_DATA
 
 
 @pytest.mark.parametrize(
@@ -315,3 +325,119 @@ async def test_process_gives_diagnostic_data(trained_response_selector_bot: Path
         assert "attention_weights" in diagnostic_data[name]
         # By default, ResponseSelector has `number_of_transformer_layers = 0`
         assert diagnostic_data[name].get("attention_weights") is None
+
+
+@pytest.mark.parametrize(
+    "classifier_params, prediction_min, prediction_max, output_length",
+    [
+        ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "cosine"}, -1, 1, 9,),
+        ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "inner"}, -1e9, 1e9, 9,),
+    ],
+)
+async def test_cross_entropy_without_normalization(
+    component_builder: ComponentBuilder,
+    tmp_path: Path,
+    classifier_params: Dict[Text, Any],
+    prediction_min: float,
+    prediction_max: float,
+    output_length: int,
+    monkeypatch: MonkeyPatch,
+):
+    pipeline = as_pipeline(
+        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
+    )
+    assert pipeline[2]["name"] == "ResponseSelector"
+    pipeline[2].update(classifier_params)
+
+    _config = RasaNLUModelConfig({"pipeline": pipeline})
+    (trained_model, _, persisted_path) = await train(
+        _config,
+        path=str(tmp_path),
+        data="data/test_selectors",
+        component_builder=component_builder,
+    )
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    mock = Mock()
+    monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+
+    parse_data = loaded.parse("hello")
+    response_ranking = parse_data.get("response_selector").get("default").get("ranking")
+
+    # check that the output was correctly truncated
+    assert len(response_ranking) == output_length
+
+    response_confidences = [response.get("confidence") for response in response_ranking]
+
+    # check each confidence is in range
+    confidence_in_range = [
+        prediction_min <= confidence <= prediction_max
+        for confidence in response_confidences
+    ]
+    assert all(confidence_in_range)
+
+    # normalize shouldn't have been called
+    mock.normalize.assert_not_called()
+
+
+@pytest.mark.parametrize(
+    "classifier_params", [({LOSS_TYPE: "margin", RANDOM_SEED: 42, EPOCHS: 1})],
+)
+async def test_margin_loss_is_not_normalized(
+    monkeypatch, component_builder, tmpdir, classifier_params
+):
+    pipeline = as_pipeline(
+        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
+    )
+    assert pipeline[2]["name"] == "ResponseSelector"
+    pipeline[2].update(classifier_params)
+
+    mock = Mock()
+    monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+
+    _config = RasaNLUModelConfig({"pipeline": pipeline})
+    (trained_model, _, persisted_path) = await train(
+        _config,
+        path=str(tmpdir),
+        data="data/test_selectors",
+        component_builder=component_builder,
+    )
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    parse_data = loaded.parse("hello")
+    response_ranking = parse_data.get("response_selector").get("default").get("ranking")
+
+    # check that the output was not normalized
+    mock.normalize.assert_not_called()
+
+    # check that the output was correctly truncated
+    assert len(response_ranking) == 9
+
+
+@pytest.mark.parametrize(
+    "classifier_params, data_path, output_length",
+    [
+        ({RANDOM_SEED: 42, EPOCHS: 2}, "data/test_selectors", 2),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 0, EPOCHS: 2}, "data/test_selectors", 2),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 1, EPOCHS: 2}, "data/test_selectors", 1),
+    ],
+)
+async def test_softmax_ranking(
+    component_builder, tmp_path, classifier_params, data_path, output_length,
+):
+    pipeline = as_pipeline(
+        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
+    )
+    assert pipeline[2]["name"] == "ResponseSelector"
+    pipeline[2].update(classifier_params)
+
+    _config = RasaNLUModelConfig({"pipeline": pipeline})
+    (trained_model, _, persisted_path) = await train(
+        _config, path=str(tmp_path), data=data_path, component_builder=component_builder
+    )
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    parse_data = loaded.parse("hello")
+    response_ranking = parse_data.get("response_selector").get("default").get("ranking")
+    # check that the output was correctly truncated after normalization
+    assert len(response_ranking) == output_length

From 476e59820e36e7988865de909b7a67e78ad53e0c Mon Sep 17 00:00:00 2001
From: Daksh
Date: Sun, 7 Feb 2021 17:55:59 +0100
Subject: [PATCH 25/44] add types

---
 tests/nlu/selectors/test_selectors.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index c7b1748a8b21..0f0be42ab7e6 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -384,7 +384,10 @@
     "classifier_params", [({LOSS_TYPE: "margin", RANDOM_SEED: 42, EPOCHS: 1})],
 )
 async def test_margin_loss_is_not_normalized(
-    monkeypatch, component_builder, tmpdir, classifier_params
+    monkeypatch: MonkeyPatch,
+    component_builder: ComponentBuilder,
+    tmpdir: Path,
+    classifier_params: Dict[Text, int],
 ):
     pipeline = as_pipeline(
         "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
@@ -417,13 +420,17 @@ async def test_margin_loss_is_not_normalized(
 @pytest.mark.parametrize(
     "classifier_params, data_path, output_length",
     [
-        ({RANDOM_SEED: 42, EPOCHS: 2}, "data/test_selectors", 2),
-        ({RANDOM_SEED: 42, RANKING_LENGTH: 0, EPOCHS: 2}, "data/test_selectors", 2),
-        ({RANDOM_SEED: 42, RANKING_LENGTH: 1, EPOCHS: 2}, "data/test_selectors", 1),
+        ({RANDOM_SEED: 42, EPOCHS: 2}, "data/test_selectors", 9),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 0, EPOCHS: 2}, "data/test_selectors", 9),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 2, EPOCHS: 2}, "data/test_selectors", 2),
     ],
 )
 async def test_softmax_ranking(
-    component_builder, tmp_path, classifier_params, data_path, output_length,
+    component_builder: ComponentBuilder,
+    tmp_path: Path,
+    classifier_params: Dict[Text, int],
+    data_path: Text,
+    output_length: int,
 ):
     pipeline = as_pipeline(
         "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
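The tests above rely on one pattern throughout: swap `train_utils.normalize` for a `Mock` via pytest's `monkeypatch` fixture, run the code path, and then assert whether the mock ever ran. A self-contained illustration of the idiom, where `mymodule` and its functions are hypothetical stand-ins:

```python
from unittest.mock import Mock

import mymodule  # hypothetical module whose code calls mymodule.normalize

def test_normalize_is_not_called(monkeypatch):
    mock = Mock()
    # every call to mymodule.normalize now lands on the Mock instead
    monkeypatch.setattr(mymodule, "normalize", mock.normalize)

    mymodule.predict()  # hypothetical code path that must not normalize

    mock.normalize.assert_not_called()  # fails loudly if normalize ran
```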
From 3d554e390a492b993906ea192da59274e7194976 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Sun, 7 Feb 2021 18:07:20 +0100
Subject: [PATCH 26/44] added tests for TED

---
 tests/core/policies/test_ted_policy.py | 98 ++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py
index ea790d422127..bae7475be18c 100644
--- a/tests/core/policies/test_ted_policy.py
+++ b/tests/core/policies/test_ted_policy.py
@@ -32,8 +32,12 @@
     SCALE_LOSS,
     SIMILARITY_TYPE,
     VALUE_RELATIVE_ATTENTION,
+    MODEL_CONFIDENCE,
+    COSINE,
+    INNER,
 )
 from tests.core.test_policies import PolicyTestCollection
+from rasa.shared.constants import DEFAULT_SENDER_ID
 
 UTTER_GREET_ACTION = "utter_greet"
 GREET_INTENT_NAME = "greet"
@@ -330,6 +334,100 @@ def test_normalization(
         mock.normalize.assert_not_called()
 
 
+class TestTEDPolicyCosineConfidence(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: COSINE}
+        )
+
+    def test_normalization(
+        self,
+        trained_policy: Policy,
+        tracker: DialogueStateTracker,
+        default_domain: Domain,
+        monkeypatch: MonkeyPatch,
+    ):
+        # first check the output is what we expect
+        predicted_probabilities = trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        ).probabilities
+        # there should be no normalization
+        confidence_in_range = [
+            -1 <= confidence <= 1 for confidence in predicted_probabilities
+        ]
+        assert all(confidence_in_range)
+
+        # also check our function is not called
+        mock = Mock()
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+        trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        )
+
+        mock.normalize.assert_not_called()
+
+    def test_prediction_on_empty_tracker(
+        self, trained_policy: Policy, default_domain: Domain
+    ):
+        tracker = DialogueStateTracker(DEFAULT_SENDER_ID, default_domain.slots)
+        prediction = trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        )
+        assert not prediction.is_end_to_end_prediction
+        assert len(prediction.probabilities) == default_domain.num_actions
+        assert max(prediction.probabilities) <= 1.0
+        assert min(prediction.probabilities) >= -1.0
+
+
+class TestTEDPolicyInnerConfidence(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: INNER}
+        )
+
+    def test_normalization(
+        self,
+        trained_policy: Policy,
+        tracker: DialogueStateTracker,
+        default_domain: Domain,
+        monkeypatch: MonkeyPatch,
+    ):
+        # first check the output is what we expect
+        predicted_probabilities = trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        ).probabilities
+        # there should be no normalization
+        confidence_in_range = [
+            -1e9 <= confidence <= 1e9 for confidence in predicted_probabilities
+        ]
+        assert all(confidence_in_range)
+
+        # also check our function is not called
+        mock = Mock()
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+        trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        )
+
+        mock.normalize.assert_not_called()
+
+    def test_prediction_on_empty_tracker(
+        self, trained_policy: Policy, default_domain: Domain
+    ):
+        tracker = DialogueStateTracker(DEFAULT_SENDER_ID, default_domain.slots)
+        prediction = trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        )
+        assert not prediction.is_end_to_end_prediction
+        assert len(prediction.probabilities) == default_domain.num_actions
+        assert max(prediction.probabilities) <= 1e9
+        assert min(prediction.probabilities) >= -1e9
+
+
 class TestTEDPolicyLowRankingLength(TestTEDPolicy):
     def create_policy(
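Each new test class above reruns the entire inherited TED suite under a different configuration by overriding only `create_policy`. The pattern in miniature, with hypothetical names:

```python
class TestDefault:
    def create_policy(self):
        return {"model_confidence": "softmax"}  # stand-in for a real policy object

    def test_has_confidence(self):
        assert "model_confidence" in self.create_policy()

class TestCosine(TestDefault):
    # every test defined in TestDefault runs again against this configuration
    def create_policy(self):
        return {"model_confidence": "cosine"}
```

Overriding a single factory method keeps the configuration matrix in the class hierarchy instead of copy-pasting test bodies, which is why only the range assertions differ between the cosine and inner variants.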
""" new_values = values.copy() # prevent mutation of the input diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py index bae7475be18c..d1a5c55e3b36 100644 --- a/tests/core/policies/test_ted_policy.py +++ b/tests/core/policies/test_ted_policy.py @@ -287,6 +287,18 @@ def test_normalization( # function should not get called for margin loss_type mock.normalize.assert_not_called() + def test_prediction_on_empty_tracker( + self, trained_policy: Policy, default_domain: Domain + ): + tracker = DialogueStateTracker(DEFAULT_SENDER_ID, default_domain.slots) + prediction = trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ) + assert not prediction.is_end_to_end_prediction + assert len(prediction.probabilities) == default_domain.num_actions + assert max(prediction.probabilities) <= 1.0 + assert min(prediction.probabilities) >= -1.0 + class TestTEDPolicyWithEval(TestTEDPolicy): def create_policy( From 5734612c9a84195fc85a9df9f58a14fc46eba70d Mon Sep 17 00:00:00 2001 From: Daksh Date: Sun, 7 Feb 2021 19:00:10 +0100 Subject: [PATCH 28/44] change function call --- rasa/utils/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py index cf4661fd7f9b..f391f84a5717 100644 --- a/rasa/utils/plotting.py +++ b/rasa/utils/plotting.py @@ -128,7 +128,7 @@ def plot_histogram( """ import matplotlib.pyplot as plt - def get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[float]: + def _get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[float]: total_values = len(data[0]) + len(data[1]) bin_max_size = int(total_values * bin_size_frac) @@ -158,7 +158,7 @@ def get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[float [min(hist_data[0], default=0), min(hist_data[1], default=0)], default=0 ) - bins = get_bins(hist_data) + bins = _get_bins(hist_data) binned_data_sets = [np.histogram(d, bins=bins)[0] for d in hist_data] From 1dc6930661e534daf22b2244268036810d6bff77 Mon Sep 17 00:00:00 2001 From: Daksh Date: Sun, 7 Feb 2021 20:05:54 +0100 Subject: [PATCH 29/44] self review, add types, docformats --- rasa/core/policies/ted_policy.py | 1 + rasa/nlu/classifiers/diet_classifier.py | 3 ++- rasa/nlu/selectors/response_selector.py | 2 +- rasa/utils/plotting.py | 15 +++++++++++---- tests/core/policies/test_ted_policy.py | 6 ++++++ tests/nlu/selectors/test_selectors.py | 6 ++---- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index f75f489c1bf3..8a86095ed2b1 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -807,6 +807,7 @@ def load( model_data_example = RasaModelData( label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data ) + meta = rasa.utils.train_utils.update_confidence_type(meta) meta = rasa.utils.train_utils.update_similarity_type(meta) meta = rasa.utils.train_utils.update_loss_type(meta) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index de140e6565e8..673315ec35f0 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -252,7 +252,7 @@ def required_components(cls) -> List[Type[Component]]: # If 'True' applies sigmoid on all similarity terms and adds it to the loss function to # ensure that similarity values are approximately bounded. Used inside softmax loss only. 
From 1dc6930661e534daf22b2244268036810d6bff77 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Sun, 7 Feb 2021 20:05:54 +0100
Subject: [PATCH 29/44] self review, add types, docformats

---
 rasa/core/policies/ted_policy.py        |  1 +
 rasa/nlu/classifiers/diet_classifier.py |  3 ++-
 rasa/nlu/selectors/response_selector.py |  2 +-
 rasa/utils/plotting.py                  | 15 +++++++++++----
 tests/core/policies/test_ted_policy.py  |  6 ++++++
 tests/nlu/selectors/test_selectors.py   |  6 ++----
 6 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index f75f489c1bf3..8a86095ed2b1 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -807,6 +807,7 @@ def load(
         model_data_example = RasaModelData(
             label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
         )
+        meta = rasa.utils.train_utils.update_confidence_type(meta)
         meta = rasa.utils.train_utils.update_similarity_type(meta)
         meta = rasa.utils.train_utils.update_loss_type(meta)
 
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index de140e6565e8..673315ec35f0 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -252,7 +252,7 @@ def required_components(cls) -> List[Type[Component]]:
         # If 'True' applies sigmoid on all similarity terms and adds it to the loss function to
         # ensure that similarity values are approximately bounded. Used inside softmax loss only.
         CONSTRAIN_SIMILARITIES: False,
-        # Model confidence to be returned during inference. Possible values - softmax, cosine, inner.
+        # Model confidence to be returned during inference. Possible values - 'softmax', 'cosine', 'inner'.
         MODEL_CONFIDENCE: SOFTMAX,
     }
 
@@ -1019,6 +1019,7 @@ def load(
             data_example,
         ) = cls._load_from_files(meta, model_dir)
 
+        meta = train_utils.update_confidence_type(meta)
         meta = train_utils.update_similarity_type(meta)
         meta = train_utils.update_loss_type(meta)
 
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index 6f099f38df8d..b66426fef78b 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -238,7 +238,7 @@ def required_components(cls) -> List[Type[Component]]:
         # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to
         # ensure that similarity values are approximately bounded. Used inside softmax loss only.
         CONSTRAIN_SIMILARITIES: False,
-        # Model confidence to be returned during inference. Possible values - softmax, cosine, inner.
+        # Model confidence to be returned during inference. Possible values - 'softmax', 'cosine', 'inner'.
         MODEL_CONFIDENCE: SOFTMAX,
     }
 
diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index f391f84a5717..999826c50246 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -5,6 +5,7 @@
 import numpy as np
 from typing import List, Text, Optional, Union, Any
 import matplotlib
+from matplotlib.ticker import FormatStrFormatter
 
 import rasa.shared.utils.io
 from rasa.constants import RESULTS_FILE
@@ -158,7 +159,10 @@ def _get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[floa
         [min(hist_data[0], default=0), min(hist_data[1], default=0)], default=0
     )
 
-    bins = _get_bins(hist_data)
+    bin_width = (max_value - min_value) / n_bins
+    bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)]
+
+    # bins = _get_bins(hist_data)
 
     binned_data_sets = [np.histogram(d, bins=bins)[0] for d in hist_data]
 
@@ -174,11 +178,11 @@ def _get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[floa
             ]
         )
     ]
-        # - bins[0]
+        - bins[0]
     )
 
-    # max_ylim = max(bins) + bin_width
-    max_ylim = max(bins)
+    max_ylim = max(bins) + bin_width
+    # max_ylim = max(bins)
 
     yticks = [float("{:.2f}".format(x)) for x in bins]
 
@@ -203,11 +207,14 @@ def _get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[floa
         color=colors[1],
         label="misses",
     )
+    axes[0].set_yscale("log")
 
     axes[1].set(title="Wrong")
     axes[0].set(yticks=yticks, xlim=(0, max_xlims[0]), ylim=(min_ylim, max_ylim))
     axes[1].set(yticks=yticks, xlim=(0, max_xlims[1]), ylim=(min_ylim, max_ylim))
 
+    axes[0].yaxis.set_major_formatter(FormatStrFormatter("%.2f"))
+
     axes[0].invert_xaxis()
     axes[0].yaxis.tick_right()
 
diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py
index d1a5c55e3b36..b6bed59d98ae 100644
--- a/tests/core/policies/test_ted_policy.py
+++ b/tests/core/policies/test_ted_policy.py
@@ -354,6 +354,9 @@ def create_policy(
             featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: COSINE}
         )
 
+    def test_similarity_type(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[SIMILARITY_TYPE] == COSINE
+
     def test_normalization(
         self,
         trained_policy: Policy,
@@ -404,6 +407,9 @@ def create_policy(
             featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: INNER}
         )
 
+    def test_similarity_type(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[SIMILARITY_TYPE] == INNER
+
     def test_normalization(
         self,
         trained_policy: Policy,
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index 0f0be42ab7e6..e21c4dde4d49 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -27,14 +27,12 @@
     LOSS_TYPE,
 )
 from rasa.utils import train_utils
-from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.shared.nlu.constants import TEXT
 from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.nlu.selectors.response_selector import ResponseSelector
 from rasa.shared.nlu.training_data.message import Message
 from rasa.shared.nlu.training_data.training_data import TrainingData
 from tests.nlu.classifiers.test_diet_classifier import as_pipeline
-from tests.conftest import DEFAULT_NLU_DATA
 
 
 @pytest.mark.parametrize(
@@ -330,8 +328,8 @@
     "classifier_params, prediction_min, prediction_max, output_length",
     [
-        ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "cosine"}, -1, 1, 9,),
-        ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "inner"}, -1e9, 1e9, 9,),
+        ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "cosine"}, -1, 1, 9),
+        ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "inner"}, -1e9, 1e9, 9),
     ],
 )

From ab1e7b36dcdba59876fc7fbe9bcb746c1d9b6e7c Mon Sep 17 00:00:00 2001
From: Daksh
Date: Sun, 7 Feb 2021 20:07:18 +0100
Subject: [PATCH 30/44] revert back plotting changes

---
 rasa/utils/plotting.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index 999826c50246..3d4963b2e348 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -159,10 +159,10 @@ def _get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[floa
         [min(hist_data[0], default=0), min(hist_data[1], default=0)], default=0
     )
 
-    bin_width = (max_value - min_value) / n_bins
-    bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)]
+    # bin_width = (max_value - min_value) / n_bins
+    # bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)]
 
-    # bins = _get_bins(hist_data)
+    bins = _get_bins(hist_data)
 
     binned_data_sets = [np.histogram(d, bins=bins)[0] for d in hist_data]
 
@@ -178,11 +178,11 @@ def _get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[floa
             ]
         )
     ]
-        - bins[0]
+        # - bins[0]
     )
 
-    max_ylim = max(bins) + bin_width
-    # max_ylim = max(bins)
+    # max_ylim = max(bins) + bin_width
+    max_ylim = max(bins)
 
     yticks = [float("{:.2f}".format(x)) for x in bins]

From f2da6bb61459af06bcc105ff1a9d31f9137663cf Mon Sep 17 00:00:00 2001
From: Daksh
Date: Mon, 8 Feb 2021 13:05:45 +0100
Subject: [PATCH 31/44] final plotting style

---
 rasa/nlu/test.py                       |  2 +-
 rasa/utils/plotting.py                 | 31 +++++---------------------
 tests/core/policies/test_ted_policy.py | 13 ++++++-----
 3 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
index 837aec238855..e9f819d9e243 100644
--- a/rasa/nlu/test.py
+++ b/rasa/nlu/test.py
@@ -927,7 +927,7 @@ def evaluate_entities(
         merged_targets,
         merged_predictions,
         merged_confidences,
-        title="Entity Confusion matrix",
+        title="Entity Prediction Confidence Distribution",
         hist_filename=histogram_filename,
     )
 
diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index 3d4963b2e348..c816f26f77a9 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -129,23 +129,6 @@ def plot_histogram(
     """
     import matplotlib.pyplot as plt
 
-    def _get_bins(data: List[List[float]], bin_size_frac: float = 0.04) -> List[float]:
-        total_values = len(data[0]) + len(data[1])
-        bin_max_size = int(total_values * bin_size_frac)
-
-        all_values = sorted(data[0] + data[1])
-        bins = []
-        bin_count = 0
-        for value in all_values:
-            bin_count += 1
-            if bin_count == bin_max_size:
-                bins.append(value)
-                bin_count = 0
-        if bin_count:
-            bins.append(all_values[-1])
-
-        return bins
-
     plt.gcf().clear()
 
     # Wine-ish colour for the confidences of hits.
@@ -159,10 +142,8 @@ def plot_histogram(
         [min(hist_data[0], default=0), min(hist_data[1], default=0)], default=0
     )
 
-    # bin_width = (max_value - min_value) / n_bins
-    # bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)]
-
-    bins = _get_bins(hist_data)
+    bin_width = (max_value - min_value) / n_bins
+    bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)]
 
     binned_data_sets = [np.histogram(d, bins=bins)[0] for d in hist_data]
 
@@ -178,11 +159,10 @@ def plot_histogram(
             ]
         )
     ]
-        # - bins[0]
+        - bin_width
     )
 
-    # max_ylim = max(bins) + bin_width
-    max_ylim = max(bins)
+    max_ylim = max(bins) + bin_width
 
     yticks = [float("{:.2f}".format(x)) for x in bins]
 
@@ -207,13 +187,14 @@ def plot_histogram(
         color=colors[1],
         label="misses",
     )
-    axes[0].set_yscale("log")
 
     axes[1].set(title="Wrong")
     axes[0].set(yticks=yticks, xlim=(0, max_xlims[0]), ylim=(min_ylim, max_ylim))
     axes[1].set(yticks=yticks, xlim=(0, max_xlims[1]), ylim=(min_ylim, max_ylim))
 
     axes[0].yaxis.set_major_formatter(FormatStrFormatter("%.2f"))
+    axes[0].yaxis.set_minor_formatter(FormatStrFormatter("%.2f"))
 
     axes[0].invert_xaxis()
     axes[0].yaxis.tick_right()
 
diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py
index b6bed59d98ae..de3c7668e008 100644
--- a/tests/core/policies/test_ted_policy.py
+++ b/tests/core/policies/test_ted_policy.py
@@ -268,7 +268,10 @@ def create_policy(
         )
 
     def test_similarity_type(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[SIMILARITY_TYPE] == "cosine"
+        assert trained_policy.config[SIMILARITY_TYPE] == COSINE
+
+    def test_confidence_type(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[MODEL_CONFIDENCE] == COSINE
 
     def test_normalization(
@@ -354,8 +357,8 @@ def create_policy(
             featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: COSINE}
         )
 
-    def test_similarity_type(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[SIMILARITY_TYPE] == COSINE
+    def test_confidence_type(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[MODEL_CONFIDENCE] == COSINE
 
     def test_normalization(
@@ -404,8 +407,8 @@ def create_policy(
             featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: INNER}
         )
 
-    def test_similarity_type(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[SIMILARITY_TYPE] == INNER
+    def test_confidence_type(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[MODEL_CONFIDENCE] == INNER
 
     def test_normalization(
From 8fb7ea2aa0d568a45046c7121a21cd64f3e70117 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Mon, 8 Feb 2021 13:08:53 +0100
Subject: [PATCH 32/44] change epochs to 1

---
 tests/nlu/selectors/test_selectors.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index e21c4dde4d49..02dd94394cea 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -418,9 +418,9 @@ async def test_margin_loss_is_not_normalized(
 @pytest.mark.parametrize(
     "classifier_params, data_path, output_length",
     [
-        ({RANDOM_SEED: 42, EPOCHS: 2}, "data/test_selectors", 9),
-        ({RANDOM_SEED: 42, RANKING_LENGTH: 0, EPOCHS: 2}, "data/test_selectors", 9),
-        ({RANDOM_SEED: 42, RANKING_LENGTH: 2, EPOCHS: 2}, "data/test_selectors", 2),
+        ({RANDOM_SEED: 42, EPOCHS: 1}, "data/test_selectors", 9),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 0, EPOCHS: 1}, "data/test_selectors", 9),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 2, EPOCHS: 1}, "data/test_selectors", 2),
     ],
 )
 async def test_softmax_ranking(

From 2724b1951c286b75ff8b226c6f1bec4e73985499 Mon Sep 17 00:00:00 2001
From: Daksh Varshneya
Date: Mon, 8 Feb 2021 14:36:13 +0100
Subject: [PATCH 33/44] Partial suggestions from code review

Co-authored-by: Tobias Wochinger
---
 changelog/7616.improvement.md           | 8 ++++----
 docs/docs/components.mdx                | 4 ++--
 docs/docs/migration-guide.mdx           | 1 -
 docs/docs/policies.mdx                  | 4 ++--
 rasa/core/policies/ted_policy.py        | 1 -
 rasa/nlu/classifiers/diet_classifier.py | 1 -
 6 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md
index 63be53e61c5f..cc31ff1dcc73 100644
--- a/changelog/7616.improvement.md
+++ b/changelog/7616.improvement.md
@@ -3,15 +3,15 @@ Added two new parameters `constrain_similarities` and `model_confidence` to mach
 
 Setting `constrain_similarities=True` adds a sigmoid cross-entropy loss on all similarity values to restrict them to an approximate range in `DotProductLoss`. This should help the models to perform better on real world test sets. By default, the parameter is set to `False` to preserve the old behaviour, but users are encouraged to set it to `True` and re-train their assistants as it will be set to `True` by default, Rasa Open Source 3.0 onwards.
 
-Parameter `model_confidence` affects how model's confidence for each label is computed during inference. It can take three values -
+Parameter `model_confidence` affects how the model's confidence for each label is computed during inference. It can take three values:
 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1.
 2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label will be in the range `[-1,1]`.
 3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range.
 
 Setting `model_confidence=cosine` should help users tune the fallback thresholds of their assistant better. The default value is `softmax` to preserve the old behaviour, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
 
-With both the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as -
-```
+With both the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as
+```yaml
 - name: DIETClassifier
   model_confidence: cosine
   constrain_similarities: True
@@ -21,4 +21,4 @@ Once the assistant is re-trained with the above configuration, users should also
 
 Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead.
 
-The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration.
\ No newline at end of file
+The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration.
diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx
index 2025f126c127..ef5ac83db5a1 100644
--- a/docs/docs/components.mdx
+++ b/docs/docs/components.mdx
@@ -1623,7 +1623,7 @@ However, additional parameters exist that can be adapted.
 |                                 |                  | approximately bounded. Used only if `loss_type=cross_entropy`|
 +---------------------------------+------------------+--------------------------------------------------------------+
 | model_confidence                | "softmax"        | Affects how model's confidence for each intent               |
-|                                 |                  | is computed. It can take three values -                      |
+|                                 |                  | is computed. It can take three values                        |
 |                                 |                  | 1. `softmax` - Similarities between input and intent         |
 |                                 |                  | embeddings are post-processed with a softmax function,       |
 |                                 |                  | as a result of which confidence for all intents sum up to 1. |
@@ -2841,7 +2841,7 @@ However, additional parameters exist that can be adapted.
 |                                 |                   | approximately bounded. Used only if `loss_type=cross_entropy`|
 +---------------------------------+-------------------+--------------------------------------------------------------+
 | model_confidence                | "softmax"         | Affects how model's confidence for each response label       |
-|                                 |                   | is computed. It can take three values -                      |
+|                                 |                   | is computed. It can take three values                        |
 |                                 |                   | 1. `softmax` - Similarities between input and response label |
 |                                 |                   | embeddings are post-processed with a softmax function,       |
 |                                 |                   | as a result of which confidence for all labels sum up to 1.  |
diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx
index 9d5551e10410..38d0d2fb6854 100644
--- a/docs/docs/migration-guide.mdx
+++ b/docs/docs/migration-guide.mdx
@@ -36,7 +36,6 @@ With both the above recommendations, users should configure their ML component,
 ```
 Once the assistant is re-trained with the above configuration, users should also tune fallback confidence thresholds.
 
-Configuration option `loss_type=softmax` is also deprecatedin all ML components. Use `loss_type=cross_entropy` instead.
 
 ## Rasa 2.1 to Rasa 2.2
 
diff --git a/docs/docs/policies.mdx b/docs/docs/policies.mdx
index 2b57bdaeb1b6..bc2d4c1c4e85 100644
--- a/docs/docs/policies.mdx
+++ b/docs/docs/policies.mdx
@@ -348,10 +348,10 @@ However, additional parameters exist that can be adapted.
 +---------------------------------------+------------------------+--------------------------------------------------------------+
 | constrain_similarities                | False                  | If `True`, applies sigmoid on all similarity terms and adds  |
 |                                       |                        | it to the loss function to ensure that similarity values are |
-|                                       |                        | approximately bounded. Used only when `loss_type=softmax`    |
+|                                       |                        | approximately bounded. Used only when `loss_type=softmax`.   |
 +---------------------------------------+------------------------+--------------------------------------------------------------+
 | model_confidence                      | "softmax"              | Affects how model's confidence for each action               |
-|                                       |                        | is computed. It can take three values -                      |
+|                                       |                        | is computed. It can take three values                        |
 |                                       |                        | 1. `softmax` - Similarities between input and action         |
 |                                       |                        | embeddings are post-processed with a softmax function,       |
 |                                       |                        | as a result of which confidence for all labels sum up to 1.  |
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 8a86095ed2b1..4d99e218b143 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -83,7 +83,6 @@
     VALUE_RELATIVE_ATTENTION,
     MAX_RELATIVE_POSITION,
     CROSS_ENTROPY,
-    INNER,
     AUTO,
     BALANCED,
     TENSORBOARD_LOG_DIR,
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 673315ec35f0..2a2d46db7343 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -86,7 +86,6 @@
     VALUE_RELATIVE_ATTENTION,
     MAX_RELATIVE_POSITION,
     AUTO,
-    INNER,
     BALANCED,
     CROSS_ENTROPY,
     TENSORBOARD_LOG_LEVEL,
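The three `model_confidence` options documented above reduce to three post-processings of the same similarity scores. A standalone numpy sketch of the distinction (not the Rasa implementation):

```python
import numpy as np

def confidence(dialogue: np.ndarray, labels: np.ndarray, kind: str) -> np.ndarray:
    inner = labels @ dialogue  # raw dot products, unbounded range
    if kind == "inner":
        return inner
    if kind == "cosine":
        # normalizing by both norms bounds every value to [-1, 1]
        return inner / (np.linalg.norm(labels, axis=1) * np.linalg.norm(dialogue))
    if kind == "softmax":
        exp = np.exp(inner - inner.max())  # numerically stable softmax
        return exp / exp.sum()             # confidences sum to 1 across labels
    raise ValueError(f"unknown model_confidence: {kind}")

rng = np.random.default_rng(42)
d, ls = rng.normal(size=20), rng.normal(size=(5, 20))
assert np.all(np.abs(confidence(d, ls, "cosine")) <= 1)
assert np.isclose(confidence(d, ls, "softmax").sum(), 1.0)
```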
From 8c66bd8393b3cf126cc3565974702ff5b1a0048e Mon Sep 17 00:00:00 2001
From: Daksh Varshneya
Date: Mon, 8 Feb 2021 14:45:16 +0100
Subject: [PATCH 34/44] Apply doc suggestions from code review

Co-authored-by: Melinda Loubser <32034278+melindaloubser1@users.noreply.github.com>
Co-authored-by: Vladimir Vlasov
---
 changelog/7616.improvement.md |  2 +-
 docs/docs/migration-guide.mdx | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md
index cc31ff1dcc73..bce9f7e27d17 100644
--- a/changelog/7616.improvement.md
+++ b/changelog/7616.improvement.md
@@ -8,7 +8,7 @@ Parameter `model_confidence` affects how the model's confidence for each label is co
 2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label will be in the range `[-1,1]`.
 3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range.
 
-Setting `model_confidence=cosine` should help users tune the fallback thresholds of their assistant better. The default value is `softmax` to preserve the old behaviour, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
+Setting `model_confidence=cosine` should help users tune the fallback thresholds of their assistant better. The default value is `softmax` to preserve the old behaviour, but we recommend using `cosine` as that will be the new default value from Rasa Open Source 3.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
 
 With both the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as
 ```yaml
diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx
index 38d0d2fb6854..b6bc792bd169 100644
--- a/docs/docs/migration-guide.mdx
+++ b/docs/docs/migration-guide.mdx
@@ -14,20 +14,20 @@ how you can migrate from one version to another.
 
 ### Machine Learning Components
 
-Few changes have been made to the loss function inside machine learning (ML)
-components - `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include:
+A few changes have been made to the loss function inside machine learning (ML)
+components `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include:
 1. Configuration option `loss_type=softmax` is now deprecated and will be removed in Rasa Open Source 3.0. Use `loss_type=cross_entropy` instead.
-2. The default loss function (`loss_type=cross_entropy`) adds an optional sigmoid cross-entropy loss of all similarity values to constrain
+2. The default loss function (`loss_type=cross_entropy`) can add an optional sigmoid cross-entropy loss of all similarity values to constrain
 them to an approximate range. You can turn on this option by setting `constrain_similarities=True`. This should help the models to perform better on real world test sets.
 
-Also, a new option `model_confidence` has been added to each ML component. It affects how model's confidence for each label is computed during inference. It can take three values -
+Also, a new option `model_confidence` has been added to each ML component. It affects how the model's confidence for each label is computed during inference. It can take one of three values:
 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1.
 2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label will be in the range `[-1,1]`.
 3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range.
 
-The default value is `softmax`, but we recommend using `cosine` as that will be the new default value, Rasa Open Source 3.0 onwards.
+The default value is `softmax`, but we recommend using `cosine` as that will be the new default value from Rasa Open Source 3.0 onwards.
 The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
 
-With both the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as -
+With both the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as:
 ```
 - name: DIETClassifier
   model_confidence: cosine
From 06a70eeb8b498d8d425a8cedbe252d327afcb523 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Mon, 8 Feb 2021 15:11:06 +0100
Subject: [PATCH 35/44] refactor loss, add docstrings

---
 changelog/7616.improvement.md           |  2 +-
 rasa/core/policies/ted_policy.py        | 13 ++--
 rasa/nlu/classifiers/diet_classifier.py | 13 ++--
 rasa/nlu/selectors/response_selector.py | 26 +++++--
 rasa/utils/tensorflow/layers.py         | 98 +++++++++++++++----------
 5 files changed, 96 insertions(+), 56 deletions(-)

diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md
index 63be53e61c5f..ed757edf664f 100644
--- a/changelog/7616.improvement.md
+++ b/changelog/7616.improvement.md
@@ -21,4 +21,4 @@ Once the assistant is re-trained with the above configuration, users should also
 
 Configuration option `loss_type=softmax` is now deprecated. Use `loss_type=cross_entropy` instead.
 
-The default auto-configuration is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration.
\ No newline at end of file
+The default [auto-configuration](https://rasa.com/docs/rasa/model-configuration#suggested-config) is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration.
\ No newline at end of file
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 8a86095ed2b1..ecf5672736fc 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -218,8 +218,9 @@ class TEDPolicy(Policy):
         SIMILARITY_TYPE: AUTO,
         # The type of the loss function, either 'cross_entropy' or 'margin'.
         LOSS_TYPE: CROSS_ENTROPY,
-        # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy'
-        # and 'softmax' confidences. Set to 0 to turn off normalization.
+        # Number of top actions to normalize scores for. Applicable with
+        # loss type 'cross_entropy' and 'softmax' confidences. Set to 0
+        # to turn off normalization.
         RANKING_LENGTH: 10,
         # Indicates how similar the algorithm should try to make embedding vectors
         # for correct labels.
@@ -281,10 +282,12 @@ class TEDPolicy(Policy):
         FEATURIZERS: [],
         # If set to true, entities are predicted in user utterances.
         ENTITY_RECOGNITION: True,
-        # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to
-        # ensure that similarity values are approximately bounded. Used inside softmax loss only.
+        # if 'True' applies sigmoid on all similarity terms and adds
+        # it to the loss function to ensure that similarity values are
+        # approximately bounded. Used inside softmax loss only.
         CONSTRAIN_SIMILARITIES: False,
-        # Model confidence to be returned during inference. Possible values - 'softmax', 'cosine' and 'inner'.
+        # Model confidence to be returned during inference. Possible values -
+        # 'softmax', 'cosine' and 'inner'.
         MODEL_CONFIDENCE: SOFTMAX,
         # 'BILOU_flag' determines whether to use BILOU tagging or not.
         # If set to 'True' labelling is more rigorous, however more
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 673315ec35f0..5b85a5568c26 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -181,8 +181,9 @@ def required_components(cls) -> List[Type[Component]]:
         SIMILARITY_TYPE: AUTO,
         # The type of the loss function, either 'cross_entropy' or 'margin'.
         LOSS_TYPE: CROSS_ENTROPY,
-        # Number of top intents to normalize scores for. Applicable with loss type 'cross_entropy'
-        # and 'softmax' confidences. Set to 0 to turn off normalization.
+        # Number of top intents to normalize scores for. Applicable with
+        # loss type 'cross_entropy' and 'softmax' confidences. Set to 0
+        # to turn off normalization.
         RANKING_LENGTH: 10,
         # Indicates how similar the algorithm should try to make embedding vectors
         # for correct labels.
@@ -249,10 +250,12 @@ def required_components(cls) -> List[Type[Component]]:
         # Split entities by comma, this makes sense e.g. for a list of ingredients
         # in a recipe, but it doesn't make sense for the parts of an address
         SPLIT_ENTITIES_BY_COMMA: True,
-        # If 'True' applies sigmoid on all similarity terms and adds it to the loss function to
-        # ensure that similarity values are approximately bounded. Used inside softmax loss only.
+        # If 'True' applies sigmoid on all similarity terms and adds
+        # it to the loss function to ensure that similarity values are
+        # approximately bounded. Used inside softmax loss only.
         CONSTRAIN_SIMILARITIES: False,
-        # Model confidence to be returned during inference. Possible values - 'softmax', 'cosine', 'inner'.
+        # Model confidence to be returned during inference. Possible values -
+        # 'softmax', 'cosine', 'inner'.
         MODEL_CONFIDENCE: SOFTMAX,
     }
 
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index b66426fef78b..f6aa535f6298 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -176,8 +176,9 @@ def required_components(cls) -> List[Type[Component]]:
         SIMILARITY_TYPE: AUTO,
         # The type of the loss function, either 'cross_entropy' or 'margin'.
         LOSS_TYPE: CROSS_ENTROPY,
-        # Number of top actions to normalize scores for. Applicable with loss type 'cross_entropy'
-        # and 'softmax' confidences. Set to 0 to turn off normalization.
+        # Number of top actions to normalize scores for. Applicable with
+        # loss type 'cross_entropy' and 'softmax' confidences. Set to 0
+        # to turn off normalization.
         RANKING_LENGTH: 10,
         # Indicates how similar the algorithm should try to make embedding vectors
         # for correct labels.
@@ -235,10 +236,12 @@ def required_components(cls) -> List[Type[Component]]:
         FEATURIZERS: [],
         # Perform model checkpointing
         CHECKPOINT_MODEL: False,
-        # if 'True' applies sigmoid on all similarity terms and adds it to the loss function to
-        # ensure that similarity values are approximately bounded. Used inside softmax loss only.
+        # if 'True' applies sigmoid on all similarity terms and adds it
+        # to the loss function to ensure that similarity values are
+        # approximately bounded. Used inside softmax loss only.
         CONSTRAIN_SIMILARITIES: False,
-        # Model confidence to be returned during inference. Possible values - 'softmax', 'cosine', 'inner'.
+        # Model confidence to be returned during inference. Possible values -
+        # 'softmax', 'cosine', 'inner'.
         MODEL_CONFIDENCE: SOFTMAX,
     }
 
@@ -252,7 +255,18 @@ def __init__(
         responses: Optional[Dict[Text, List[Dict[Text, Any]]]] = None,
         finetune_mode: bool = False,
     ) -> None:
-        """Declare instance variables with default values."""
+        """Declare instance variables with default values.
+
+        Args:
+            component_config: Configuration for the component.
+            index_label_id_mapping: Mapping between label and index used for encoding.
+            entity_tag_specs: Format specification of all entity tags.
+            model: Model architecture.
+            all_retrieval_intents: All retrieval intents defined in the data.
+            responses: All responses defined in the data.
+            finetune_mode: If `True` loads the model with pre-trained weights,
+                otherwise initializes it with random weights.
+        """
         component_config = component_config or {}
 
         # the following properties cannot be adapted for the ResponseSelector
diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py
index 354e7430eadd..30281b851f57 100644
--- a/rasa/utils/tensorflow/layers.py
+++ b/rasa/utils/tensorflow/layers.py
@@ -582,6 +582,9 @@ def __init__(
                 Used inside _loss_cross_entropy() only.
             model_confidence: Model confidence to be returned during inference.
                 Possible values - softmax, cosine, inner.
+
+        Raises:
+            RasaException: When `similarity_type` is not one of 'cosine' or 'inner'.
         """
         super().__init__(name=name)
         self.num_neg = num_neg
@@ -595,7 +598,7 @@ def __init__(
         self.constrain_similarities = constrain_similarities
         self.model_confidence = model_confidence
         self.similarity_type = similarity_type
-        if self.similarity_type and self.similarity_type not in {COSINE, INNER}:
+        if not self.similarity_type or self.similarity_type not in {COSINE, INNER}:
             raise RasaException(
                 f"Wrong similarity type '{self.similarity_type}', "
                 f"should be '{COSINE}' or '{INNER}'."
@@ -850,49 +853,15 @@ def _loss_cross_entropy(
         mask: Optional[tf.Tensor],
     ) -> tf.Tensor:
         """Defines cross entropy loss."""
-        # Similarity terms between input and label should be optimized relative
-        # to each other and hence use them as logits for softmax term
-        softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1)
-
-        if not self.constrain_similarities:
-            # Concatenate other similarity terms as well. Due to this,
-            # similarity values between input and label may not be
-            # approximately bounded in a defined range.
-            softmax_logits = tf.concat(
-                [softmax_logits, sim_neg_ii, sim_neg_ll], axis=-1
-            )
-
-        # create label_ids for softmax
-        softmax_label_ids = tf.zeros_like(softmax_logits[..., 0], tf.int32)
-
-        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
-            labels=softmax_label_ids, logits=softmax_logits
+        loss = self._compute_softmax_loss(
+            sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li
         )
 
-        loss = softmax_loss
-
         if self.constrain_similarities:
-            # Constrain similarity values in a range by applying sigmoid
-            # on them individually so that they saturate at extreme values.
-            sigmoid_logits = tf.concat(
-                [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1
-            )
-
-            sigmoid_labels = tf.concat(
-                [
-                    tf.ones_like(sigmoid_logits[..., :1]),
-                    tf.zeros_like(sigmoid_logits[..., 1:]),
-                ],
-                axis=-1,
+            loss += self._compute_sigmoid_loss(
+                sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li
             )
 
-            sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits(
-                labels=sigmoid_labels, logits=sigmoid_logits
-            )
-
-            # average over logits axis
-            loss += tf.reduce_mean(sigmoid_loss, axis=-1)
-
         if self.scale_loss:
             # in case of cross entropy log_likelihood = -loss
             loss *= _scale_loss(-loss)
@@ -910,6 +879,57 @@ def _loss_cross_entropy(
         # average the loss over the batch
         return tf.reduce_mean(loss)
 
+    def _compute_sigmoid_loss(
+        self,
+        sim_pos: tf.Tensor,
+        sim_neg_il: tf.Tensor,
+        sim_neg_ll: tf.Tensor,
+        sim_neg_ii: tf.Tensor,
+        sim_neg_li: tf.Tensor,
+    ) -> tf.Tensor:
+        # Constrain similarity values in a range by applying sigmoid
+        # on them individually so that they saturate at extreme values.
+        sigmoid_logits = tf.concat(
+            [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1
+        )
+        sigmoid_labels = tf.concat(
+            [
+                tf.ones_like(sigmoid_logits[..., :1]),
+                tf.zeros_like(sigmoid_logits[..., 1:]),
+            ],
+            axis=-1,
+        )
+        sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits(
+            labels=sigmoid_labels, logits=sigmoid_logits
+        )
+        # average over logits axis
+        return tf.reduce_mean(sigmoid_loss, axis=-1)
+
+    def _compute_softmax_loss(
+        self,
+        sim_pos: tf.Tensor,
+        sim_neg_il: tf.Tensor,
+        sim_neg_ll: tf.Tensor,
+        sim_neg_ii: tf.Tensor,
+        sim_neg_li: tf.Tensor,
+    ) -> tf.Tensor:
+        # Similarity terms between input and label should be optimized relative
+        # to each other and hence use them as logits for softmax term
+        softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1)
+        if not self.constrain_similarities:
+            # Concatenate other similarity terms as well. Due to this,
+            # similarity values between input and label may not be
+            # approximately bounded in a defined range.
+            softmax_logits = tf.concat(
+                [softmax_logits, sim_neg_ii, sim_neg_ll], axis=-1
+            )
+        # create label_ids for softmax
+        softmax_label_ids = tf.zeros_like(softmax_logits[..., 0], tf.int32)
+        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+            labels=softmax_label_ids, logits=softmax_logits
+        )
+        return softmax_loss
+
     @property
     def _chosen_loss(self) -> Callable:
         """Use loss depending on given option."""

From 6f9cd90c2a5e9e71d661155917641d0b5907af14 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Mon, 8 Feb 2021 15:22:10 +0100
Subject: [PATCH 36/44] remove none for similarity_type

---
 rasa/utils/tensorflow/layers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py
index 30281b851f57..49d46acb2152 100644
--- a/rasa/utils/tensorflow/layers.py
+++ b/rasa/utils/tensorflow/layers.py
@@ -547,9 +547,9 @@ def __init__(
         use_max_sim_neg: bool,
         neg_lambda: float,
         scale_loss: bool,
+        similarity_type: Text,
         name: Optional[Text] = None,
         same_sampling: bool = False,
-        similarity_type: Optional[Text] = None,
         constrain_similarities: bool = True,
         model_confidence: Text = SOFTMAX,
     ) -> None:
@@ -572,10 +572,10 @@ def __init__(
                 used only if 'loss_type' is set to 'margin'.
             scale_loss: Boolean, if 'True' scale loss inverse proportionally to
                 the confidence of the correct prediction.
+            similarity_type: Similarity measure to use, either 'cosine' or 'inner'.
             name: Optional name of the layer.
             same_sampling: Boolean, if 'True' sample same negative labels for
                 the whole batch.
-            similarity_type: Similarity measure to use, either 'cosine' or 'inner'.
             constrain_similarities: Boolean, if 'True' applies sigmoid on all
                 similarity terms and adds to the loss function to
                 ensure that similarity values are approximately bounded.
@@ -598,7 +598,7 @@ def __init__(
         self.constrain_similarities = constrain_similarities
         self.model_confidence = model_confidence
         self.similarity_type = similarity_type
-        if not self.similarity_type or self.similarity_type not in {COSINE, INNER}:
+        if self.similarity_type not in {COSINE, INNER}:
             raise RasaException(
                 f"Wrong similarity type '{self.similarity_type}', "
                 f"should be '{COSINE}' or '{INNER}'."
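The refactor above splits the cross-entropy into two pieces: a softmax term that only ranks the positive similarity above the negatives, and an optional sigmoid term that additionally judges each similarity on its own, keeping the raw values in an approximately bounded range. The combination in a minimal, self-contained TensorFlow sketch, with one similarity matrix (positive pair in column 0) standing in for the five `sim_*` tensors:

```python
import tensorflow as tf

def constrained_cross_entropy(sim: tf.Tensor) -> tf.Tensor:
    """`sim` has shape (batch, 1 + num_neg); column 0 is the positive pair."""
    # softmax term: the positive must outscore the negatives (relative).
    softmax_labels = tf.zeros_like(sim[..., 0], tf.int32)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=softmax_labels, logits=sim
    )
    # sigmoid term: each similarity is also pushed towards 1 (positive)
    # or 0 (negative) individually, which bounds the raw values.
    sigmoid_labels = tf.concat(
        [tf.ones_like(sim[..., :1]), tf.zeros_like(sim[..., 1:])], axis=-1
    )
    loss += tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=sigmoid_labels, logits=sim),
        axis=-1,
    )
    return tf.reduce_mean(loss)  # average over the batch

print(constrained_cross_entropy(tf.constant([[4.0, -2.0, 0.5], [3.0, 1.0, -1.0]])))
```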
From 6697c0dd6ef1a8f703037211df676c78fe491309 Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 8 Feb 2021 16:45:29 +0100 Subject: [PATCH 37/44] override defaults during load so that new parameters are filled in before model is initialized --- rasa/core/policies/ted_policy.py | 1 + rasa/nlu/classifiers/diet_classifier.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 62e6a50e3d11..251305f93adf 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -809,6 +809,7 @@ def load( model_data_example = RasaModelData( label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data ) + meta = rasa.utils.train_utils.override_defaults(cls.defaults, meta) meta = rasa.utils.train_utils.update_confidence_type(meta) meta = rasa.utils.train_utils.update_similarity_type(meta) meta = rasa.utils.train_utils.update_loss_type(meta) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index f11096b85363..a40de9a50a07 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -332,6 +332,11 @@ def __init__( super().__init__(component_config) + print( + self.component_config[CONSTRAIN_SIMILARITIES], + self.component_config[MODEL_CONFIDENCE], + ) + self._check_config_parameters() # transform numbers to labels @@ -1021,6 +1026,7 @@ def load( data_example, ) = cls._load_from_files(meta, model_dir) + meta = train_utils.override_defaults(cls.defaults, meta) meta = train_utils.update_confidence_type(meta) meta = train_utils.update_similarity_type(meta) meta = train_utils.update_loss_type(meta) From 13d8aa85f06ef2a6f807845eb9ace2eff30eef1c Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 8 Feb 2021 17:23:35 +0100 Subject: [PATCH 38/44] change call to deprecated function check --- changelog/7616.improvement.md | 2 +- rasa/core/policies/ted_policy.py | 4 ++-- rasa/nlu/classifiers/diet_classifier.py | 6 ++++-- rasa/utils/tensorflow/layers.py | 4 ++-- rasa/utils/train_utils.py | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md index d687089711f5..6eb78ea7c073 100644 --- a/changelog/7616.improvement.md +++ b/changelog/7616.improvement.md @@ -19,6 +19,6 @@ With both the above recommendations, users should configure their ML component, ``` Once the assistant is re-trained with the above configuration, users should also tune fallback confidence thresholds. -Configuration option `loss_type=softmax` is now deprecated and will be in Rasa Open Source 3.0.0 . Use `loss_type=cross_entropy` instead. +Configuration option `loss_type=softmax` is now deprecated and will be removed in Rasa Open Source 3.0.0 . Use `loss_type=cross_entropy` instead. The default [auto-configuration](model-configuration.mdx#suggested-config) is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration. 
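[Editorial aside: patch 37 applies `override_defaults` to the persisted metadata at load time, so configuration keys introduced after a model was trained (such as `constrain_similarities`) exist before the model is initialized; the hunks below then rename `update_loss_type` to `update_deprecated_loss_type`, which rewrites the deprecated `loss_type=softmax` to `cross_entropy`. A rough, self-contained illustration of that load-time flow follows. These are simplified stand-ins, not the real helpers in `rasa/utils/train_utils.py`, which among other things also emit deprecation warnings.]

```python
from typing import Any, Dict, Optional, Text


def override_defaults(
    defaults: Dict[Text, Any], custom: Optional[Dict[Text, Any]]
) -> Dict[Text, Any]:
    # Start from the component's current defaults and overlay whatever the
    # persisted metadata explicitly set. Keys the old model never knew about
    # (e.g. `constrain_similarities`) keep their new default values.
    config = dict(defaults)
    config.update(custom or {})
    return config


def update_deprecated_loss_type(config: Dict[Text, Any]) -> Dict[Text, Any]:
    # `loss_type=softmax` is deprecated; map it to `cross_entropy` here
    # (the real helper additionally warns about the deprecation).
    if config.get("loss_type") == "softmax":
        config["loss_type"] = "cross_entropy"
    return config


# Metadata persisted by an older model: no `constrain_similarities` key yet.
meta = {"loss_type": "softmax", "epochs": 100}
defaults = {"loss_type": "cross_entropy", "constrain_similarities": True, "epochs": 300}
meta = update_deprecated_loss_type(override_defaults(defaults, meta))
print(meta)
# {'loss_type': 'cross_entropy', 'constrain_similarities': True, 'epochs': 100}
```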
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 251305f93adf..8eaa404ebd5d 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -352,7 +352,7 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: rasa.utils.train_utils.validate_configuration_settings(self.config) - self.config = rasa.utils.train_utils.update_loss_type(self.config) + self.config = rasa.utils.train_utils.update_deprecated_loss_type(self.config) self.config = rasa.utils.train_utils.update_similarity_type(self.config) self.config = rasa.utils.train_utils.update_evaluation_parameters(self.config) @@ -812,7 +812,7 @@ def load( meta = rasa.utils.train_utils.override_defaults(cls.defaults, meta) meta = rasa.utils.train_utils.update_confidence_type(meta) meta = rasa.utils.train_utils.update_similarity_type(meta) - meta = rasa.utils.train_utils.update_loss_type(meta) + meta = rasa.utils.train_utils.update_deprecated_loss_type(meta) meta[EPOCHS] = epoch_override diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index a40de9a50a07..ec7cf506a435 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -301,7 +301,9 @@ def _check_config_parameters(self) -> None: train_utils.validate_configuration_settings(self.component_config) - self.component_config = train_utils.update_loss_type(self.component_config) + self.component_config = train_utils.update_deprecated_loss_type( + self.component_config + ) self.component_config = train_utils.update_similarity_type( self.component_config @@ -1029,7 +1031,7 @@ def load( meta = train_utils.override_defaults(cls.defaults, meta) meta = train_utils.update_confidence_type(meta) meta = train_utils.update_similarity_type(meta) - meta = train_utils.update_loss_type(meta) + meta = train_utils.update_deprecated_loss_type(meta) model = cls._load_model( entity_tag_specs, diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 49d46acb2152..44823059671e 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -581,7 +581,7 @@ def __init__( ensure that similarity values are approximately bounded. Used inside _loss_cross_entropy() only. model_confidence: Model confidence to be returned during inference. - Possible values - softmax, cosine, inner. + Possible values - 'softmax', 'cosine' and 'inner'. Raises: RasaException: When `similarity_type` is not one of 'cosine' or 'inner'. @@ -879,8 +879,8 @@ def _loss_cross_entropy( # average the loss over the batch return tf.reduce_mean(loss) + @staticmethod def _compute_sigmoid_loss( - self, sim_pos: tf.Tensor, sim_neg_il: tf.Tensor, sim_neg_ll: tf.Tensor, diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index 84f0fe09e7ff..e0a3d3d09532 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -75,7 +75,7 @@ def update_similarity_type(config: Dict[Text, Any]) -> Dict[Text, Any]: return config -def update_loss_type(config: Dict[Text, Any]) -> Dict[Text, Any]: +def update_deprecated_loss_type(config: Dict[Text, Any]) -> Dict[Text, Any]: """If LOSS_TYPE is set to 'softmax', update it to 'cross_entropy' since former is deprecated. 
Args: From 9ea25dd93409f2e42b7d72afbf42a87a0f70af0d Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 8 Feb 2021 17:56:56 +0100 Subject: [PATCH 39/44] more comments --- rasa/utils/train_utils.py | 10 +++++----- tests/nlu/selectors/test_selectors.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index e0a3d3d09532..ecf0910729ea 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -406,8 +406,8 @@ def _check_confidence_setting(component_config: Dict[Text, Any]) -> None: rasa.shared.utils.io.raise_warning( f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended " f"to set it to `cosine`. It will be set to `cosine` by default, " - f"Rasa Open Source 3.0 onwards.", - category=FutureWarning, + f"Rasa Open Source 3.0.0 onwards.", + category=UserWarning, ) if component_config[LOSS_TYPE] not in [SOFTMAX, CROSS_ENTROPY]: raise InvalidConfigException( @@ -433,8 +433,8 @@ def _check_loss_setting(component_config: Dict[Text, Any]) -> None: rasa.shared.utils.io.raise_warning( f"{CONSTRAIN_SIMILARITIES} is set to `False`. It is recommended " f"to set it to `True` when using cross-entropy loss. It will be set to `True` by default, " - f"Rasa Open Source 3.0 onwards.", - category=FutureWarning, + f"Rasa Open Source 3.0.0 onwards.", + category=UserWarning, ) @@ -452,7 +452,7 @@ def _check_similarity_loss_setting(component_config: Dict[Text, Any]) -> None: f"Ideally use `{SIMILARITY_TYPE}={INNER}`" f" and `{LOSS_TYPE}={CROSS_ENTROPY}` or" f"`{SIMILARITY_TYPE}={COSINE}` and `{LOSS_TYPE}={MARGIN}`.", - category=FutureWarning, + category=UserWarning, ) diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py index 02dd94394cea..610ef3304efb 100644 --- a/tests/nlu/selectors/test_selectors.py +++ b/tests/nlu/selectors/test_selectors.py @@ -384,7 +384,7 @@ async def test_cross_entropy_without_normalization( async def test_margin_loss_is_not_normalized( monkeypatch: MonkeyPatch, component_builder: ComponentBuilder, - tmpdir: Path, + tmp_path: Path, classifier_params: Dict[Text, int], ): pipeline = as_pipeline( @@ -399,7 +399,7 @@ async def test_margin_loss_is_not_normalized( _config = RasaNLUModelConfig({"pipeline": pipeline}) (trained_model, _, persisted_path) = await train( _config, - path=str(tmpdir), + path=str(tmp_path), data="data/test_selectors", component_builder=component_builder, ) From ffcfdd154354c5b101725892ad0ef6066426ffbc Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 8 Feb 2021 19:01:15 +0100 Subject: [PATCH 40/44] add tests for config checks --- rasa/utils/train_utils.py | 1 + tests/utils/test_train_utils.py | 59 ++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index ecf0910729ea..2c643506356b 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -430,6 +430,7 @@ def _check_loss_setting(component_config: Dict[Text, Any]) -> None: SOFTMAX, CROSS_ENTROPY, ]: + print("raising") rasa.shared.utils.io.raise_warning( f"{CONSTRAIN_SIMILARITIES} is set to `False`. It is recommended " f"to set it to `True` when using cross-entropy loss. 
It will be set to `True` by default, "
diff --git a/tests/utils/test_train_utils.py b/tests/utils/test_train_utils.py
index 74dccd2ad5df..ccddfd15be7b 100644
--- a/tests/utils/test_train_utils.py
+++ b/tests/utils/test_train_utils.py
@@ -2,7 +2,9 @@
 import numpy as np
 import pytest
-from typing import List
+from typing import Any, Dict, Text
+from _pytest.logging import LogCaptureFixture
+import logging

 import rasa.utils.train_utils as train_utils
 from rasa.nlu.constants import NUMBER_OF_SUB_TOKENS
@@ -11,6 +13,18 @@
     SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE,
     SPLIT_ENTITIES_BY_COMMA,
 )
+from rasa.utils.tensorflow.constants import (
+    MODEL_CONFIDENCE,
+    SIMILARITY_TYPE,
+    LOSS_TYPE,
+    COSINE,
+    SOFTMAX,
+    INNER,
+    CROSS_ENTROPY,
+    MARGIN,
+    CONSTRAIN_SIMILARITIES,
+)
+from rasa.shared.exceptions import RasaException, InvalidConfigException


 def test_align_token_features():
@@ -74,3 +88,46 @@ def test_init_split_entities_config(
         )
         == expected_initialized_config
     )
+
+
+@pytest.mark.parametrize(
+    "component_config, raises_exception",
+    [
+        ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: MARGIN}, True),
+        ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: SOFTMAX}, False),
+        ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: CROSS_ENTROPY}, False),
+        ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: MARGIN}, False),
+        ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: SOFTMAX}, False),
+        ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: CROSS_ENTROPY}, False),
+        ({MODEL_CONFIDENCE: INNER, LOSS_TYPE: MARGIN}, False),
+        ({MODEL_CONFIDENCE: INNER, LOSS_TYPE: SOFTMAX}, False),
+        ({MODEL_CONFIDENCE: INNER, LOSS_TYPE: CROSS_ENTROPY}, False),
+    ],
+)
+def test_confidence_loss_settings(
+    component_config: Dict[Text, Any], raises_exception: bool
+):
+    component_config[SIMILARITY_TYPE] = INNER
+    if raises_exception:
+        with pytest.raises(InvalidConfigException):
+            train_utils._check_confidence_setting(component_config)
+
+
+@pytest.mark.parametrize(
+    "component_config, raises_exception",
+    [
+        ({MODEL_CONFIDENCE: SOFTMAX, SIMILARITY_TYPE: INNER}, False),
+        ({MODEL_CONFIDENCE: SOFTMAX, SIMILARITY_TYPE: COSINE}, True),
+        ({MODEL_CONFIDENCE: COSINE, SIMILARITY_TYPE: INNER}, False),
+        ({MODEL_CONFIDENCE: COSINE, SIMILARITY_TYPE: COSINE}, False),
+        ({MODEL_CONFIDENCE: INNER, SIMILARITY_TYPE: INNER}, False),
+        ({MODEL_CONFIDENCE: INNER, SIMILARITY_TYPE: COSINE}, False),
+    ],
+)
+def test_confidence_similarity_settings(
+    component_config: Dict[Text, Any], raises_exception: bool
+):
+    component_config[LOSS_TYPE] = SOFTMAX
+    if raises_exception:
+        with pytest.raises(InvalidConfigException):
+            train_utils._check_confidence_setting(component_config)

From 25abb8d780d1c96dc89c9f0d2735dd86a9eb2184 Mon Sep 17 00:00:00 2001
From: Daksh
Date: Mon, 8 Feb 2021 19:04:23 +0100
Subject: [PATCH 41/44] remove prints

---
 rasa/nlu/classifiers/diet_classifier.py | 5 -----
 rasa/utils/train_utils.py               | 1 -
 tests/utils/test_train_utils.py         | 5 +----
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index ec7cf506a435..3292f9361e09 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -334,11 +334,6 @@ def __init__(

         super().__init__(component_config)

-        print(
-            self.component_config[CONSTRAIN_SIMILARITIES],
-            self.component_config[MODEL_CONFIDENCE],
-        )
-
         self._check_config_parameters()

         # transform numbers to labels
diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
index 2c643506356b..ecf0910729ea 100644
--- a/rasa/utils/train_utils.py
+++ b/rasa/utils/train_utils.py
@@ -430,7 +430,6 @@ def _check_loss_setting(component_config: Dict[Text, Any]) -> None:
         SOFTMAX,
         CROSS_ENTROPY,
     ]:
-        print("raising")
         rasa.shared.utils.io.raise_warning(
             f"{CONSTRAIN_SIMILARITIES} is set to `False`. It is recommended "
             f"to set it to `True` when using cross-entropy loss. It will be set to `True` by default, "
diff --git a/tests/utils/test_train_utils.py b/tests/utils/test_train_utils.py
index ccddfd15be7b..33952b15a393 100644
--- a/tests/utils/test_train_utils.py
+++ b/tests/utils/test_train_utils.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 from typing import Any, Dict, Text
-from _pytest.logging import LogCaptureFixture
-import logging

 import rasa.utils.train_utils as train_utils
 from rasa.nlu.constants import NUMBER_OF_SUB_TOKENS
@@ -22,9 +20,8 @@
     INNER,
     CROSS_ENTROPY,
     MARGIN,
-    CONSTRAIN_SIMILARITIES,
 )
-from rasa.shared.exceptions import RasaException, InvalidConfigException
+from rasa.shared.exceptions import InvalidConfigException


 def test_align_token_features():

From c2e74e108d208251d87429fba4d3c5dc776a5428 Mon Sep 17 00:00:00 2001
From: Daksh Varshneya
Date: Mon, 8 Feb 2021 19:25:34 +0100
Subject: [PATCH 42/44] Update docs/docs/migration-guide.mdx

---
 docs/docs/migration-guide.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx
index c66866edd102..342ba47f1c4e 100644
--- a/docs/docs/migration-guide.mdx
+++ b/docs/docs/migration-guide.mdx
@@ -22,7 +22,7 @@ them to an approximate range. You can turn on this option by setting `constrain_
 Also, a new option `model_confidence` has been added to each ML component. It affects how model's confidence for each label is computed during inference. It can take one of three values:
 1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1.
-2. `cosine` - Cosine similarity between input label embeddings. Confidence for each label will be in the range `[-1,1]`.
+2. `cosine` - Cosine similarity between input and label embeddings. Confidence for each label will be in the range `[-1,1]`.
 3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range.
 The default value is `softmax`, but we recommend using `cosine` as that will be the new default value from Rasa Open Source 3.0.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
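[Editorial aside: the migration-guide entry fixed above defines the three `model_confidence` options. For reference, here is a small NumPy sketch of those three computations on made-up embeddings; it illustrates the definitions from the text and is not Rasa's actual inference code.]

```python
import numpy as np


def label_confidences(
    input_emb: np.ndarray, label_embs: np.ndarray, model_confidence: str
) -> np.ndarray:
    sim = label_embs @ input_emb  # dot-product similarity, one value per label
    if model_confidence == "softmax":
        # Post-process with softmax: confidences over all labels sum to 1.
        exp = np.exp(sim - sim.max())
        return exp / exp.sum()
    if model_confidence == "cosine":
        # Cosine similarity: each confidence lies in [-1, 1].
        norms = np.linalg.norm(label_embs, axis=1) * np.linalg.norm(input_emb)
        return sim / norms
    if model_confidence == "inner":
        # Raw dot product: unbounded range.
        return sim
    raise ValueError(f"Unknown model_confidence '{model_confidence}'")


rng = np.random.default_rng(0)
input_emb = rng.normal(size=3)        # embedding of the user input
label_embs = rng.normal(size=(4, 3))  # embeddings of four candidate labels
for option in ("softmax", "cosine", "inner"):
    print(option, np.round(label_confidences(input_emb, label_embs, option), 3))
```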
From c2b9b9371e5875eaf6e10038708d1547f1a6c98c Mon Sep 17 00:00:00 2001 From: Daksh Date: Mon, 8 Feb 2021 19:26:14 +0100 Subject: [PATCH 43/44] fix test --- tests/utils/test_train_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/utils/test_train_utils.py b/tests/utils/test_train_utils.py index 33952b15a393..9b9d15f2cc93 100644 --- a/tests/utils/test_train_utils.py +++ b/tests/utils/test_train_utils.py @@ -108,6 +108,8 @@ def test_confidence_loss_settings( if raises_exception: with pytest.raises(InvalidConfigException): train_utils._check_confidence_setting(component_config) + else: + train_utils._check_confidence_setting(component_config) @pytest.mark.parametrize( @@ -128,3 +130,5 @@ def test_confidence_similarity_settings( if raises_exception: with pytest.raises(InvalidConfigException): train_utils._check_confidence_setting(component_config) + else: + train_utils._check_confidence_setting(component_config) From c69fdbb496d4623ac4b72694b176d2836c1c8269 Mon Sep 17 00:00:00 2001 From: Daksh Varshneya Date: Tue, 9 Feb 2021 09:42:38 +0100 Subject: [PATCH 44/44] Update rasa/utils/tensorflow/layers.py --- rasa/utils/tensorflow/layers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 44823059671e..bd3945eced1b 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -933,7 +933,6 @@ def _compute_softmax_loss( @property def _chosen_loss(self) -> Callable: """Use loss depending on given option.""" - if self.loss_type == MARGIN: return self._loss_margin elif self.loss_type == CROSS_ENTROPY:
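[Editorial aside: taken together, patches 40 and 43 pin down the behaviour of the configuration checks: `_check_confidence_setting` must raise `InvalidConfigException` when `model_confidence=softmax` is combined with margin loss or cosine similarity, and must pass silently for the remaining combinations. The sketch below is inferred from those parametrized tests and uses a stand-in exception class; the real check in `rasa/utils/train_utils.py` covers more cases and also emits the warnings shown earlier in the series.]

```python
from typing import Any, Dict, Text


class InvalidConfigException(ValueError):
    """Stand-in for rasa.shared.exceptions.InvalidConfigException."""


def check_confidence_setting(config: Dict[Text, Any]) -> None:
    # `model_confidence=softmax` only makes sense when similarities are
    # trained as softmax logits: cross-entropy (or the deprecated `softmax`)
    # loss together with inner-product similarity.
    if config["model_confidence"] == "softmax":
        if config["loss_type"] not in ("softmax", "cross_entropy"):
            raise InvalidConfigException(
                "model_confidence=softmax requires loss_type=cross_entropy."
            )
        if config["similarity_type"] != "inner":
            raise InvalidConfigException(
                "model_confidence=softmax requires similarity_type=inner."
            )


# Valid combination: passes silently.
check_confidence_setting(
    {"model_confidence": "softmax", "loss_type": "cross_entropy", "similarity_type": "inner"}
)
# Invalid combination: raises, mirroring the `True` cases in the tests above.
try:
    check_confidence_setting(
        {"model_confidence": "softmax", "loss_type": "margin", "similarity_type": "inner"}
    )
except InvalidConfigException as error:
    print(error)
```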