diff --git a/changelog/7616.improvement.md b/changelog/7616.improvement.md
new file mode 100644
index 000000000000..6eb78ea7c073
--- /dev/null
+++ b/changelog/7616.improvement.md
@@ -0,0 +1,24 @@
+Added two new parameters `constrain_similarities` and `model_confidence` to machine learning (ML) components - [DIETClassifier](components.mdx#dietclassifier), [ResponseSelector](components.mdx#responseselector) and [TEDPolicy](policies.mdx#ted-policy).
+
+Setting `constrain_similarities=True` adds a sigmoid cross-entropy loss on all similarity values to restrict them to an approximate range in `DotProductLoss`. This should help the models perform better on real-world test sets.
+By default, the parameter is set to `False` to preserve the old behaviour, but users are encouraged to set it to `True` and re-train their assistants, as it will be set to `True` by default from Rasa Open Source 3.0.0 onwards.
+
+The parameter `model_confidence` affects how the model's confidence for each label is computed during inference. It can take three values:
+1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which the confidences for all labels sum up to 1.
+2. `cosine` - Cosine similarity between input and label embeddings. Confidence for each label will be in the range `[-1,1]`.
+3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range.
+
+Setting `model_confidence=cosine` should help users tune the fallback thresholds of their assistant better. The default value is `softmax` to preserve the old behaviour, but we recommend using `cosine` as that will be the new default value from Rasa Open Source 3.0.0 onwards. The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
+
+With both of the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as:
+```yaml
+- name: DIETClassifier
+  model_confidence: cosine
+  constrain_similarities: True
+  ...
+```
+Once the assistant is re-trained with the above configuration, users should also tune fallback confidence thresholds.
+
+Configuration option `loss_type=softmax` is now deprecated and will be removed in Rasa Open Source 3.0.0. Use `loss_type=cross_entropy` instead.
+
+The default [auto-configuration](model-configuration.mdx#suggested-config) is changed to use `constrain_similarities=True` and `model_confidence=cosine` in ML components so that new users start with the recommended configuration.
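To make the three `model_confidence` modes described above concrete, here is a minimal NumPy sketch of how a confidence score could be derived from input and label embeddings. This is illustrative only, not the actual Rasa implementation: the function name `compute_confidences` and the shapes are hypothetical, but the softmax/cosine/inner behaviour mirrors the changelog text.

```python
import numpy as np

def compute_confidences(
    input_embedding: np.ndarray,   # shape: (embedding_dim,)
    label_embeddings: np.ndarray,  # shape: (num_labels, embedding_dim)
    model_confidence: str = "softmax",
) -> np.ndarray:
    """Sketch of the three `model_confidence` options (hypothetical helper)."""
    if model_confidence == "cosine":
        # Normalize both sides to unit length so dot products become
        # cosine similarities bounded in [-1, 1].
        input_embedding = input_embedding / np.linalg.norm(input_embedding)
        label_embeddings = label_embeddings / np.linalg.norm(
            label_embeddings, axis=-1, keepdims=True
        )
    # Dot product similarity between the input and every label.
    similarities = label_embeddings @ input_embedding
    if model_confidence == "softmax":
        # Post-process with softmax so confidences are positive and sum to 1.
        exps = np.exp(similarities - similarities.max())
        return exps / exps.sum()
    # For "cosine" and "inner", similarities are returned unchanged.
    return similarities

# Example: three labels in a four-dimensional embedding space.
rng = np.random.default_rng(0)
labels = rng.normal(size=(3, 4))
query = rng.normal(size=4)
print(compute_confidences(query, labels, "softmax"))  # sums to 1
print(compute_confidences(query, labels, "cosine"))   # each value in [-1, 1]
```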
diff --git a/data/test_config/config_empty_en_after_dumping.yml b/data/test_config/config_empty_en_after_dumping.yml index 20507a3944af..79c21d70c4a7 100644 --- a/data/test_config/config_empty_en_after_dumping.yml +++ b/data/test_config/config_empty_en_after_dumping.yml @@ -13,9 +13,13 @@ pipeline: # max_ngram: 4 # - name: DIETClassifier # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: EntitySynonymMapper # - name: ResponseSelector # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: FallbackClassifier # threshold: 0.3 # ambiguity_threshold: 0.1 @@ -27,4 +31,6 @@ policies: # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy diff --git a/data/test_config/config_empty_en_after_dumping_core.yml b/data/test_config/config_empty_en_after_dumping_core.yml index 1488270ddf39..adb3c2a0af55 100644 --- a/data/test_config/config_empty_en_after_dumping_core.yml +++ b/data/test_config/config_empty_en_after_dumping_core.yml @@ -8,4 +8,6 @@ policies: # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy diff --git a/data/test_config/config_empty_en_after_dumping_nlu.yml b/data/test_config/config_empty_en_after_dumping_nlu.yml index a4cb5077bf58..8249b17a0e11 100644 --- a/data/test_config/config_empty_en_after_dumping_nlu.yml +++ b/data/test_config/config_empty_en_after_dumping_nlu.yml @@ -13,9 +13,13 @@ pipeline: # max_ngram: 4 # - name: DIETClassifier # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: EntitySynonymMapper # - name: ResponseSelector # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: FallbackClassifier # threshold: 0.3 # ambiguity_threshold: 0.1 diff --git a/data/test_config/config_empty_fr_after_dumping.yml b/data/test_config/config_empty_fr_after_dumping.yml index 8148c3ebee68..a2ea89f4bf0a 100644 --- a/data/test_config/config_empty_fr_after_dumping.yml +++ b/data/test_config/config_empty_fr_after_dumping.yml @@ -13,9 +13,13 @@ pipeline: # max_ngram: 4 # - name: DIETClassifier # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: EntitySynonymMapper # - name: ResponseSelector # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: FallbackClassifier # threshold: 0.3 # ambiguity_threshold: 0.1 @@ -27,4 +31,6 @@ policies: # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy diff --git a/data/test_config/config_with_comments_after_dumping.yml b/data/test_config/config_with_comments_after_dumping.yml index 16b6129d18f9..ef0743f894de 100644 --- a/data/test_config/config_with_comments_after_dumping.yml +++ b/data/test_config/config_with_comments_after_dumping.yml @@ -27,6 +27,8 @@ policies: # even here # - name: TEDPolicy # max_history: 5 # epochs: 100 +# constrain_similarities: true +# model_confidence: cosine # - name: RulePolicy # comments everywhere diff --git a/docs/docs/components.mdx b/docs/docs/components.mdx index 490e108ff358..ef5ac83db5a1 100644 --- a/docs/docs/components.mdx +++ b/docs/docs/components.mdx @@ -1531,10 +1531,12 @@ However, additional parameters exist that can be adapted. | similarity_type | "auto" | Type of similarity measure to use, either 'auto' or 'cosine' | | | | or 'inner'. 
| +---------------------------------+------------------+--------------------------------------------------------------+ -| loss_type | "softmax" | The type of the loss function, either 'softmax' or 'margin'. | +| loss_type | "cross_entropy" | The type of the loss function, either 'cross_entropy' | +| | | or 'margin'. | +---------------------------------+------------------+--------------------------------------------------------------+ -| ranking_length | 10 | Number of top actions to normalize scores for loss type | -| | | 'softmax'. Set to 0 to turn off normalization. | +| ranking_length | 10 | Number of top intents to normalize scores for. Applicable | +| | | only with loss type 'cross_entropy' and 'softmax' | +| | | confidences. Set to 0 to disable normalization. | +---------------------------------+------------------+--------------------------------------------------------------+ | maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make | | | | embedding vectors for correct labels. | @@ -1616,6 +1618,24 @@ However, additional parameters exist that can be adapted. | | | ... | | | | ``` | +---------------------------------+------------------+--------------------------------------------------------------+ +| constrain_similarities | False | If `True`, applies sigmoid on all similarity terms and adds | +| | | it to the loss function to ensure that similarity values are | +| | | approximately bounded. Used only if `loss_type=cross_entropy`| ++---------------------------------+------------------+--------------------------------------------------------------+ +| model_confidence | "softmax" | Affects how model's confidence for each intent | +| | | is computed. It can take three values | +| | | 1. `softmax` - Similarities between input and intent | +| | | embeddings are post-processed with a softmax function, | +| | | as a result of which confidence for all intents sum up to 1. | +| | | 2. `cosine` - Cosine similarity between input and intent | +| | | embeddings. Confidence for each intent is in the | +| | | range `[-1,1]`. | +| | | 3. `inner` - Dot product similarity between input and intent | +| | | embeddings. Confidence for each intent is in an unbounded | +| | | range. | +| | | This parameter does not affect the confidence for entity | +| | | prediction. | ++---------------------------------+------------------+--------------------------------------------------------------+ ``` :::note @@ -2742,10 +2762,12 @@ However, additional parameters exist that can be adapted. | similarity_type | "auto" | Type of similarity measure to use, either 'auto' or 'cosine' | | | | or 'inner'. | +---------------------------------+-------------------+--------------------------------------------------------------+ -| loss_type | "softmax" | The type of the loss function, either 'softmax' or 'margin'. | +| loss_type | "cross_entropy" | The type of the loss function, either 'cross_entropy' | +| | | or 'margin'. | +---------------------------------+-------------------+--------------------------------------------------------------+ -| ranking_length | 10 | Number of top actions to normalize scores for loss type | -| | | 'softmax'. Set to 0 to turn off normalization. | +| ranking_length | 10 | Number of top responses to normalize scores for. Applicable | +| | | only with loss type 'cross_entropy' and 'softmax' | +| | | confidences. Set to 0 to disable normalization. 
| +---------------------------------+-------------------+--------------------------------------------------------------+ | maximum_positive_similarity | 0.8 | Indicates how similar the algorithm should try to make | | | | embedding vectors for correct labels. | @@ -2814,6 +2836,22 @@ However, additional parameters exist that can be adapted. | | | Requires `evaluate_on_number_of_examples > 0` and | | | | `evaluate_every_number_of_epochs > 0` | +---------------------------------+-------------------+--------------------------------------------------------------+ +| constrain_similarities | False | If `True`, applies sigmoid on all similarity terms and adds | +| | | it to the loss function to ensure that similarity values are | +| | | approximately bounded. Used only if `loss_type=cross_entropy`| ++---------------------------------+-------------------+--------------------------------------------------------------+ +| model_confidence | "softmax" | Affects how model's confidence for each response label | +| | | is computed. It can take three values | +| | | 1. `softmax` - Similarities between input and response label | +| | | embeddings are post-processed with a softmax function, | +| | | as a result of which confidence for all labels sum up to 1. | +| | | 2. `cosine` - Cosine similarity between input and response | +| | | label embeddings. Confidence for each label is in the | +| | | range `[-1,1]`. | +| | | 3. `inner` - Dot product similarity between input and | +| | | response label embeddings. Confidence for each label is in an| +| | | unbounded range. | ++---------------------------------+-------------------+--------------------------------------------------------------+ ``` :::note diff --git a/docs/docs/migration-guide.mdx b/docs/docs/migration-guide.mdx index 203b3d7bcc3b..5dd3c9e7dbf0 100644 --- a/docs/docs/migration-guide.mdx +++ b/docs/docs/migration-guide.mdx @@ -10,6 +10,33 @@ description: | This page contains information about changes between major versions and how you can migrate from one version to another. +## Rasa 2.2 to Rasa 2.3 + +### Machine Learning Components + +A few changes have been made to the loss function inside machine learning (ML) +components `DIETClassifier`, `ResponseSelector` and `TEDPolicy`. These include: +1. Configuration option `loss_type=softmax` is now deprecated and will be removed in Rasa Open Source 3.0.0. Use `loss_type=cross_entropy` instead. +2. The default loss function (`loss_type=cross_entropy`) can add an optional sigmoid cross-entropy loss of all similarity values to constrain +them to an approximate range. You can turn on this option by setting `constrain_similarities=True`. This should help the models to perform better on real world test sets. + +Also, a new option `model_confidence` has been added to each ML component. It affects how a model's confidence for each label is computed during inference. It can take one of three values: +1. `softmax` - Similarities between input and label embeddings are post-processed with a softmax function, as a result of which confidence for all labels sum up to 1. +2. `cosine` - Cosine similarity between input and label embeddings. Confidence for each label will be in the range `[-1,1]`. +3. `inner` - Dot product similarity between input and label embeddings. Confidence for each label will be in an unbounded range. +The default value is `softmax`, but we recommend using `cosine` as that will be the new default value from Rasa Open Source 3.0.0 onwards. 
+The value of this option does not affect how confidences are computed for entity predictions in `DIETClassifier` and `TEDPolicy`.
+
+With both of the above recommendations, users should configure their ML component, e.g. `DIETClassifier`, as:
+```yaml
+- name: DIETClassifier
+  model_confidence: cosine
+  constrain_similarities: True
+  ...
+```
+Once the assistant is re-trained with the above configuration, users should also tune fallback confidence thresholds.
+
+
 ## Rasa 2.1 to Rasa 2.2
 
 ### General
diff --git a/docs/docs/policies.mdx b/docs/docs/policies.mdx
index 9292c5fb05fb..bc2d4c1c4e85 100644
--- a/docs/docs/policies.mdx
+++ b/docs/docs/policies.mdx
@@ -268,10 +268,12 @@ However, additional parameters exist that can be adapted.
 | similarity_type                       | "auto"                 | Type of similarity measure to use, either 'auto' or 'cosine' |
 |                                       |                        | or 'inner'.                                                  |
 +---------------------------------------+------------------------+--------------------------------------------------------------+
-| loss_type                             | "softmax"              | The type of the loss function, either 'softmax' or 'margin'. |
+| loss_type                             | "cross_entropy"        | The type of the loss function, either 'cross_entropy'        |
+|                                       |                        | or 'margin'.                                                 |
 +---------------------------------------+------------------------+--------------------------------------------------------------+
-| ranking_length                        | 10                     | Number of top actions to normalize scores for loss type      |
-|                                       |                        | 'softmax'. Set to 0 to turn off normalization.               |
+| ranking_length                        | 10                     | Number of top actions to normalize scores for. Applicable    |
+|                                       |                        | only with loss type 'cross_entropy' and 'softmax'            |
+|                                       |                        | confidences. Set to 0 to disable normalization.              |
 +---------------------------------------+------------------------+--------------------------------------------------------------+
 | maximum_positive_similarity           | 0.8                    | Indicates how similar the algorithm should try to make       |
 |                                       |                        | embedding vectors for correct labels.                        |
@@ -344,6 +346,22 @@ However, additional parameters exist that can be adapted.
 | entity_recognition                    | True                   | If 'True' entity recognition is trained and entities are     |
 |                                       |                        | extracted.                                                   |
 +---------------------------------------+------------------------+--------------------------------------------------------------+
+| constrain_similarities                | False                  | If `True`, applies sigmoid on all similarity terms and adds  |
+|                                       |                        | it to the loss function to ensure that similarity values are |
+|                                       |                        | approximately bounded. Used only if `loss_type=cross_entropy`|
++---------------------------------------+------------------------+--------------------------------------------------------------+
+| model_confidence                      | "softmax"              | Affects how the model's confidence for each action           |
+|                                       |                        | is computed. It can take three values:                       |
+|                                       |                        | 1. `softmax` - Similarities between input and action         |
+|                                       |                        | embeddings are post-processed with a softmax function,       |
+|                                       |                        | as a result of which confidences for all labels sum up to 1. |
+|                                       |                        | 2. `cosine` - Cosine similarity between input and action     |
+|                                       |                        | embeddings. Confidence for each label is in the              |
+|                                       |                        | range `[-1,1]`.                                              |
+|                                       |                        | 3. `inner` - Dot product similarity between input and action |
+|                                       |                        | embeddings. Confidence for each label is in an               |
+|                                       |                        | unbounded range.                                             |
++---------------------------------------+------------------------+--------------------------------------------------------------+
 | BILOU_flag                            | True                   | If 'True', additional BILOU tags are added to entity labels.
| +---------------------------------------+------------------------+--------------------------------------------------------------+ | split_entities_by_comma | True | Splits a list of extracted entities by comma to treat each | diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 09b3a7a7cdb4..8eaa404ebd5d 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -82,7 +82,7 @@ KEY_RELATIVE_ATTENTION, VALUE_RELATIVE_ATTENTION, MAX_RELATIVE_POSITION, - SOFTMAX, + CROSS_ENTROPY, AUTO, BALANCED, TENSORBOARD_LOG_DIR, @@ -102,6 +102,9 @@ HIDDEN_LAYERS_SIZES, FEATURIZERS, ENTITY_RECOGNITION, + CONSTRAIN_SIMILARITIES, + MODEL_CONFIDENCE, + SOFTMAX, BILOU_FLAG, ) from rasa.shared.core.events import EntitiesAdded, Event @@ -212,10 +215,11 @@ class TEDPolicy(Policy): NUM_NEG: 20, # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'. SIMILARITY_TYPE: AUTO, - # The type of the loss function, either 'softmax' or 'margin'. - LOSS_TYPE: SOFTMAX, - # Number of top actions to normalize scores for loss type 'softmax'. - # Set to 0 to turn off normalization. + # The type of the loss function, either 'cross_entropy' or 'margin'. + LOSS_TYPE: CROSS_ENTROPY, + # Number of top actions to normalize scores for. Applicable with + # loss type 'cross_entropy' and 'softmax' confidences. Set to 0 + # to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors # for correct labels. @@ -277,6 +281,13 @@ class TEDPolicy(Policy): FEATURIZERS: [], # If set to true, entities are predicted in user utterances. ENTITY_RECOGNITION: True, + # if 'True' applies sigmoid on all similarity terms and adds + # it to the loss function to ensure that similarity values are + # approximately bounded. Used inside softmax loss only. + CONSTRAIN_SIMILARITIES: False, + # Model confidence to be returned during inference. Possible values - + # 'softmax', 'cosine' and 'inner'. + MODEL_CONFIDENCE: SOFTMAX, # 'BILOU_flag' determines whether to use BILOU tagging or not. # If set to 'True' labelling is more rigorous, however more # examples per entity are required. @@ -336,6 +347,12 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self.config = rasa.utils.train_utils.override_defaults( self.defaults, new_config ) + + self.config = rasa.utils.train_utils.update_confidence_type(self.config) + + rasa.utils.train_utils.validate_configuration_settings(self.config) + + self.config = rasa.utils.train_utils.update_deprecated_loss_type(self.config) self.config = rasa.utils.train_utils.update_similarity_type(self.config) self.config = rasa.utils.train_utils.update_evaluation_parameters(self.config) @@ -606,7 +623,9 @@ def predict_action_probabilities( # take correct prediction from batch confidence, is_e2e_prediction = self._pick_confidence(confidences, similarities) - if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0: + if self.config[RANKING_LENGTH] > 0 and self.config[MODEL_CONFIDENCE] == SOFTMAX: + # TODO: This should be removed in 3.0 when softmax as + # model confidence and normalization is completely deprecated. 
confidence = rasa.utils.train_utils.normalize( confidence, self.config[RANKING_LENGTH] ) @@ -790,7 +809,10 @@ def load( model_data_example = RasaModelData( label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data ) + meta = rasa.utils.train_utils.override_defaults(cls.defaults, meta) + meta = rasa.utils.train_utils.update_confidence_type(meta) meta = rasa.utils.train_utils.update_similarity_type(meta) + meta = rasa.utils.train_utils.update_deprecated_loss_type(meta) meta[EPOCHS] = epoch_override @@ -1710,15 +1732,14 @@ def batch_predict( ) = self._embed_dialogue(dialogue_in, tf_batch_data) dialogue_mask = tf.squeeze(dialogue_mask, axis=-1) - sim_all = self._tf_layers[f"loss.{LABEL}"].sim( + sim_all, scores = self._tf_layers[ + f"loss.{LABEL}" + ]._similarity_confidence_from_embeddings( dialogue_embed[:, :, tf.newaxis, :], self.all_labels_embed[tf.newaxis, tf.newaxis, :, :], dialogue_mask, ) - scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( - sim_all, self.config[SIMILARITY_TYPE] - ) predictions = { "action_scores": scores, "similarities": sim_all, diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index c50204fd1912..3292f9361e09 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -85,9 +85,9 @@ KEY_RELATIVE_ATTENTION, VALUE_RELATIVE_ATTENTION, MAX_RELATIVE_POSITION, - SOFTMAX, AUTO, BALANCED, + CROSS_ENTROPY, TENSORBOARD_LOG_LEVEL, CONCAT_DIMENSION, FEATURIZERS, @@ -97,6 +97,9 @@ SEQUENCE_LENGTH, DENSE_DIMENSION, MASK, + CONSTRAIN_SIMILARITIES, + MODEL_CONFIDENCE, + SOFTMAX, ) logger = logging.getLogger(__name__) @@ -175,10 +178,11 @@ def required_components(cls) -> List[Type[Component]]: NUM_NEG: 20, # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'. SIMILARITY_TYPE: AUTO, - # The type of the loss function, either 'softmax' or 'margin'. - LOSS_TYPE: SOFTMAX, - # Number of top actions to normalize scores for loss type 'softmax'. - # Set to 0 to turn off normalization. + # The type of the loss function, either 'cross_entropy' or 'margin'. + LOSS_TYPE: CROSS_ENTROPY, + # Number of top intents to normalize scores for. Applicable with + # loss type 'cross_entropy' and 'softmax' confidences. Set to 0 + # to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors # for correct labels. @@ -245,6 +249,13 @@ def required_components(cls) -> List[Type[Component]]: # Split entities by comma, this makes sense e.g. for a list of ingredients # in a recipie, but it doesn't make sense for the parts of an address SPLIT_ENTITIES_BY_COMMA: True, + # If 'True' applies sigmoid on all similarity terms and adds + # it to the loss function to ensure that similarity values are + # approximately bounded. Used inside softmax loss only. + CONSTRAIN_SIMILARITIES: False, + # Model confidence to be returned during inference. Possible values - + # 'softmax', 'cosine', 'inner'. 
+ MODEL_CONFIDENCE: SOFTMAX, } # init helpers @@ -284,6 +295,16 @@ def _check_config_parameters(self) -> None: self._check_masked_lm() self._check_share_hidden_layers_sizes() + self.component_config = train_utils.update_confidence_type( + self.component_config + ) + + train_utils.validate_configuration_settings(self.component_config) + + self.component_config = train_utils.update_deprecated_loss_type( + self.component_config + ) + self.component_config = train_utils.update_similarity_type( self.component_config ) @@ -850,9 +871,11 @@ def _predict_label( label_ids = message_sim.argsort()[::-1] if ( - self.component_config[LOSS_TYPE] == SOFTMAX - and self.component_config[RANKING_LENGTH] > 0 + self.component_config[RANKING_LENGTH] > 0 + and self.component_config[MODEL_CONFIDENCE] == SOFTMAX ): + # TODO: This should be removed in 3.0 when softmax as + # model confidence and normalization is completely deprecated. message_sim = train_utils.normalize( message_sim, self.component_config[RANKING_LENGTH] ) @@ -1000,7 +1023,10 @@ def load( data_example, ) = cls._load_from_files(meta, model_dir) + meta = train_utils.override_defaults(cls.defaults, meta) + meta = train_utils.update_confidence_type(meta) meta = train_utils.update_similarity_type(meta) + meta = train_utils.update_deprecated_loss_type(meta) model = cls._load_model( entity_tag_specs, @@ -1651,12 +1677,11 @@ def _batch_predict_intents( sentence_vector = self._last_token(text_transformed, sequence_lengths) sentence_vector_embed = self._tf_layers[f"embed.{TEXT}"](sentence_vector) - sim_all = self._tf_layers[f"loss.{LABEL}"].sim( + _, scores = self._tf_layers[ + f"loss.{LABEL}" + ]._similarity_confidence_from_embeddings( sentence_vector_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :], ) - scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( - sim_all, self.config[SIMILARITY_TYPE] - ) return {"i_scores": scores} diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index d1c4626f647e..f6aa535f6298 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -66,7 +66,7 @@ MAX_RELATIVE_POSITION, RETRIEVAL_INTENT, USE_TEXT_AS_LABEL, - SOFTMAX, + CROSS_ENTROPY, AUTO, BALANCED, TENSORBOARD_LOG_DIR, @@ -75,6 +75,9 @@ FEATURIZERS, CHECKPOINT_MODEL, DENSE_DIMENSION, + CONSTRAIN_SIMILARITIES, + MODEL_CONFIDENCE, + SOFTMAX, ) from rasa.nlu.constants import ( RESPONSE_SELECTOR_PROPERTY_NAME, @@ -171,10 +174,11 @@ def required_components(cls) -> List[Type[Component]]: NUM_NEG: 20, # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'. SIMILARITY_TYPE: AUTO, - # The type of the loss function, either 'softmax' or 'margin'. - LOSS_TYPE: SOFTMAX, - # Number of top actions to normalize scores for loss type 'softmax'. - # Set to 0 to turn off normalization. + # The type of the loss function, either 'cross_entropy' or 'margin'. + LOSS_TYPE: CROSS_ENTROPY, + # Number of top actions to normalize scores for. Applicable with + # loss type 'cross_entropy' and 'softmax' confidences. Set to 0 + # to turn off normalization. RANKING_LENGTH: 10, # Indicates how similar the algorithm should try to make embedding vectors # for correct labels. @@ -232,6 +236,13 @@ def required_components(cls) -> List[Type[Component]]: FEATURIZERS: [], # Perform model checkpointing CHECKPOINT_MODEL: False, + # if 'True' applies sigmoid on all similarity terms and adds it + # to the loss function to ensure that similarity values are + # approximately bounded. 
Used inside softmax loss only. + CONSTRAIN_SIMILARITIES: False, + # Model confidence to be returned during inference. Possible values - + # 'softmax', 'cosine', 'inner'. + MODEL_CONFIDENCE: SOFTMAX, } def __init__( @@ -244,7 +255,18 @@ def __init__( responses: Optional[Dict[Text, List[Dict[Text, Any]]]] = None, finetune_mode: bool = False, ) -> None: + """Declare instance variables with default values. + Args: + component_config: Configuration for the component. + index_label_id_mapping: Mapping between label and index used for encoding. + entity_tag_specs: Format specification all entity tags. + model: Model architecture. + all_retrieval_intents: All retrieval intents defined in the data. + responses: All responses defined in the data. + finetune_mode: If `True` loads the model with pre-trained weights, + otherwise initializes it with random weights. + """ component_config = component_config or {} # the following properties cannot be adapted for the ResponseSelector @@ -755,13 +777,12 @@ def batch_predict( sentence_vector = self._last_token(text_transformed, sequence_lengths_text) sentence_vector_embed = self._tf_layers[f"embed.{TEXT}"](sentence_vector) - sim_all = self._tf_layers[f"loss.{LABEL}"].sim( + _, scores = self._tf_layers[ + f"loss.{LABEL}" + ]._similarity_confidence_from_embeddings( sentence_vector_embed[:, tf.newaxis, :], self.all_labels_embed[tf.newaxis, :, :], ) - scores = self._tf_layers[f"loss.{LABEL}"].confidence_from_sim( - sim_all, self.config[SIMILARITY_TYPE] - ) out["i_scores"] = scores return out diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index 837aec238855..e9f819d9e243 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -927,7 +927,7 @@ def evaluate_entities( merged_targets, merged_predictions, merged_confidences, - title="Entity Confusion matrix", + title="Entity Prediction Confidence Distribution", hist_filename=histogram_filename, ) diff --git a/rasa/shared/importers/default_config.yml b/rasa/shared/importers/default_config.yml index 95c9716b0d4e..63d10d9249ab 100644 --- a/rasa/shared/importers/default_config.yml +++ b/rasa/shared/importers/default_config.yml @@ -13,9 +13,13 @@ pipeline: max_ngram: 4 - name: DIETClassifier epochs: 100 + constrain_similarities: true + model_confidence: cosine - name: EntitySynonymMapper - name: ResponseSelector epochs: 100 + constrain_similarities: true + model_confidence: cosine - name: FallbackClassifier threshold: 0.3 ambiguity_threshold: 0.1 @@ -27,4 +31,6 @@ policies: - name: TEDPolicy max_history: 5 epochs: 100 + constrain_similarities: true + model_confidence: cosine - name: RulePolicy diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py index 7eba3d6d0f7f..c816f26f77a9 100644 --- a/rasa/utils/plotting.py +++ b/rasa/utils/plotting.py @@ -5,6 +5,7 @@ import numpy as np from typing import List, Text, Optional, Union, Any import matplotlib +from matplotlib.ticker import FormatStrFormatter import rasa.shared.utils.io from rasa.constants import RESULTS_FILE @@ -133,21 +134,37 @@ def plot_histogram( # Wine-ish colour for the confidences of hits. # Blue-ish colour for the confidences of misses. 
colors = ["#009292", "#920000"] - bins = [0.05 * i for i in range(1, 21)] + n_bins = 25 + max_value = max( + [max(hist_data[0], default=0), max(hist_data[1], default=0)], default=0 + ) + min_value = min( + [min(hist_data[0], default=0), min(hist_data[1], default=0)], default=0 + ) + + bin_width = (max_value - min_value) / n_bins + bins = [min_value + (i * bin_width) for i in range(1, n_bins + 1)] binned_data_sets = [np.histogram(d, bins=bins)[0] for d in hist_data] max_xlims = [max(binned_data_set) for binned_data_set in binned_data_sets] max_xlims = [xlim + np.ceil(0.25 * xlim) for xlim in max_xlims] # padding - min_ylim = bins[ - min( - [ - (binned_data_set != 0).argmax(axis=0) - for binned_data_set in binned_data_sets - ] - ) - ] + min_ylim = ( + bins[ + min( + [ + (binned_data_set != 0).argmax(axis=0) + for binned_data_set in binned_data_sets + ] + ) + ] + - bin_width + ) + + max_ylim = max(bins) + bin_width + + yticks = [float("{:.2f}".format(x)) for x in bins] centers = 0.5 * (0.05 + (bins + np.roll(bins, 0))[:-1]) heights = 0.75 * np.diff(bins) @@ -170,16 +187,20 @@ def plot_histogram( color=colors[1], label="misses", ) + axes[1].set(title="Wrong") - axes[0].set(yticks=bins, xlim=(0, max_xlims[0]), ylim=(min_ylim, 1.0)) - axes[1].set(yticks=bins, xlim=(0, max_xlims[1]), ylim=(min_ylim, 1.0)) + axes[0].set(yticks=yticks, xlim=(0, max_xlims[0]), ylim=(min_ylim, max_ylim)) + axes[1].set(yticks=yticks, xlim=(0, max_xlims[1]), ylim=(min_ylim, max_ylim)) + + axes[0].yaxis.set_major_formatter(FormatStrFormatter("%.2f")) + axes[0].yaxis.set_minor_formatter(FormatStrFormatter("%.2f")) axes[0].invert_xaxis() axes[0].yaxis.tick_right() fig.subplots_adjust( - wspace=0.14 + wspace=0.17 ) # get the graphs exactly far enough apart for yaxis labels fig.suptitle(title, fontsize="x-large", fontweight="bold") diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 29c046258dac..d43c85066b9e 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -38,6 +38,7 @@ DROP_RATE_ATTENTION = "drop_rate_attention" DROP_RATE_DIALOGUE = "drop_rate_dialogue" DROP_RATE_LABEL = "drop_rate_label" +CONSTRAIN_SIMILARITIES = "constrain_similarities" WEIGHT_SPARSITY = "weight_sparsity" @@ -52,6 +53,7 @@ DENSE_INPUT_DROPOUT = "use_dense_input_dropout" RANKING_LENGTH = "ranking_length" +MODEL_CONFIDENCE = "model_confidence" BILOU_FLAG = "BILOU_flag" @@ -64,6 +66,7 @@ AUTO = "auto" INNER = "inner" COSINE = "cosine" +CROSS_ENTROPY = "cross_entropy" BALANCED = "balanced" diff --git a/rasa/utils/tensorflow/exceptions.py b/rasa/utils/tensorflow/exceptions.py new file mode 100644 index 000000000000..53e1cd4703c1 --- /dev/null +++ b/rasa/utils/tensorflow/exceptions.py @@ -0,0 +1,5 @@ +from rasa.shared.exceptions import RasaException + + +class TFLayerConfigException(RasaException): + """Raised when wrong parameters are passed to tensorflow layers.""" diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 3e9007d90af7..4b1266d20b3e 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -5,7 +5,14 @@ import rasa.utils.tensorflow.crf from tensorflow.python.keras.utils import tf_utils from tensorflow.python.keras import backend as K -from rasa.utils.tensorflow.constants import SOFTMAX, MARGIN, COSINE, INNER +from rasa.utils.tensorflow.constants import ( + SOFTMAX, + MARGIN, + COSINE, + INNER, + CROSS_ENTROPY, +) +from rasa.utils.tensorflow.exceptions import TFLayerConfigException logger = 
logging.getLogger(__name__) @@ -269,13 +276,6 @@ def call( class Embed(tf.keras.layers.Layer): """Dense embedding layer. - Arguments: - embed_dim: Positive integer, dimensionality of the output space. - reg_lambda: Float; regularization factor. - layer_name_suffix: Text added to the name of the layers. - similarity_type: Optional type of similarity measure to use, - either 'cosine' or 'inner'. - Input shape: N-D tensor with shape: `(batch_size, ..., input_dim)`. The most common situation would be @@ -288,20 +288,16 @@ class Embed(tf.keras.layers.Layer): """ def __init__( - self, - embed_dim: int, - reg_lambda: float, - layer_name_suffix: Text, - similarity_type: Optional[Text] = None, + self, embed_dim: int, reg_lambda: float, layer_name_suffix: Text ) -> None: - super().__init__(name=f"embed_{layer_name_suffix}") + """Initialize layer. - self.similarity_type = similarity_type - if self.similarity_type and self.similarity_type not in {COSINE, INNER}: - raise ValueError( - f"Wrong similarity type '{self.similarity_type}', " - f"should be '{COSINE}' or '{INNER}'." - ) + Args: + embed_dim: Dimensionality of the output space. + reg_lambda: Regularization factor. + layer_name_suffix: Text added to the name of the layers. + """ + super().__init__(name=f"embed_{layer_name_suffix}") regularizer = tf.keras.regularizers.l2(reg_lambda) self._dense = tf.keras.layers.Dense( @@ -313,10 +309,8 @@ def __init__( # noinspection PyMethodOverriding def call(self, x: tf.Tensor) -> tf.Tensor: + """Apply dense layer.""" x = self._dense(x) - if self.similarity_type == COSINE: - x = tf.nn.l2_normalize(x, axis=-1) - return x @@ -542,31 +536,7 @@ def f1_score( class DotProductLoss(tf.keras.layers.Layer): - """Dot-product loss layer. - - Arguments: - num_neg: Positive integer, the number of incorrect labels; - the algorithm will minimize their similarity to the input. - loss_type: The type of the loss function, either 'softmax' or 'margin'. - mu_pos: Float, indicates how similar the algorithm should - try to make embedding vectors for correct labels; - should be 0.0 < ... < 1.0 for 'cosine' similarity type. - mu_neg: Float, maximum negative similarity for incorrect labels, - should be -1.0 < ... < 1.0 for 'cosine' similarity type. - use_max_sim_neg: Boolean, if 'True' the algorithm only minimizes - maximum similarity over incorrect intent labels, - used only if 'loss_type' is set to 'margin'. - neg_lambda: Float, the scale of how important is to minimize - the maximum similarity between embeddings of different labels, - used only if 'loss_type' is set to 'margin'. - scale_loss: Boolean, if 'True' scale loss inverse proportionally to - the confidence of the correct prediction. - name: Optional name of the layer. - parallel_iterations: Positive integer, the number of iterations allowed - to run in parallel. - same_sampling: Boolean, if 'True' sample same negative labels - for the whole batch. - """ + """Dot-product loss layer.""" def __init__( self, @@ -577,10 +547,45 @@ def __init__( use_max_sim_neg: bool, neg_lambda: float, scale_loss: bool, + similarity_type: Text, name: Optional[Text] = None, - parallel_iterations: int = 1000, same_sampling: bool = False, + constrain_similarities: bool = True, + model_confidence: Text = SOFTMAX, ) -> None: + """Declare instance variables with default values. + + Args: + num_neg: Positive integer, the number of incorrect labels; + the algorithm will minimize their similarity to the input. + loss_type: The type of the loss function, either 'cross_entropy' or 'margin'. 
+            mu_pos: Float, indicates how similar the algorithm should
+                try to make embedding vectors for correct labels;
+                should be 0.0 < ... < 1.0 for 'cosine' similarity type.
+            mu_neg: Float, maximum negative similarity for incorrect labels,
+                should be -1.0 < ... < 1.0 for 'cosine' similarity type.
+            use_max_sim_neg: Boolean, if 'True' the algorithm only minimizes
+                maximum similarity over incorrect intent labels,
+                used only if 'loss_type' is set to 'margin'.
+            neg_lambda: Float, the scale of how important it is to minimize
+                the maximum similarity between embeddings of different labels,
+                used only if 'loss_type' is set to 'margin'.
+            scale_loss: Boolean, if 'True' scale loss inverse proportionally to
+                the confidence of the correct prediction.
+            similarity_type: Similarity measure to use, either 'cosine' or 'inner'.
+            name: Optional name of the layer.
+            same_sampling: Boolean, if 'True' sample same negative labels
+                for the whole batch.
+            constrain_similarities: Boolean, if 'True' applies sigmoid on all
+                similarity terms and adds to the loss function to
+                ensure that similarity values are approximately bounded.
+                Used inside _loss_cross_entropy() only.
+            model_confidence: Model confidence to be returned during inference.
+                Possible values - 'softmax', 'cosine' and 'inner'.
+
+        Raises:
+            TFLayerConfigException: When `similarity_type` is not one of 'cosine'
+                or 'inner'.
+        """
         super().__init__(name=name)
         self.num_neg = num_neg
         self.loss_type = loss_type
@@ -589,8 +594,15 @@
         self.use_max_sim_neg = use_max_sim_neg
         self.neg_lambda = neg_lambda
         self.scale_loss = scale_loss
-        self.parallel_iterations = parallel_iterations
         self.same_sampling = same_sampling
+        self.constrain_similarities = constrain_similarities
+        self.model_confidence = model_confidence
+        self.similarity_type = similarity_type
+        if self.similarity_type not in {COSINE, INNER}:
+            raise TFLayerConfigException(
+                f"Wrong similarity type '{self.similarity_type}', "
+                f"should be '{COSINE}' or '{INNER}'."
+            )
 
     @staticmethod
     def _make_flat(x: tf.Tensor) -> tf.Tensor:
@@ -685,24 +697,49 @@
             labels_bad_negs,
         )
 
-    @staticmethod
-    def sim(a: tf.Tensor, b: tf.Tensor, mask: Optional[tf.Tensor] = None) -> tf.Tensor:
+    def sim(
+        self, a: tf.Tensor, b: tf.Tensor, mask: Optional[tf.Tensor] = None
+    ) -> tf.Tensor:
         """Calculate similarity between given tensors."""
-
+        if self.similarity_type == COSINE:
+            a = tf.nn.l2_normalize(a, axis=-1)
+            b = tf.nn.l2_normalize(b, axis=-1)
         sim = tf.reduce_sum(a * b, axis=-1)
         if mask is not None:
             sim *= tf.expand_dims(mask, 2)
 
         return sim
 
-    @staticmethod
-    def confidence_from_sim(sim: tf.Tensor, similarity_type: Text) -> tf.Tensor:
-        if similarity_type == COSINE:
-            # clip negative values to zero
-            return tf.nn.relu(sim)
-        else:
-            # normalize result to [0, 1] with softmax
-            return tf.nn.softmax(sim)
+    def _similarity_confidence_from_embeddings(
+        self,
+        input_embeddings: tf.Tensor,
+        label_embeddings: tf.Tensor,
+        mask: Optional[tf.Tensor] = None,
+    ) -> Tuple[tf.Tensor, tf.Tensor]:
+        """Computes similarity between input and label embeddings and model's confidence.
+
+        First compute the similarity from embeddings and then apply an activation
+        function if needed to get the confidence.
+
+        Args:
+            input_embeddings: Embeddings of input.
+            label_embeddings: Embeddings of labels.
+            mask: Mask over input and output sequence.
+
+        Returns:
+            Similarity between input and label embeddings and the model's
+            prediction confidence for each label.
+ """ + # If model's prediction confidence is configured to be cosine similarity, + # then normalize embeddings to unit vectors. + if self.model_confidence == COSINE: + input_embeddings = tf.nn.l2_normalize(input_embeddings, axis=-1) + label_embeddings = tf.nn.l2_normalize(label_embeddings, axis=-1) + + similarities = self.sim(input_embeddings, label_embeddings, mask) + confidences = similarities + if self.model_confidence == SOFTMAX: + confidences = tf.nn.softmax(similarities) + return similarities, confidences def _train_sim( self, @@ -806,7 +843,7 @@ def _loss_margin( return loss - def _loss_softmax( + def _loss_cross_entropy( self, sim_pos: tf.Tensor, sim_neg_il: tf.Tensor, @@ -815,18 +852,15 @@ def _loss_softmax( sim_neg_li: tf.Tensor, mask: Optional[tf.Tensor], ) -> tf.Tensor: - """Define softmax loss.""" - - logits = tf.concat( - [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 + """Defines cross entropy loss.""" + loss = self._compute_softmax_loss( + sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li ) - # create label_ids for softmax - label_ids = tf.zeros_like(logits[..., 0], tf.int32) - - loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=label_ids, logits=logits - ) + if self.constrain_similarities: + loss += self._compute_sigmoid_loss( + sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li + ) if self.scale_loss: # in case of cross entropy log_likelihood = -loss @@ -845,18 +879,68 @@ def _loss_softmax( # average the loss over the batch return tf.reduce_mean(loss) + @staticmethod + def _compute_sigmoid_loss( + sim_pos: tf.Tensor, + sim_neg_il: tf.Tensor, + sim_neg_ll: tf.Tensor, + sim_neg_ii: tf.Tensor, + sim_neg_li: tf.Tensor, + ) -> tf.Tensor: + # Constrain similarity values in a range by applying sigmoid + # on them individually so that they saturate at extreme values. + sigmoid_logits = tf.concat( + [sim_pos, sim_neg_il, sim_neg_ll, sim_neg_ii, sim_neg_li], axis=-1 + ) + sigmoid_labels = tf.concat( + [ + tf.ones_like(sigmoid_logits[..., :1]), + tf.zeros_like(sigmoid_logits[..., 1:]), + ], + axis=-1, + ) + sigmoid_loss = tf.nn.sigmoid_cross_entropy_with_logits( + labels=sigmoid_labels, logits=sigmoid_logits + ) + # average over logits axis + return tf.reduce_mean(sigmoid_loss, axis=-1) + + def _compute_softmax_loss( + self, + sim_pos: tf.Tensor, + sim_neg_il: tf.Tensor, + sim_neg_ll: tf.Tensor, + sim_neg_ii: tf.Tensor, + sim_neg_li: tf.Tensor, + ) -> tf.Tensor: + # Similarity terms between input and label should be optimized relative + # to each other and hence use them as logits for softmax term + softmax_logits = tf.concat([sim_pos, sim_neg_il, sim_neg_li], axis=-1) + if not self.constrain_similarities: + # Concatenate other similarity terms as well. Due to this, + # similarity values between input and label may not be + # approximately bounded in a defined range. 
+ softmax_logits = tf.concat( + [softmax_logits, sim_neg_ii, sim_neg_ll], axis=-1 + ) + # create label_ids for softmax + softmax_label_ids = tf.zeros_like(softmax_logits[..., 0], tf.int32) + softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=softmax_label_ids, logits=softmax_logits + ) + return softmax_loss + @property def _chosen_loss(self) -> Callable: """Use loss depending on given option.""" - if self.loss_type == MARGIN: return self._loss_margin - elif self.loss_type == SOFTMAX: - return self._loss_softmax + elif self.loss_type == CROSS_ENTROPY: + return self._loss_cross_entropy else: - raise ValueError( + raise TFLayerConfigException( f"Wrong loss type '{self.loss_type}', " - f"should be '{MARGIN}' or '{SOFTMAX}'" + f"should be '{MARGIN}' or '{CROSS_ENTROPY}'" ) # noinspection PyMethodOverriding diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 6aaf465556f0..697076abef84 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -55,6 +55,8 @@ CONCAT_DIMENSION, DROP_RATE_ATTENTION, SCALE_LOSS, + CONSTRAIN_SIMILARITIES, + MODEL_CONFIDENCE, ) from rasa.utils.tensorflow import layers from rasa.utils.tensorflow.transformer import TransformerEncoder @@ -730,7 +732,6 @@ def _prepare_embed_layers(self, name: Text, prefix: Text = "embed") -> None: self.config[EMBEDDING_DIMENSION], self.config[REGULARIZATION_CONSTANT], name, - self.config[SIMILARITY_TYPE], ) def _prepare_ffnn_layer( @@ -789,8 +790,9 @@ def _prepare_dot_product_loss( self.config[USE_MAX_NEG_SIM], self.config[NEGATIVE_MARGIN_SCALE], scale_loss, - # set to 1 to get deterministic behaviour - parallel_iterations=1 if self.random_seed is not None else 1000, + similarity_type=self.config[SIMILARITY_TYPE], + constrain_similarities=self.config[CONSTRAIN_SIMILARITIES], + model_confidence=self.config[MODEL_CONFIDENCE], ) def _prepare_sparse_dense_dropout_layers( diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py index b620a87b8a46..ecf0910729ea 100644 --- a/rasa/utils/train_utils.py +++ b/rasa/utils/train_utils.py @@ -19,9 +19,12 @@ AUTO, INNER, COSINE, + CROSS_ENTROPY, TRANSFORMER_SIZE, NUM_TRANSFORMER_LAYERS, DENSE_DIMENSION, + CONSTRAIN_SIMILARITIES, + MODEL_CONFIDENCE, ) from rasa.shared.nlu.constants import ( ACTION_NAME, @@ -31,6 +34,7 @@ ) from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS from rasa.core.constants import DIALOGUE +from rasa.shared.exceptions import InvalidConfigException if TYPE_CHECKING: from rasa.nlu.extractors.extractor import EntityTagSpec @@ -63,7 +67,7 @@ def update_similarity_type(config: Dict[Text, Any]) -> Dict[Text, Any]: Returns: updated model configuration """ if config.get(SIMILARITY_TYPE) == AUTO: - if config[LOSS_TYPE] == SOFTMAX: + if config[LOSS_TYPE] == CROSS_ENTROPY: config[SIMILARITY_TYPE] = INNER elif config[LOSS_TYPE] == MARGIN: config[SIMILARITY_TYPE] = COSINE @@ -71,6 +75,28 @@ def update_similarity_type(config: Dict[Text, Any]) -> Dict[Text, Any]: return config +def update_deprecated_loss_type(config: Dict[Text, Any]) -> Dict[Text, Any]: + """If LOSS_TYPE is set to 'softmax', update it to 'cross_entropy' since former is deprecated. + + Args: + config: model configuration + + Returns: + updated model configuration + """ + # TODO: Completely deprecate this with 3.0 + if config.get(LOSS_TYPE) == SOFTMAX: + rasa.shared.utils.io.raise_deprecation_warning( + f"`{LOSS_TYPE}={SOFTMAX}` is deprecated. 
" + f"Please update your configuration file to use" + f"`{LOSS_TYPE}={CROSS_ENTROPY}` instead.", + warn_until_version=NEXT_MAJOR_VERSION_FOR_DEPRECATIONS, + ) + config[LOSS_TYPE] = CROSS_ENTROPY + + return config + + def align_token_features( list_of_tokens: List[List["Token"]], in_token_features: np.ndarray, @@ -342,6 +368,94 @@ def override_defaults( return config +def update_confidence_type(component_config: Dict[Text, Any]) -> Dict[Text, Any]: + """Set model confidence to cosine if margin loss is used. + + Args: + component_config: model configuration + + Returns: + updated model configuration + """ + # TODO: Remove this once model_confidence is set to cosine by default. + if ( + component_config[LOSS_TYPE] == MARGIN + and component_config[MODEL_CONFIDENCE] == SOFTMAX + ): + rasa.shared.utils.io.raise_warning( + f"Overriding defaults by setting {MODEL_CONFIDENCE} to " + f"{COSINE} as {LOSS_TYPE} is set to {MARGIN} in the configuration." + ) + component_config[MODEL_CONFIDENCE] = COSINE + return component_config + + +def validate_configuration_settings(component_config: Dict[Text, Any]) -> None: + """Performs checks to validate that combination of parameters in the configuration are correctly set. + + Args: + component_config: Configuration to validate. + """ + _check_loss_setting(component_config) + _check_confidence_setting(component_config) + _check_similarity_loss_setting(component_config) + + +def _check_confidence_setting(component_config: Dict[Text, Any]) -> None: + if component_config[MODEL_CONFIDENCE] == SOFTMAX: + rasa.shared.utils.io.raise_warning( + f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended " + f"to set it to `cosine`. It will be set to `cosine` by default, " + f"Rasa Open Source 3.0.0 onwards.", + category=UserWarning, + ) + if component_config[LOSS_TYPE] not in [SOFTMAX, CROSS_ENTROPY]: + raise InvalidConfigException( + f"{LOSS_TYPE}={component_config[LOSS_TYPE]} and " + f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid " + f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} " + f"only with {LOSS_TYPE}={CROSS_ENTROPY}." + ) + if component_config[SIMILARITY_TYPE] not in [INNER, AUTO]: + raise InvalidConfigException( + f"{SIMILARITY_TYPE}={component_config[SIMILARITY_TYPE]} and " + f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid " + f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} " + f"only with {SIMILARITY_TYPE}={INNER}." + ) + + +def _check_loss_setting(component_config: Dict[Text, Any]) -> None: + if not component_config[CONSTRAIN_SIMILARITIES] and component_config[LOSS_TYPE] in [ + SOFTMAX, + CROSS_ENTROPY, + ]: + rasa.shared.utils.io.raise_warning( + f"{CONSTRAIN_SIMILARITIES} is set to `False`. It is recommended " + f"to set it to `True` when using cross-entropy loss. It will be set to `True` by default, " + f"Rasa Open Source 3.0.0 onwards.", + category=UserWarning, + ) + + +def _check_similarity_loss_setting(component_config: Dict[Text, Any]) -> None: + if ( + component_config[SIMILARITY_TYPE] == COSINE + and component_config[LOSS_TYPE] == CROSS_ENTROPY + or component_config[SIMILARITY_TYPE] == INNER + and component_config[LOSS_TYPE] == MARGIN + ): + rasa.shared.utils.io.raise_warning( + f"`{SIMILARITY_TYPE}={component_config[SIMILARITY_TYPE]}`" + f" and `{LOSS_TYPE}={component_config[LOSS_TYPE]}` " + f"is not a recommended setting as it may not lead to best results." 
+ f"Ideally use `{SIMILARITY_TYPE}={INNER}`" + f" and `{LOSS_TYPE}={CROSS_ENTROPY}` or" + f"`{SIMILARITY_TYPE}={COSINE}` and `{LOSS_TYPE}={MARGIN}`.", + category=UserWarning, + ) + + def init_split_entities( split_entities_config, default_split_entity ) -> Dict[Text, bool]: diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py index ea790d422127..de3c7668e008 100644 --- a/tests/core/policies/test_ted_policy.py +++ b/tests/core/policies/test_ted_policy.py @@ -32,8 +32,12 @@ SCALE_LOSS, SIMILARITY_TYPE, VALUE_RELATIVE_ATTENTION, + MODEL_CONFIDENCE, + COSINE, + INNER, ) from tests.core.test_policies import PolicyTestCollection +from rasa.shared.constants import DEFAULT_SENDER_ID UTTER_GREET_ACTION = "utter_greet" GREET_INTENT_NAME = "greet" @@ -264,7 +268,10 @@ def create_policy( ) def test_similarity_type(self, trained_policy: TEDPolicy): - assert trained_policy.config[SIMILARITY_TYPE] == "cosine" + assert trained_policy.config[SIMILARITY_TYPE] == COSINE + + def test_confidence_type(self, trained_policy: TEDPolicy): + assert trained_policy.config[MODEL_CONFIDENCE] == COSINE def test_normalization( self, @@ -283,6 +290,18 @@ def test_normalization( # function should not get called for margin loss_type mock.normalize.assert_not_called() + def test_prediction_on_empty_tracker( + self, trained_policy: Policy, default_domain: Domain + ): + tracker = DialogueStateTracker(DEFAULT_SENDER_ID, default_domain.slots) + prediction = trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ) + assert not prediction.is_end_to_end_prediction + assert len(prediction.probabilities) == default_domain.num_actions + assert max(prediction.probabilities) <= 1.0 + assert min(prediction.probabilities) >= -1.0 + class TestTEDPolicyWithEval(TestTEDPolicy): def create_policy( @@ -330,6 +349,106 @@ def test_normalization( mock.normalize.assert_not_called() +class TestTEDPolicyCosineConfidence(TestTEDPolicy): + def create_policy( + self, featurizer: Optional[TrackerFeaturizer], priority: int + ) -> Policy: + return TEDPolicy( + featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: COSINE} + ) + + def test_confidence_type(self, trained_policy: TEDPolicy): + assert trained_policy.config[MODEL_CONFIDENCE] == COSINE + + def test_normalization( + self, + trained_policy: Policy, + tracker: DialogueStateTracker, + default_domain: Domain, + monkeypatch: MonkeyPatch, + ): + # first check the output is what we expect + predicted_probabilities = trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ).probabilities + # there should be no normalization + confidence_in_range = [ + -1 <= confidence <= 1 for confidence in predicted_probabilities + ] + assert all(confidence_in_range) + + # also check our function is not called + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ) + + mock.normalize.assert_not_called() + + def test_prediction_on_empty_tracker( + self, trained_policy: Policy, default_domain: Domain + ): + tracker = DialogueStateTracker(DEFAULT_SENDER_ID, default_domain.slots) + prediction = trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ) + assert not prediction.is_end_to_end_prediction + assert len(prediction.probabilities) == default_domain.num_actions + assert max(prediction.probabilities) <= 1.0 + assert 
min(prediction.probabilities) >= -1.0 + + +class TestTEDPolicyInnerConfidence(TestTEDPolicy): + def create_policy( + self, featurizer: Optional[TrackerFeaturizer], priority: int + ) -> Policy: + return TEDPolicy( + featurizer=featurizer, priority=priority, **{MODEL_CONFIDENCE: INNER} + ) + + def test_confidence_type(self, trained_policy: TEDPolicy): + assert trained_policy.config[MODEL_CONFIDENCE] == INNER + + def test_normalization( + self, + trained_policy: Policy, + tracker: DialogueStateTracker, + default_domain: Domain, + monkeypatch: MonkeyPatch, + ): + # first check the output is what we expect + predicted_probabilities = trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ).probabilities + # there should be no normalization + confidence_in_range = [ + -1e9 <= confidence <= 1e9 for confidence in predicted_probabilities + ] + assert all(confidence_in_range) + + # also check our function is not called + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ) + + mock.normalize.assert_not_called() + + def test_prediction_on_empty_tracker( + self, trained_policy: Policy, default_domain: Domain + ): + tracker = DialogueStateTracker(DEFAULT_SENDER_ID, default_domain.slots) + prediction = trained_policy.predict_action_probabilities( + tracker, default_domain, RegexInterpreter() + ) + assert not prediction.is_end_to_end_prediction + assert len(prediction.probabilities) == default_domain.num_actions + assert max(prediction.probabilities) <= 1e9 + assert min(prediction.probabilities) >= -1e9 + + class TestTEDPolicyLowRankingLength(TestTEDPolicy): def create_policy( self, featurizer: Optional[TrackerFeaturizer], priority: int diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py index bb56ee931347..bb0c2aa13cb0 100644 --- a/tests/nlu/classifiers/test_diet_classifier.py +++ b/tests/nlu/classifiers/test_diet_classifier.py @@ -4,6 +4,7 @@ import pytest from unittest.mock import Mock from typing import List, Text, Dict, Any +from _pytest.monkeypatch import MonkeyPatch import rasa.model from rasa.shared.nlu.training_data.features import Features @@ -31,6 +32,7 @@ BILOU_FLAG, ENTITY_RECOGNITION, INTENT_CLASSIFICATION, + MODEL_CONFIDENCE, ) from rasa.nlu.components import ComponentBuilder from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer @@ -367,6 +369,72 @@ async def test_softmax_normalization( assert parse_data.get("intent") == intent_ranking[0] +@pytest.mark.parametrize( + "classifier_params, prediction_min, prediction_max, output_length", + [ + ( + {RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "cosine"}, + -1, + 1, + LABEL_RANKING_LENGTH, + ), + ( + {RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "inner"}, + -1e9, + 1e9, + LABEL_RANKING_LENGTH, + ), + ], +) +async def test_cross_entropy_without_normalization( + component_builder: ComponentBuilder, + tmp_path: Path, + classifier_params: Dict[Text, Any], + prediction_min: float, + prediction_max: float, + output_length: int, + monkeypatch: MonkeyPatch, +): + pipeline = as_pipeline( + "WhitespaceTokenizer", "CountVectorsFeaturizer", "DIETClassifier" + ) + assert pipeline[2]["name"] == "DIETClassifier" + pipeline[2].update(classifier_params) + + _config = RasaNLUModelConfig({"pipeline": pipeline}) + (trained_model, _, persisted_path) = await train( + _config, + path=str(tmp_path), + data="data/test/many_intents.md", + 
component_builder=component_builder, + ) + loaded = Interpreter.load(persisted_path, component_builder) + + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + + parse_data = loaded.parse("hello") + intent_ranking = parse_data.get("intent_ranking") + + # check that the output was correctly truncated + assert len(intent_ranking) == output_length + + intent_confidences = [intent.get("confidence") for intent in intent_ranking] + + # check each confidence is in range + confidence_in_range = [ + prediction_min <= confidence <= prediction_max + for confidence in intent_confidences + ] + assert all(confidence_in_range) + + # normalize shouldn't have been called + mock.normalize.assert_not_called() + + # check whether the normalization of rankings is reflected in intent prediction + assert parse_data.get("intent") == intent_ranking[0] + + @pytest.mark.parametrize( "classifier_params, output_length", [({LOSS_TYPE: "margin", RANDOM_SEED: 42, EPOCHS: 1}, LABEL_RANKING_LENGTH)], diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py index 6d5c4aabea9d..610ef3304efb 100644 --- a/tests/nlu/selectors/test_selectors.py +++ b/tests/nlu/selectors/test_selectors.py @@ -3,6 +3,8 @@ import pytest import numpy as np from typing import List, Dict, Text, Any +from mock import Mock +from _pytest.monkeypatch import MonkeyPatch import rasa.model from rasa.nlu import train @@ -19,12 +21,18 @@ EVAL_NUM_EPOCHS, EVAL_NUM_EXAMPLES, CHECKPOINT_MODEL, + MODEL_CONFIDENCE, + RANDOM_SEED, + RANKING_LENGTH, + LOSS_TYPE, ) +from rasa.utils import train_utils from rasa.shared.nlu.constants import TEXT from rasa.shared.constants import DIAGNOSTIC_DATA from rasa.nlu.selectors.response_selector import ResponseSelector from rasa.shared.nlu.training_data.message import Message from rasa.shared.nlu.training_data.training_data import TrainingData +from tests.nlu.classifiers.test_diet_classifier import as_pipeline @pytest.mark.parametrize( @@ -315,3 +323,126 @@ async def test_process_gives_diagnostic_data(trained_response_selector_bot: Path assert "attention_weights" in diagnostic_data[name] # By default, ResponseSelector has `number_of_transformer_layers = 0` assert diagnostic_data[name].get("attention_weights") is None + + +@pytest.mark.parametrize( + "classifier_params, prediction_min, prediction_max, output_length", + [ + ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "cosine"}, -1, 1, 9), + ({RANDOM_SEED: 42, EPOCHS: 1, MODEL_CONFIDENCE: "inner"}, -1e9, 1e9, 9), + ], +) +async def test_cross_entropy_without_normalization( + component_builder: ComponentBuilder, + tmp_path: Path, + classifier_params: Dict[Text, Any], + prediction_min: float, + prediction_max: float, + output_length: int, + monkeypatch: MonkeyPatch, +): + pipeline = as_pipeline( + "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector" + ) + assert pipeline[2]["name"] == "ResponseSelector" + pipeline[2].update(classifier_params) + + _config = RasaNLUModelConfig({"pipeline": pipeline}) + (trained_model, _, persisted_path) = await train( + _config, + path=str(tmp_path), + data="data/test_selectors", + component_builder=component_builder, + ) + loaded = Interpreter.load(persisted_path, component_builder) + + mock = Mock() + monkeypatch.setattr(train_utils, "normalize", mock.normalize) + + parse_data = loaded.parse("hello") + response_ranking = parse_data.get("response_selector").get("default").get("ranking") + + # check that the output was correctly truncated + assert len(response_ranking) == 
+
+    response_confidences = [
+        response.get("confidence") for response in response_ranking
+    ]
+
+    # check that each confidence is in the expected range
+    confidence_in_range = [
+        prediction_min <= confidence <= prediction_max
+        for confidence in response_confidences
+    ]
+    assert all(confidence_in_range)
+
+    # normalize shouldn't have been called
+    mock.normalize.assert_not_called()
+
+
+@pytest.mark.parametrize(
+    "classifier_params", [{LOSS_TYPE: "margin", RANDOM_SEED: 42, EPOCHS: 1}],
+)
+async def test_margin_loss_is_not_normalized(
+    monkeypatch: MonkeyPatch,
+    component_builder: ComponentBuilder,
+    tmp_path: Path,
+    classifier_params: Dict[Text, int],
+):
+    pipeline = as_pipeline(
+        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
+    )
+    assert pipeline[2]["name"] == "ResponseSelector"
+    pipeline[2].update(classifier_params)
+
+    mock = Mock()
+    monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+
+    _config = RasaNLUModelConfig({"pipeline": pipeline})
+    (trained_model, _, persisted_path) = await train(
+        _config,
+        path=str(tmp_path),
+        data="data/test_selectors",
+        component_builder=component_builder,
+    )
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    parse_data = loaded.parse("hello")
+    response_ranking = (
+        parse_data.get("response_selector").get("default").get("ranking")
+    )
+
+    # check that the output was not normalized
+    mock.normalize.assert_not_called()
+
+    # check that the output was correctly truncated
+    assert len(response_ranking) == 9
+
+
+@pytest.mark.parametrize(
+    "classifier_params, data_path, output_length",
+    [
+        ({RANDOM_SEED: 42, EPOCHS: 1}, "data/test_selectors", 9),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 0, EPOCHS: 1}, "data/test_selectors", 9),
+        ({RANDOM_SEED: 42, RANKING_LENGTH: 2, EPOCHS: 1}, "data/test_selectors", 2),
+    ],
+)
+async def test_softmax_ranking(
+    component_builder: ComponentBuilder,
+    tmp_path: Path,
+    classifier_params: Dict[Text, int],
+    data_path: Text,
+    output_length: int,
+):
+    pipeline = as_pipeline(
+        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
+    )
+    assert pipeline[2]["name"] == "ResponseSelector"
+    pipeline[2].update(classifier_params)
+
+    _config = RasaNLUModelConfig({"pipeline": pipeline})
+    (trained_model, _, persisted_path) = await train(
+        _config, path=str(tmp_path), data=data_path, component_builder=component_builder
+    )
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    parse_data = loaded.parse("hello")
+    response_ranking = (
+        parse_data.get("response_selector").get("default").get("ranking")
+    )
+    # check that the output was correctly truncated after normalization
+    assert len(response_ranking) == output_length
diff --git a/tests/utils/test_train_utils.py b/tests/utils/test_train_utils.py
index 9ec906d9606e..e3e02d7ca31a 100644
--- a/tests/utils/test_train_utils.py
+++ b/tests/utils/test_train_utils.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pytest
+from typing import Any, Dict, Text
 
 import rasa.utils.train_utils as train_utils
 from rasa.nlu.constants import NUMBER_OF_SUB_TOKENS
@@ -10,6 +11,17 @@
     SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE,
     SPLIT_ENTITIES_BY_COMMA,
 )
+from rasa.utils.tensorflow.constants import (
+    MODEL_CONFIDENCE,
+    SIMILARITY_TYPE,
+    LOSS_TYPE,
+    COSINE,
+    SOFTMAX,
+    INNER,
+    CROSS_ENTROPY,
+    MARGIN,
+)
+from rasa.shared.exceptions import InvalidConfigException
 
 
 def test_align_token_features():
@@ -35,6 +47,18 @@ def test_align_token_features():
     assert np.all(actual_features[0][4] == np.mean(token_features[0][5:10], axis=0))
 
 
+@pytest.mark.parametrize(
"input_values, ranking_length, output_values", + [ + ([0.2, 0.7, 0.1], 2, [0.2222222, 0.77777778, 0.0]), + ([0.1, 0.7, 0.1], 5, [0.11111111, 0.77777778, 0.11111111]), + ], +) +def test_normalize(input_values, ranking_length, output_values): + normalized_values = train_utils.normalize(np.array(input_values), ranking_length) + assert np.allclose(normalized_values, np.array(output_values), atol=1e-5) + + @pytest.mark.parametrize( "split_entities_config, expected_initialized_config", [ @@ -61,3 +85,66 @@ def test_init_split_entities_config( ) == expected_initialized_config ) + + +@pytest.mark.parametrize( + "component_config, raises_exception", + [ + ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: MARGIN}, True), + ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: SOFTMAX}, False), + ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: CROSS_ENTROPY}, False), + ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: MARGIN}, False), + ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: SOFTMAX}, False), + ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: CROSS_ENTROPY}, False), + ({MODEL_CONFIDENCE: INNER, LOSS_TYPE: MARGIN}, False), + ({MODEL_CONFIDENCE: INNER, LOSS_TYPE: SOFTMAX}, False), + ({MODEL_CONFIDENCE: INNER, LOSS_TYPE: CROSS_ENTROPY}, False), + ], +) +def test_confidence_loss_settings( + component_config: Dict[Text, Any], raises_exception: bool +): + component_config[SIMILARITY_TYPE] = INNER + if raises_exception: + with pytest.raises(InvalidConfigException): + train_utils._check_confidence_setting(component_config) + else: + train_utils._check_confidence_setting(component_config) + + +@pytest.mark.parametrize( + "component_config, raises_exception", + [ + ({MODEL_CONFIDENCE: SOFTMAX, SIMILARITY_TYPE: INNER}, False), + ({MODEL_CONFIDENCE: SOFTMAX, SIMILARITY_TYPE: COSINE}, True), + ({MODEL_CONFIDENCE: COSINE, SIMILARITY_TYPE: INNER}, False), + ({MODEL_CONFIDENCE: COSINE, SIMILARITY_TYPE: COSINE}, False), + ({MODEL_CONFIDENCE: INNER, SIMILARITY_TYPE: INNER}, False), + ({MODEL_CONFIDENCE: INNER, SIMILARITY_TYPE: COSINE}, False), + ], +) +def test_confidence_similarity_settings( + component_config: Dict[Text, Any], raises_exception: bool +): + component_config[LOSS_TYPE] = SOFTMAX + if raises_exception: + with pytest.raises(InvalidConfigException): + train_utils._check_confidence_setting(component_config) + else: + train_utils._check_confidence_setting(component_config) + + +@pytest.mark.parametrize( + "component_config, model_confidence", + [ + ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: MARGIN}, COSINE), + ({MODEL_CONFIDENCE: SOFTMAX, LOSS_TYPE: CROSS_ENTROPY}, SOFTMAX), + ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: CROSS_ENTROPY}, COSINE), + ({MODEL_CONFIDENCE: COSINE, LOSS_TYPE: MARGIN}, COSINE), + ], +) +def test_update_confidence_type( + component_config: Dict[Text, Text], model_confidence: Text +): + component_config = train_utils.update_confidence_type(component_config) + assert component_config[MODEL_CONFIDENCE] == model_confidence