diff --git a/rasa/core/policies/_memoization.py b/rasa/core/policies/_memoization.py
new file mode 100644
index 000000000000..107da9a94433
--- /dev/null
+++ b/rasa/core/policies/_memoization.py
@@ -0,0 +1,369 @@
+# WARNING: This module will be dropped before Rasa Open Source 3.0 is released.
+# Please don't make changes in this module; instead, adapt `MemoizationPolicyGraphComponent` from
+# the regular `rasa.core.policies.memoization` module. This module is a
+# workaround to defer breaking changes due to the architecture revamp in 3.0.
+# flake8: noqa
+import zlib
+
+import base64
+import json
+import logging
+
+from tqdm import tqdm
+from typing import Optional, Any, Dict, List, Text
+
+import rasa.utils.io
+import rasa.shared.utils.io
+from rasa.shared.core.domain import State, Domain
+from rasa.shared.core.events import ActionExecuted
+from rasa.core.featurizers.tracker_featurizers import (
+    TrackerFeaturizer,
+    MaxHistoryTrackerFeaturizer,
+)
+from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
+from rasa.core.policies.policy import Policy, PolicyPrediction
+from rasa.shared.core.trackers import DialogueStateTracker
+from rasa.shared.core.generator import TrackerWithCachedStates
+from rasa.shared.utils.io import is_logging_disabled
+from rasa.core.constants import MEMOIZATION_POLICY_PRIORITY, DEFAULT_MAX_HISTORY
+
+logger = logging.getLogger(__name__)
+
+
+class MemoizationPolicy(Policy):
+    """A policy that follows exact examples of `max_history` turns in training stories.
+
+    Since `slots` that are set some time in the past are
+    preserved in all future feature vectors until they are set
+    to None, this policy implicitly remembers and most importantly
+    recalls examples in the context of the current dialogue
+    longer than `max_history`.
+
+    This policy is not supposed to be the only policy in an ensemble;
+    it is optimized for precision and not recall.
+    It should get 100% precision because it emits probabilities of 1.1
+    along its predictions, which makes every mistake fatal as
+    no other policy can overrule it.
+
+    If you need to recall turns from training dialogues where
+    some slots might not be set during prediction time, and there are
+    training stories for this, use AugmentedMemoizationPolicy.
+    """
+
+    ENABLE_FEATURE_STRING_COMPRESSION = True
+
+    USE_NLU_CONFIDENCE_AS_SCORE = False
+
+    @staticmethod
+    def _standard_featurizer(
+        max_history: Optional[int] = None,
+    ) -> MaxHistoryTrackerFeaturizer:
+        # Memoization policy always uses MaxHistoryTrackerFeaturizer
+        # without state_featurizer
+        return MaxHistoryTrackerFeaturizer(
+            state_featurizer=None, max_history=max_history
+        )
+
+    def __init__(
+        self,
+        featurizer: Optional[TrackerFeaturizer] = None,
+        priority: int = MEMOIZATION_POLICY_PRIORITY,
+        max_history: Optional[int] = DEFAULT_MAX_HISTORY,
+        lookup: Optional[Dict] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the policy.
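+
+        The policy stores a lookup table that maps the featurized state
+        history of the last `max_history` turns to the action that followed
+        it in the training stories. With feature string compression enabled,
+        the lookup keys are base64-encoded zlib blobs of the JSON-serialized
+        states (see `_create_feature_key`).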
+
+        Args:
+            featurizer: tracker featurizer
+            priority: the priority of the policy
+            max_history: maximum history to take into account when featurizing trackers
+            lookup: a dictionary that stores featurized tracker states and
+                predicted actions for them
+        """
+        if not featurizer:
+            featurizer = self._standard_featurizer(max_history)
+
+        super().__init__(featurizer, priority, **kwargs)
+
+        self.max_history = self.featurizer.max_history
+        self.lookup = lookup if lookup is not None else {}
+
+    def _create_lookup_from_states(
+        self,
+        trackers_as_states: List[List[State]],
+        trackers_as_actions: List[List[Text]],
+    ) -> Dict[Text, Text]:
+        """Creates lookup dictionary from the tracker represented as states.
+
+        Args:
+            trackers_as_states: representation of the trackers as a list of states
+            trackers_as_actions: representation of the trackers as a list of actions
+
+        Returns:
+            lookup dictionary
+        """
+
+        lookup = {}
+
+        if not trackers_as_states:
+            return lookup
+
+        assert len(trackers_as_actions[0]) == 1, (
+            f"The second dimension of trackers_as_actions should be 1, "
+            f"instead of {len(trackers_as_actions[0])}"
+        )
+
+        ambiguous_feature_keys = set()
+
+        pbar = tqdm(
+            zip(trackers_as_states, trackers_as_actions),
+            desc="Processed actions",
+            disable=is_logging_disabled(),
+        )
+        for states, actions in pbar:
+            action = actions[0]
+
+            feature_key = self._create_feature_key(states)
+            if not feature_key:
+                continue
+
+            if feature_key not in ambiguous_feature_keys:
+                if feature_key in lookup.keys():
+                    if lookup[feature_key] != action:
+                        # delete contradicting example created by
+                        # partial history augmentation from memory
+                        ambiguous_feature_keys.add(feature_key)
+                        del lookup[feature_key]
+                else:
+                    lookup[feature_key] = action
+            pbar.set_postfix({"# examples": "{:d}".format(len(lookup))})
+
+        return lookup
+
+    def _create_feature_key(self, states: List[State]) -> Text:
+        # we sort keys to make sure that the same states
+        # represented as dictionaries have the same json strings
+        # quotes are removed for aesthetic reasons
+        feature_str = json.dumps(states, sort_keys=True).replace('"', "")
+        if self.ENABLE_FEATURE_STRING_COMPRESSION:
+            compressed = zlib.compress(
+                bytes(feature_str, rasa.shared.utils.io.DEFAULT_ENCODING)
+            )
+            return base64.b64encode(compressed).decode(
+                rasa.shared.utils.io.DEFAULT_ENCODING
+            )
+        else:
+            return feature_str
+
+    def train(
+        self,
+        training_trackers: List[TrackerWithCachedStates],
+        domain: Domain,
+        interpreter: NaturalLanguageInterpreter,
+        **kwargs: Any,
+    ) -> None:
+        # only considers original trackers (no augmented ones)
+        training_trackers = [
+            t
+            for t in training_trackers
+            if not hasattr(t, "is_augmented") or not t.is_augmented
+        ]
+        (
+            trackers_as_states,
+            trackers_as_actions,
+        ) = self.featurizer.training_states_and_labels(training_trackers, domain)
+        self.lookup = self._create_lookup_from_states(
+            trackers_as_states, trackers_as_actions
+        )
+        logger.debug(f"Memorized {len(self.lookup)} unique examples.")
+
+    def _recall_states(self, states: List[State]) -> Optional[Text]:
+        return self.lookup.get(self._create_feature_key(states))
+
+    def recall(
+        self, states: List[State], tracker: DialogueStateTracker, domain: Domain,
+    ) -> Optional[Text]:
+        """Finds the action based on the given states.
+
+        Args:
+            states: List of states.
+            tracker: The tracker.
+            domain: The Domain.
+
+        Returns:
+            The name of the action.
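+
+        Example (illustrative; `policy`, `tracker` and `domain` are assumed
+        to be a trained policy, the current tracker and the domain; the state
+        values are made up but follow the shape produced by the tracker
+        featurizer):
+
+            >>> states = [
+            ...     {"user": {"intent": "greet"},
+            ...      "prev_action": {"action_name": "action_listen"}},
+            ... ]
+            >>> policy.recall(states, tracker, domain)  # doctest: +SKIP
+            'utter_greet'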
+ """ + return self._recall_states(states) + + def _prediction_result( + self, action_name: Text, tracker: DialogueStateTracker, domain: Domain + ) -> List[float]: + result = self._default_predictions(domain) + if action_name: + if self.USE_NLU_CONFIDENCE_AS_SCORE: + # the memoization will use the confidence of NLU on the latest + # user message to set the confidence of the action + score = tracker.latest_message.intent.get("confidence", 1.0) + else: + score = 1.0 + + result[domain.index_for_action(action_name)] = score + + return result + + def predict_action_probabilities( + self, + tracker: DialogueStateTracker, + domain: Domain, + interpreter: NaturalLanguageInterpreter, + **kwargs: Any, + ) -> PolicyPrediction: + """Predicts the next action the bot should take after seeing the tracker. + + Args: + tracker: the :class:`rasa.core.trackers.DialogueStateTracker` + domain: the :class:`rasa.shared.core.domain.Domain` + interpreter: Interpreter which may be used by the policies to create + additional features. + + Returns: + The policy's prediction (e.g. the probabilities for the actions). + """ + result = self._default_predictions(domain) + + states = self._prediction_states(tracker, domain) + logger.debug(f"Current tracker state:{self.format_tracker_states(states)}") + predicted_action_name = self.recall(states, tracker, domain) + if predicted_action_name is not None: + logger.debug(f"There is a memorised next action '{predicted_action_name}'") + result = self._prediction_result(predicted_action_name, tracker, domain) + else: + logger.debug("There is no memorised next action") + + return self._prediction(result) + + def _metadata(self) -> Dict[Text, Any]: + return { + "priority": self.priority, + "max_history": self.max_history, + "lookup": self.lookup, + } + + @classmethod + def _metadata_filename(cls) -> Text: + return "memorized_turns.json" + + +class AugmentedMemoizationPolicy(MemoizationPolicy): + """The policy that remembers examples from training stories + for `max_history` turns. + + If it is needed to recall turns from training dialogues + where some slots might not be set during prediction time, + add relevant stories without such slots to training data. + E.g. reminder stories. + + Since `slots` that are set some time in the past are + preserved in all future feature vectors until they are set + to None, this policy has a capability to recall the turns + up to `max_history` from training stories during prediction + even if additional slots were filled in the past + for current dialogue. 
+ """ + + @staticmethod + def _back_to_the_future( + tracker: DialogueStateTracker, again: bool = False + ) -> Optional[DialogueStateTracker]: + """Send Marty to the past to get + the new featurization for the future""" + + idx_of_first_action = None + idx_of_second_action = None + + # we need to find second executed action + for e_i, event in enumerate(tracker.applied_events()): + # find second ActionExecuted + if isinstance(event, ActionExecuted): + if idx_of_first_action is None: + idx_of_first_action = e_i + else: + idx_of_second_action = e_i + break + + # use first action, if we went first time and second action, if we went again + idx_to_use = idx_of_second_action if again else idx_of_first_action + if idx_to_use is None: + return None + + # make second ActionExecuted the first one + events = tracker.applied_events()[idx_to_use:] + if not events: + return None + + mcfly_tracker = tracker.init_copy() + for e in events: + mcfly_tracker.update(e) + + return mcfly_tracker + + def _recall_using_delorean( + self, old_states: List[State], tracker: DialogueStateTracker, domain: Domain, + ) -> Optional[Text]: + """Applies to the future idea to change the past and get the new future. + + Recursively go to the past to correctly forget slots, + and then back to the future to recall. + + Args: + old_states: List of states. + tracker: The tracker. + domain: The Domain. + + Returns: + The name of the action. + """ + logger.debug("Launch DeLorean...") + + mcfly_tracker = self._back_to_the_future(tracker) + while mcfly_tracker is not None: + states = self._prediction_states(mcfly_tracker, domain,) + + if old_states != states: + # check if we like new futures + memorised = self._recall_states(states) + if memorised is not None: + logger.debug(f"Current tracker state {states}") + return memorised + old_states = states + + # go back again + mcfly_tracker = self._back_to_the_future(mcfly_tracker, again=True) + + # No match found + logger.debug(f"Current tracker state {old_states}") + return None + + def recall( + self, states: List[State], tracker: DialogueStateTracker, domain: Domain, + ) -> Optional[Text]: + """Finds the action based on the given states. + + Uses back to the future idea to change the past and check whether the new future + can be used to recall the action. + + Args: + states: List of states. + tracker: The tracker. + domain: The Domain. + + Returns: + The name of the action. 
+ """ + predicted_action_name = self._recall_states(states) + if predicted_action_name is None: + # let's try a different method to recall that tracker + return self._recall_using_delorean(states, tracker, domain,) + else: + return predicted_action_name diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index 9c123eb99014..dce6670079e4 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -1,3 +1,4 @@ +from __future__ import annotations import zlib import base64 @@ -6,26 +7,46 @@ from tqdm import tqdm from typing import Optional, Any, Dict, List, Text +from pathlib import Path import rasa.utils.io import rasa.shared.utils.io +from rasa.engine.graph import ExecutionContext +from rasa.engine.storage.resource import Resource +from rasa.engine.storage.storage import ModelStorage from rasa.shared.core.domain import State, Domain from rasa.shared.core.events import ActionExecuted from rasa.core.featurizers.tracker_featurizers import ( TrackerFeaturizer, MaxHistoryTrackerFeaturizer, + FEATURIZER_FILE, ) +from rasa.shared.exceptions import FileIOException from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter -from rasa.core.policies.policy import Policy, PolicyPrediction +from rasa.core.policies.policy import PolicyPrediction, PolicyGraphComponent from rasa.shared.core.trackers import DialogueStateTracker from rasa.shared.core.generator import TrackerWithCachedStates from rasa.shared.utils.io import is_logging_disabled -from rasa.core.constants import MEMOIZATION_POLICY_PRIORITY, DEFAULT_MAX_HISTORY +from rasa.core.constants import ( + MEMOIZATION_POLICY_PRIORITY, + DEFAULT_MAX_HISTORY, + POLICY_MAX_HISTORY, + POLICY_PRIORITY, +) +from rasa.core.policies._memoization import ( + MemoizationPolicy, + AugmentedMemoizationPolicy, +) + +# TODO: This is a workaround around until we have all components migrated to +# `GraphComponent`. +MemoizationPolicy = MemoizationPolicy +AugmentedMemoizationPolicy = AugmentedMemoizationPolicy logger = logging.getLogger(__name__) -class MemoizationPolicy(Policy): +class MemoizationPolicyGraphComponent(PolicyGraphComponent): """A policy that follows exact examples of `max_history` turns in training stories. Since `slots` that are set some time in the past are @@ -45,44 +66,36 @@ class MemoizationPolicy(Policy): training stories for this, use AugmentedMemoizationPolicy. 
""" - ENABLE_FEATURE_STRING_COMPRESSION = True - - USE_NLU_CONFIDENCE_AS_SCORE = False - @staticmethod - def _standard_featurizer( - max_history: Optional[int] = None, - ) -> MaxHistoryTrackerFeaturizer: + def get_default_config() -> Dict[Text, Any]: + """Returns the default config (see parent class for full docstring).""" + # please make sure to update the docs when changing a default parameter + return { + "enable_feature_string_compression": True, + "use_nlu_confidence_as_score": False, + POLICY_PRIORITY: MEMOIZATION_POLICY_PRIORITY, + POLICY_MAX_HISTORY: DEFAULT_MAX_HISTORY, + } + + def _standard_featurizer(self) -> MaxHistoryTrackerFeaturizer: # Memoization policy always uses MaxHistoryTrackerFeaturizer # without state_featurizer return MaxHistoryTrackerFeaturizer( - state_featurizer=None, max_history=max_history + state_featurizer=None, max_history=self.config[POLICY_MAX_HISTORY] ) def __init__( self, + config: Dict[Text, Any], + model_storage: ModelStorage, + resource: Resource, + execution_context: ExecutionContext, featurizer: Optional[TrackerFeaturizer] = None, - priority: int = MEMOIZATION_POLICY_PRIORITY, - max_history: Optional[int] = DEFAULT_MAX_HISTORY, lookup: Optional[Dict] = None, - **kwargs: Any, ) -> None: - """Initialize the policy. - - Args: - featurizer: tracker featurizer - priority: the priority of the policy - max_history: maximum history to take into account when featurizing trackers - lookup: a dictionary that stores featurized tracker states and - predicted actions for them - """ - if not featurizer: - featurizer = self._standard_featurizer(max_history) - - super().__init__(featurizer, priority, **kwargs) - - self.max_history = self.featurizer.max_history - self.lookup = lookup if lookup is not None else {} + """Initialize the policy.""" + super().__init__(config, model_storage, resource, execution_context, featurizer) + self.lookup = lookup or {} def _create_lookup_from_states( self, @@ -98,7 +111,6 @@ def _create_lookup_from_states( Returns: lookup dictionary """ - lookup = {} if not trackers_as_states: @@ -141,7 +153,7 @@ def _create_feature_key(self, states: List[State]) -> Text: # represented as dictionaries have the same json strings # quotes are removed for aesthetic reasons feature_str = json.dumps(states, sort_keys=True).replace('"', "") - if self.ENABLE_FEATURE_STRING_COMPRESSION: + if self.config["enable_feature_string_compression"]: compressed = zlib.compress( bytes(feature_str, rasa.shared.utils.io.DEFAULT_ENCODING) ) @@ -155,7 +167,6 @@ def train( self, training_trackers: List[TrackerWithCachedStates], domain: Domain, - interpreter: NaturalLanguageInterpreter, **kwargs: Any, ) -> None: # only considers original trackers (no augmented ones) @@ -173,6 +184,8 @@ def train( ) logger.debug(f"Memorized {len(self.lookup)} unique examples.") + self.persist() + def _recall_states(self, states: List[State]) -> Optional[Text]: return self.lookup.get(self._create_feature_key(states)) @@ -196,7 +209,7 @@ def _prediction_result( ) -> List[float]: result = self._default_predictions(domain) if action_name: - if self.USE_NLU_CONFIDENCE_AS_SCORE: + if self.config["use_nlu_confidence_as_score"]: # the memoization will use the confidence of NLU on the latest # user message to set the confidence of the action score = tracker.latest_message.intent.get("confidence", 1.0) @@ -239,20 +252,64 @@ def predict_action_probabilities( return self._prediction(result) def _metadata(self) -> Dict[Text, Any]: - return { - "priority": self.priority, - "max_history": self.max_history, 
- "lookup": self.lookup, - } + return {"lookup": self.lookup} @classmethod def _metadata_filename(cls) -> Text: return "memorized_turns.json" + def persist(self) -> None: + """Persists the policy to storage.""" + with self._model_storage.write_to(self._resource) as path: + # not all policies have a featurizer + if self.featurizer is not None: + self.featurizer.persist(path) + + file = Path(path) / self._metadata_filename() + + rasa.shared.utils.io.create_directory_for_file(file) + rasa.shared.utils.io.dump_obj_as_json_to_file(file, self._metadata()) + + @classmethod + def load( + cls, + config: Dict[Text, Any], + model_storage: ModelStorage, + resource: Resource, + execution_context: ExecutionContext, + **kwargs: Any, + ) -> MemoizationPolicyGraphComponent: + """Loads a trained policy (see parent class for full docstring).""" + featurizer = None + lookup = None + + try: + with model_storage.read_from(resource) as path: + metadata_file = Path(path) / cls._metadata_filename() + metadata = rasa.shared.utils.io.read_json_file(metadata_file) + lookup = metadata["lookup"] + + if (Path(path) / FEATURIZER_FILE).is_file(): + featurizer = TrackerFeaturizer.load(path) + + except (ValueError, FileNotFoundError, FileIOException): + logger.warning( + f"Couldn't load metadata for policy '{cls.__name__}' as the persisted " + f"metadata couldn't be loaded." + ) -class AugmentedMemoizationPolicy(MemoizationPolicy): - """The policy that remembers examples from training stories - for `max_history` turns. + return cls( + config, + model_storage, + resource, + execution_context, + featurizer=featurizer, + lookup=lookup, + ) + + +class AugmentedMemoizationPolicyGraphComponent(MemoizationPolicyGraphComponent): + """The policy that remembers examples from training stories for `max_history` turns. 
If it is needed to recall turns from training dialogues where some slots might not be set during prediction time, @@ -324,8 +381,10 @@ def _recall_using_delorean( logger.debug("Launch DeLorean...") # Truncate the tracker based on `max_history` - mcfly_tracker = _trim_tracker_by_max_history(tracker, self.max_history) - mcfly_tracker = self._back_to_the_future(tracker) + mcfly_tracker = _trim_tracker_by_max_history( + tracker, self.config[POLICY_MAX_HISTORY] + ) + mcfly_tracker = self._back_to_the_future(mcfly_tracker) while mcfly_tracker is not None: states = self._prediction_states(mcfly_tracker, domain,) diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py index 3a8b736683a6..aa16fbb924fa 100644 --- a/rasa/core/policies/policy.py +++ b/rasa/core/policies/policy.py @@ -1,7 +1,6 @@ from __future__ import annotations import abc import copy -import json import logging from enum import Enum from pathlib import Path @@ -38,7 +37,11 @@ from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter from rasa.shared.core.trackers import DialogueStateTracker from rasa.shared.core.generator import TrackerWithCachedStates -from rasa.core.constants import DEFAULT_POLICY_PRIORITY, POLICY_PRIORITY +from rasa.core.constants import ( + DEFAULT_POLICY_PRIORITY, + POLICY_PRIORITY, + POLICY_MAX_HISTORY, +) from rasa.shared.core.constants import ( USER, SLOTS, @@ -123,8 +126,9 @@ def __init__( featurizer: Optional[TrackerFeaturizer] = None, ) -> None: """Constructs a new Policy object.""" + self.config = config if featurizer is None: - featurizer = self._create_featurizer(config) + featurizer = self._create_featurizer() self.__featurizer = featurizer self.priority = config.get(POLICY_PRIORITY, DEFAULT_POLICY_PRIORITY) @@ -147,18 +151,17 @@ def create( """Creates a new untrained policy (see parent class for full docstring).""" return cls(config, model_storage, resource, execution_context) - @classmethod - def _create_featurizer(cls, policy_config: Dict[Text, Any]) -> TrackerFeaturizer: - policy_config = copy.deepcopy(policy_config) + def _create_featurizer(self) -> TrackerFeaturizer: + policy_config = copy.deepcopy(self.config) featurizer_configs = policy_config.get("featurizer") if not featurizer_configs: - return cls._standard_featurizer() + return self._standard_featurizer() featurizer_func = _get_featurizer_from_config( featurizer_configs, - cls.__name__, + self.__class__.__name__, lookup_path="rasa.core.featurizers.tracker_featurizers", ) featurizer_config = featurizer_configs[0] @@ -167,7 +170,7 @@ def _create_featurizer(cls, policy_config: Dict[Text, Any]) -> TrackerFeaturizer if state_featurizer_configs: state_featurizer_func = _get_featurizer_from_config( state_featurizer_configs, - cls.__name__, + self.__class__.__name__, lookup_path="rasa.core.featurizers.single_state_featurizer", ) state_featurizer_config = state_featurizer_configs[0] @@ -176,11 +179,20 @@ def _create_featurizer(cls, policy_config: Dict[Text, Any]) -> TrackerFeaturizer **state_featurizer_config ) - return featurizer_func(**featurizer_config) - - @staticmethod - def _standard_featurizer() -> MaxHistoryTrackerFeaturizer: - return MaxHistoryTrackerFeaturizer(SingleStateFeaturizer()) + featurizer = featurizer_func(**featurizer_config) + if ( + isinstance(featurizer, MaxHistoryTrackerFeaturizer) + and POLICY_MAX_HISTORY in policy_config + and POLICY_MAX_HISTORY not in featurizer_config + ): + featurizer.max_history = policy_config[POLICY_MAX_HISTORY] + return featurizer + + def _standard_featurizer(self) -> 
MaxHistoryTrackerFeaturizer: + """Initializes the standard featurizer for this policy.""" + return MaxHistoryTrackerFeaturizer( + SingleStateFeaturizer(), self.config.get(POLICY_MAX_HISTORY) + ) @property def featurizer(self) -> TrackerFeaturizer: @@ -389,41 +401,6 @@ def _prediction( action_metadata=action_metadata, ) - def _metadata(self) -> Optional[Dict[Text, Any]]: - """Returns this policy's attributes that should be persisted. - - Policies using the default `persist()` and `load()` implementations must - implement the `_metadata()` method." - - Returns: - The policy metadata. - """ - pass - - @classmethod - def _metadata_filename(cls) -> Text: - """Returns the filename of the persisted policy metadata. - - Policies using the default `persist()` and `load()` implementations must - implement the `_metadata_filename()` method. - - Returns: - The filename of the persisted policy metadata. - """ - pass - - def persist(self) -> None: - """Persists the policy to storage.""" - with self._model_storage.write_to(self._resource) as path: - # not all policies have a featurizer - if self.featurizer is not None: - self.featurizer.persist(path) - - file = Path(path) / self._metadata_filename() - - rasa.shared.utils.io.create_directory_for_file(file) - rasa.shared.utils.io.dump_obj_as_json_to_file(file, self._metadata()) - @classmethod def load( cls, @@ -434,14 +411,10 @@ def load( **kwargs: Any, ) -> "PolicyGraphComponent": """Loads a trained policy (see parent class for full docstring).""" - config = {} featurizer = None try: with model_storage.read_from(resource) as path: - metadata_file = Path(path) / cls._metadata_filename() - config = json.loads(rasa.shared.utils.io.read_file(metadata_file)) - if (Path(path) / FEATURIZER_FILE).is_file(): featurizer = TrackerFeaturizer.load(path) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index c9b61c157161..7d7cd8363107 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -24,7 +24,6 @@ TrackerFeaturizer, MaxHistoryTrackerFeaturizer, ) -from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer from rasa.shared.exceptions import RasaException from rasa.shared.nlu.constants import ( ACTION_TEXT, @@ -41,7 +40,13 @@ ) from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter, RegexInterpreter from rasa.core.policies.policy import PolicyPrediction, PolicyGraphComponent -from rasa.core.constants import DIALOGUE, POLICY_MAX_HISTORY +from rasa.core.constants import ( + DIALOGUE, + POLICY_MAX_HISTORY, + DEFAULT_MAX_HISTORY, + DEFAULT_POLICY_PRIORITY, + POLICY_PRIORITY, +) from rasa.shared.constants import DIAGNOSTIC_DATA from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS, ACTION_LISTEN_NAME from rasa.shared.core.trackers import DialogueStateTracker @@ -332,14 +337,12 @@ def get_default_config() -> Dict[Text, Any]: # ingredients in a recipe, but it doesn't make sense for the parts of # an address SPLIT_ENTITIES_BY_COMMA: SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE, + # Max history of the policy, unbounded by default + POLICY_MAX_HISTORY: DEFAULT_MAX_HISTORY, + # Determines the importance of policies, higher values take precedence + POLICY_PRIORITY: DEFAULT_POLICY_PRIORITY, } - @staticmethod - def _standard_featurizer(max_history: Optional[int] = None) -> TrackerFeaturizer: - return MaxHistoryTrackerFeaturizer( - SingleStateFeaturizer(), max_history=max_history - ) - def __init__( self, config: Dict[Text, Any], @@ -360,13 +363,6 @@ def __init__( 
config[SPLIT_ENTITIES_BY_COMMA], SPLIT_ENTITIES_BY_COMMA_DEFAULT_VALUE ) - # TODO: check if this statement can be removed. - # More context here - - # https://github.com/RasaHQ/rasa/issues/5786#issuecomment-840762751 - max_history = config.get(POLICY_MAX_HISTORY) - if isinstance(self.featurizer, MaxHistoryTrackerFeaturizer) and max_history: - self.featurizer.max_history = max_history - self._load_params(config) self.model = model diff --git a/rasa/core/policies/unexpected_intent_policy.py b/rasa/core/policies/unexpected_intent_policy.py index 266858a59eea..beafa4288c92 100644 --- a/rasa/core/policies/unexpected_intent_policy.py +++ b/rasa/core/policies/unexpected_intent_policy.py @@ -30,7 +30,7 @@ IntentTokenizerSingleStateFeaturizer, ) from rasa.shared.core.generator import TrackerWithCachedStates -from rasa.core.constants import DIALOGUE +from rasa.core.constants import DIALOGUE, POLICY_MAX_HISTORY from rasa.core.policies.policy import PolicyPrediction from rasa.core.policies.ted_policy import ( LABEL_KEY, @@ -326,10 +326,10 @@ def __init__( common.mark_as_experimental_feature("UnexpecTED Intent Policy") - @staticmethod - def _standard_featurizer(max_history: Optional[int] = None) -> TrackerFeaturizer: + def _standard_featurizer(self) -> TrackerFeaturizer: return IntentMaxHistoryTrackerFeaturizer( - IntentTokenizerSingleStateFeaturizer(), max_history=max_history + IntentTokenizerSingleStateFeaturizer(), + max_history=self.config.get(POLICY_MAX_HISTORY), ) @staticmethod diff --git a/tests/core/conftest.py b/tests/core/conftest.py index 06495d89aecf..e0a0edc3b76c 100644 --- a/tests/core/conftest.py +++ b/tests/core/conftest.py @@ -17,7 +17,7 @@ from rasa.shared.core.domain import Domain from rasa.shared.core.events import ReminderScheduled, UserUttered, ActionExecuted from rasa.core.nlg import TemplatedNaturalLanguageGenerator, NaturalLanguageGenerator -from rasa.core.policies.memoization import Policy +from rasa.core.policies import Policy from rasa.core.processor import MessageProcessor from rasa.shared.core.slots import Slot from rasa.core.tracker_store import InMemoryTrackerStore, MongoTrackerStore diff --git a/tests/core/policies/test_memoization.py b/tests/core/policies/test_memoization.py deleted file mode 100644 index 9eb8ad415322..000000000000 --- a/tests/core/policies/test_memoization.py +++ /dev/null @@ -1,195 +0,0 @@ -import pytest - -from rasa.engine.graph import ExecutionContext -from rasa.engine.storage.resource import Resource -from rasa.engine.storage.storage import ModelStorage -from tests.core.test_policies import PolicyTestCollection -from typing import Optional, Dict, Text, Any -from rasa.core.featurizers.tracker_featurizers import ( - TrackerFeaturizer, - MaxHistoryTrackerFeaturizer, -) -from rasa.shared.core.generator import TrackerWithCachedStates -from rasa.core.policies.memoization import AugmentedMemoizationPolicy, MemoizationPolicy -from rasa.shared.core.domain import Domain -from rasa.shared.core.events import ( - ActionExecuted, - UserUttered, - SlotSet, -) -from rasa.shared.nlu.interpreter import RegexInterpreter - - -class TestMemoizationPolicy(PolicyTestCollection): - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: Optional[int], - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> MemoizationPolicy: - return MemoizationPolicy(featurizer=featurizer, priority=priority) - - @pytest.mark.parametrize("max_history", [1, 2, 3, 4, None]) - 
def test_prediction( - self, - max_history: Optional[int], - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - ): - policy = self.create_policy( - featurizer=MaxHistoryTrackerFeaturizer(max_history=max_history), - priority=1, - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - - GREET_INTENT_NAME = "greet" - UTTER_GREET_ACTION = "utter_greet" - UTTER_BYE_ACTION = "utter_goodbye" - domain = Domain.from_yaml( - f""" - intents: - - {GREET_INTENT_NAME} - actions: - - {UTTER_GREET_ACTION} - - {UTTER_BYE_ACTION} - slots: - slot_1: - type: bool - slot_2: - type: bool - slot_3: - type: bool - slot_4: - type: bool - """ - ) - events = [ - UserUttered(intent={"name": GREET_INTENT_NAME}), - ActionExecuted(UTTER_GREET_ACTION), - SlotSet("slot_1", True), - ActionExecuted(UTTER_GREET_ACTION), - SlotSet("slot_2", True), - SlotSet("slot_3", True), - ActionExecuted(UTTER_GREET_ACTION), - ActionExecuted(UTTER_GREET_ACTION), - UserUttered(intent={"name": GREET_INTENT_NAME}), - ActionExecuted(UTTER_GREET_ACTION), - SlotSet("slot_4", True), - ActionExecuted(UTTER_BYE_ACTION), - ] - training_story = TrackerWithCachedStates.from_events( - "training story", evts=events, domain=domain, slots=domain.slots, - ) - test_story = TrackerWithCachedStates.from_events( - "training story", events[:-1], domain=domain, slots=domain.slots, - ) - interpreter = RegexInterpreter() - policy.train([training_story], domain, interpreter) - prediction = policy.predict_action_probabilities( - test_story, domain, interpreter - ) - assert ( - domain.action_names_or_texts[ - prediction.probabilities.index(max(prediction.probabilities)) - ] - == UTTER_BYE_ACTION - ) - - -class TestAugmentedMemoizationPolicy(TestMemoizationPolicy): - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: Optional[int], - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> MemoizationPolicy: - return AugmentedMemoizationPolicy(featurizer=featurizer, priority=priority) - - @pytest.mark.parametrize("max_history", [1, 2, 3, 4, None]) - def test_augmented_prediction( - self, - max_history: Optional[int], - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - ): - policy = self.create_policy( - featurizer=MaxHistoryTrackerFeaturizer(max_history=max_history), - priority=1, - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - - GREET_INTENT_NAME = "greet" - UTTER_GREET_ACTION = "utter_greet" - UTTER_BYE_ACTION = "utter_goodbye" - domain = Domain.from_yaml( - f""" - intents: - - {GREET_INTENT_NAME} - actions: - - {UTTER_GREET_ACTION} - - {UTTER_BYE_ACTION} - slots: - slot_1: - type: bool - initial_value: true - slot_2: - type: bool - slot_3: - type: bool - """ - ) - training_story = TrackerWithCachedStates.from_events( - "training story", - [ - ActionExecuted(UTTER_GREET_ACTION), - UserUttered(intent={"name": GREET_INTENT_NAME}), - ActionExecuted(UTTER_GREET_ACTION), - SlotSet("slot_3", True), - ActionExecuted(UTTER_BYE_ACTION), - ], - domain=domain, - slots=domain.slots, - ) - test_story = TrackerWithCachedStates.from_events( - "test story", - [ - UserUttered(intent={"name": GREET_INTENT_NAME}), - ActionExecuted(UTTER_GREET_ACTION), - SlotSet("slot_1", False), - ActionExecuted(UTTER_GREET_ACTION), - ActionExecuted(UTTER_GREET_ACTION), - UserUttered(intent={"name": GREET_INTENT_NAME}), 
- ActionExecuted(UTTER_GREET_ACTION), - SlotSet("slot_2", True), - ActionExecuted(UTTER_GREET_ACTION), - UserUttered(intent={"name": GREET_INTENT_NAME}), - ActionExecuted(UTTER_GREET_ACTION), - SlotSet("slot_3", True), - # ActionExecuted(UTTER_BYE_ACTION), - ], - domain=domain, - slots=domain.slots, - ) - interpreter = RegexInterpreter() - policy.train([training_story], domain, interpreter) - prediction = policy.predict_action_probabilities( - test_story, domain, interpreter - ) - assert ( - domain.action_names_or_texts[ - prediction.probabilities.index(max(prediction.probabilities)) - ] - == UTTER_BYE_ACTION - ) diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py index e3bad2b79b1c..f5cf1ab1c763 100644 --- a/tests/core/policies/test_ted_policy.py +++ b/tests/core/policies/test_ted_policy.py @@ -6,7 +6,7 @@ import tests.core.test_policies from _pytest.monkeypatch import MonkeyPatch from _pytest.logging import LogCaptureFixture -from rasa.core.constants import POLICY_PRIORITY, POLICY_MAX_HISTORY +from rasa.core.constants import POLICY_MAX_HISTORY from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer from rasa.core.featurizers.tracker_featurizers import ( @@ -120,33 +120,6 @@ class TestTEDPolicy(PolicyTestCollection): def _policy_class_to_test() -> Type[TEDPolicy]: return TEDPolicy - def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None - ) -> Dict[Text, Any]: - config_override = config_override or {} - return { - **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, - **config_override, - } - - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> TEDPolicy: - return TEDPolicy( - self._config(priority, config), - featurizer=featurizer, - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - def test_train_model_checkpointing(self, tmp_path: Path): checkpoint_dir = get_checkpoint_dir_path(tmp_path) assert not checkpoint_dir.is_dir() @@ -220,7 +193,7 @@ def test_epoch_override_when_loaded( ): execution_context.is_finetuning = should_finetune loaded_policy = trained_policy.__class__.load( - {**self._config(trained_policy.priority), EPOCH_OVERRIDE: epoch_override}, + {**self._config(), EPOCH_OVERRIDE: epoch_override}, model_storage, resource, execution_context, @@ -251,7 +224,6 @@ def test_train_fails_with_checkpoint_zero_eval_num_epochs(self, tmp_path: Path): def test_training_with_no_intent( self, featurizer: Optional[TrackerFeaturizer], - priority: int, default_domain: Domain, tmp_path: Path, caplog: LogCaptureFixture, @@ -271,7 +243,6 @@ def test_training_with_no_intent( ) policy = self.create_policy( featurizer=featurizer, - priority=priority, model_storage=model_storage, resource=resource, execution_context=execution_context, @@ -571,36 +542,57 @@ def test_ignore_action_unlikely_intent( == prediction_without_action.probabilities ) + @pytest.mark.parametrize( + "featurizer_config, tracker_featurizer, state_featurizer", + [ + (None, MaxHistoryTrackerFeaturizer(), SingleStateFeaturizer), + ([], MaxHistoryTrackerFeaturizer(), SingleStateFeaturizer), + ], + ) + def test_empty_featurizer_configs( + self, + featurizer_config: Optional[Dict[Text, Any]], + model_storage: ModelStorage, + resource: Resource, + execution_context: ExecutionContext, + tracker_featurizer: 
MaxHistoryTrackerFeaturizer, + state_featurizer: Type[SingleStateFeaturizer], + ): + featurizer_config_override = ( + {"featurizer": featurizer_config} if featurizer_config else {} + ) + policy = self.create_policy( + None, + model_storage=model_storage, + resource=resource, + execution_context=execution_context, + config=self._config(featurizer_config_override), + ) + + featurizer = policy.featurizer + assert isinstance(featurizer, tracker_featurizer.__class__) + + if featurizer_config: + expected_max_history = featurizer_config[0].get(POLICY_MAX_HISTORY) + else: + expected_max_history = self._config().get(POLICY_MAX_HISTORY) + + assert featurizer.max_history == expected_max_history + + assert isinstance(featurizer.state_featurizer, state_featurizer) + class TestTEDPolicyMargin(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, LOSS_TYPE: "margin", **config_override, } - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - self._config(priority, config), - featurizer=featurizer, - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - def test_similarity_type(self, trained_policy: TEDPolicy): assert trained_policy.config[SIMILARITY_TYPE] == COSINE @@ -639,64 +631,28 @@ def test_prediction_on_empty_tracker( class TestTEDPolicyWithEval(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, SCALE_LOSS: False, EVAL_NUM_EXAMPLES: 4, **config_override, } - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - featurizer=featurizer, - config=self._config(priority, config), - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - class TestTEDPolicyNoNormalization(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), RANKING_LENGTH: 0, - POLICY_PRIORITY: priority, **config_override, } - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - featurizer=featurizer, - config=self._config(priority, config), - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - def test_ranking_length(self, trained_policy: TEDPolicy): assert trained_policy.config[RANKING_LENGTH] == 0 @@ -726,33 +682,15 @@ def test_normalization( class TestTEDPolicyLinearNormConfidence(TestTEDPolicy): def _config( - self, 
priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, MODEL_CONFIDENCE: LINEAR_NORM, **config_override, } - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - featurizer=featurizer, - config=self._config(priority, config), - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - def test_confidence_type(self, trained_policy: TEDPolicy): assert trained_policy.config[MODEL_CONFIDENCE] == LINEAR_NORM @@ -795,85 +733,47 @@ def test_prediction_on_empty_tracker( class TestTEDPolicyLowRankingLength(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, RANKING_LENGTH: 3, **config_override, } - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - featurizer=featurizer, - config=self._config(priority, config), - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - def test_ranking_length(self, trained_policy: TEDPolicy): assert trained_policy.config[RANKING_LENGTH] == 3 class TestTEDPolicyHighRankingLength(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, RANKING_LENGTH: 11, **config_override, } - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - featurizer=featurizer, - config=self._config(priority, config), - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - def test_ranking_length(self, trained_policy: TEDPolicy): assert trained_policy.config[RANKING_LENGTH] == 11 class TestTEDPolicyWithStandardFeaturizer(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, **config_override, } def create_policy( self, featurizer: Optional[TrackerFeaturizer], - priority: int, model_storage: ModelStorage, resource: Resource, execution_context: ExecutionContext, @@ -883,7 +783,7 @@ def create_policy( # since it is using MaxHistoryTrackerFeaturizer # if max_history is not specified return TEDPolicy( - config=self._config(priority, config), + config=self._config(config), model_storage=model_storage, resource=resource, execution_context=execution_context, @@ -903,7 
+803,7 @@ def test_featurizer( ) loaded = trained_policy.__class__.load( - self._config(trained_policy.priority), + self._config(trained_policy.config), model_storage, resource, execution_context, @@ -915,12 +815,11 @@ def test_featurizer( class TestTEDPolicyWithMaxHistory(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, POLICY_MAX_HISTORY: self.max_history, **config_override, } @@ -928,7 +827,6 @@ def _config( def create_policy( self, featurizer: Optional[TrackerFeaturizer], - priority: int, model_storage: ModelStorage, resource: Resource, execution_context: ExecutionContext, @@ -938,100 +836,39 @@ def create_policy( # since it is using MaxHistoryTrackerFeaturizer # if max_history is specified return TEDPolicy( - config=self._config(priority, config), + config=self._config(config), model_storage=model_storage, resource=resource, execution_context=execution_context, ) - def test_featurizer( - self, - trained_policy: Policy, - resource: Resource, - model_storage: ModelStorage, - tmp_path: Path, - execution_context: ExecutionContext, - ): - assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer) - assert trained_policy.featurizer.max_history == self.max_history - assert isinstance( - trained_policy.featurizer.state_featurizer, SingleStateFeaturizer - ) - - loaded = trained_policy.__class__.load( - self._config(trained_policy.priority), - model_storage, - resource, - execution_context, - ) - - assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer) - assert loaded.featurizer.max_history == self.max_history - assert isinstance(loaded.featurizer.state_featurizer, SingleStateFeaturizer) - class TestTEDPolicyWithRelativeAttention(TestTEDPolicy): def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, KEY_RELATIVE_ATTENTION: True, VALUE_RELATIVE_ATTENTION: True, MAX_RELATIVE_POSITION: 5, **config_override, } - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - featurizer=featurizer, - config=self._config(priority, config), - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - class TestTEDPolicyWithRelativeAttentionMaxHistoryOne(TestTEDPolicy): max_history = 1 def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: config_override = config_override or {} return { **TEDPolicy.get_default_config(), - POLICY_PRIORITY: priority, KEY_RELATIVE_ATTENTION: True, VALUE_RELATIVE_ATTENTION: True, MAX_RELATIVE_POSITION: 5, **config_override, } - - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - return TEDPolicy( - featurizer=featurizer, - config=self._config(priority, config), - 
model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) diff --git a/tests/core/policies/test_unexpected_intent_policy.py b/tests/core/policies/test_unexpected_intent_policy.py index e1726493b799..4269def42ab1 100644 --- a/tests/core/policies/test_unexpected_intent_policy.py +++ b/tests/core/policies/test_unexpected_intent_policy.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Optional, List, Dict, Text, Type, Any +from typing import Optional, List, Dict, Text, Type import tensorflow as tf import numpy as np import pytest @@ -7,7 +7,6 @@ from _pytest.logging import LogCaptureFixture import logging -from rasa.core.constants import POLICY_PRIORITY from rasa.core.featurizers.single_state_featurizer import ( IntentTokenizerSingleStateFeaturizer, ) @@ -63,33 +62,6 @@ class TestUnexpecTEDIntentPolicy(TestTEDPolicy): def _policy_class_to_test() -> Type[UnexpecTEDIntentPolicy]: return UnexpecTEDIntentPolicy - def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None - ) -> Dict[Text, Any]: - config_override = config_override or {} - return { - **UnexpecTEDIntentPolicy.get_default_config(), - POLICY_PRIORITY: priority, - **config_override, - } - - def create_policy( - self, - featurizer: Optional[TrackerFeaturizer], - priority: int, - model_storage: ModelStorage, - resource: Resource, - execution_context: ExecutionContext, - config: Optional[Dict[Text, Any]] = None, - ) -> UnexpecTEDIntentPolicy: - return UnexpecTEDIntentPolicy( - self._config(priority, config), - featurizer=featurizer, - model_storage=model_storage, - resource=resource, - execution_context=execution_context, - ) - @pytest.fixture(scope="class") def featurizer(self) -> TrackerFeaturizer: featurizer = IntentMaxHistoryTrackerFeaturizer( @@ -154,7 +126,6 @@ def test_label_data_assembly( def test_training_with_no_intent( self, featurizer: Optional[TrackerFeaturizer], - priority: int, default_domain: Domain, tmp_path: Path, caplog: LogCaptureFixture, @@ -174,7 +145,6 @@ def test_training_with_no_intent( ) policy = self.create_policy( featurizer=featurizer, - priority=priority, model_storage=model_storage, resource=resource, execution_context=execution_context, diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py index 9bb7182d0b61..45dc7a2d0e9b 100644 --- a/tests/core/test_policies.py +++ b/tests/core/test_policies.py @@ -6,8 +6,8 @@ import numpy as np import pytest from _pytest.tmpdir import TempPathFactory -from rasa.core.constants import DEFAULT_POLICY_PRIORITY, POLICY_MAX_HISTORY -from rasa.engine.graph import ExecutionContext, GraphSchema, GraphComponent +from rasa.core.constants import POLICY_MAX_HISTORY +from rasa.engine.graph import ExecutionContext, GraphSchema from rasa.engine.storage.local_model_storage import LocalModelStorage from rasa.engine.storage.resource import Resource from rasa.engine.storage.storage import ModelStorage @@ -38,10 +38,19 @@ IntentMaxHistoryTrackerFeaturizer, ) from rasa.shared.nlu.interpreter import RegexInterpreter -from rasa.core.policies.policy import SupportedData, Policy, InvalidPolicyConfig +from rasa.core.policies.policy import ( + SupportedData, + Policy, + InvalidPolicyConfig, + PolicyGraphComponent, +) from rasa.core.policies.rule_policy import RulePolicy from rasa.core.policies.ted_policy import TEDPolicy -from rasa.core.policies.memoization import AugmentedMemoizationPolicy, MemoizationPolicy +from rasa.core.policies.memoization import ( + AugmentedMemoizationPolicyGraphComponent as 
AugmentedMemoizationPolicy, + MemoizationPolicyGraphComponent as MemoizationPolicy, +) + from rasa.shared.core.trackers import DialogueStateTracker from tests.dialogues import TEST_DEFAULT_DIALOGUE from tests.core.utilities import get_tracker, tracker_from_dialogue @@ -67,6 +76,10 @@ class PolicyTestCollection: Each policy can declare further tests on its own.""" + @staticmethod + def _policy_class_to_test() -> Type[PolicyGraphComponent]: + raise NotImplementedError + max_history = 3 # this is the amount of history we test on @pytest.fixture(scope="class") @@ -82,20 +95,27 @@ def execution_context(self) -> ExecutionContext: return ExecutionContext(GraphSchema({}), uuid.uuid4().hex) def _config( - self, priority: int, config_override: Optional[Dict[Text, Any]] = None + self, config_override: Optional[Dict[Text, Any]] = None ) -> Dict[Text, Any]: - raise NotImplementedError + config_override = config_override or {} + config = self._policy_class_to_test().get_default_config() + return {**config, **config_override} def create_policy( self, featurizer: Optional[TrackerFeaturizer], - priority: Optional[int], model_storage: ModelStorage, resource: Resource, execution_context: ExecutionContext, config: Optional[Dict[Text, Any]] = None, - ) -> Policy: - raise NotImplementedError + ) -> PolicyGraphComponent: + return self._policy_class_to_test()( + config=self._config(config), + model_storage=model_storage, + resource=resource, + execution_context=execution_context, + featurizer=featurizer, + ) @pytest.fixture(scope="class") def featurizer(self) -> TrackerFeaturizer: @@ -104,10 +124,6 @@ def featurizer(self) -> TrackerFeaturizer: ) return featurizer - @pytest.fixture(scope="class") - def priority(self) -> int: - return 1 - @pytest.fixture(scope="class") def default_domain(self, domain_path: Text) -> Domain: return Domain.load(domain_path) @@ -120,25 +136,24 @@ def tracker(self, default_domain: Domain) -> DialogueStateTracker: def trained_policy( self, featurizer: Optional[TrackerFeaturizer], - priority: int, stories_path: Text, default_domain: Domain, model_storage: ModelStorage, resource: Resource, execution_context: ExecutionContext, - ) -> Policy: + ) -> PolicyGraphComponent: policy = self.create_policy( - featurizer, priority, model_storage, resource, execution_context + featurizer, model_storage, resource, execution_context ) training_trackers = train_trackers( default_domain, stories_path, augmentation_factor=20 ) - policy.train(training_trackers, default_domain, RegexInterpreter()) + policy.train(training_trackers, default_domain) return policy def test_featurizer( self, - trained_policy: Policy, + trained_policy: PolicyGraphComponent, resource: Resource, model_storage: ModelStorage, tmp_path: Path, @@ -150,17 +165,12 @@ def test_featurizer( trained_policy.featurizer.state_featurizer, SingleStateFeaturizer ) - if isinstance(trained_policy, GraphComponent): - loaded = trained_policy.__class__.load( - self._config(trained_policy.priority), - model_storage, - resource, - execution_context, - ) - else: - # TODO: Drop after all policies are migrated to `GraphComponent` - trained_policy.persist(str(tmp_path)) - loaded = trained_policy.__class__.load(str(tmp_path)) + loaded = trained_policy.__class__.load( + self._config(trained_policy.config), + model_storage, + resource, + execution_context, + ) assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer) assert loaded.featurizer.max_history == self.max_history @@ -169,28 +179,21 @@ def test_featurizer( 
@pytest.mark.parametrize("should_finetune", [False, True]) def test_persist_and_load( self, - trained_policy: Policy, + trained_policy: PolicyGraphComponent, default_domain: Domain, - tmp_path: Path, should_finetune: bool, stories_path: Text, model_storage: ModelStorage, resource: Resource, execution_context: ExecutionContext, ): - if isinstance(trained_policy, GraphComponent): - loaded = trained_policy.__class__.load( - self._config(trained_policy.priority), - model_storage, - resource, - dataclasses.replace(execution_context, is_finetuning=should_finetune), - ) - else: - # TODO: Drop after all policies are migrated to `GraphComponent` - trained_policy.persist(str(tmp_path)) - loaded = trained_policy.__class__.load( - str(tmp_path), should_finetune=should_finetune - ) + loaded = trained_policy.__class__.load( + self._config(trained_policy.config), + model_storage, + resource, + dataclasses.replace(execution_context, is_finetuning=should_finetune), + ) + assert loaded.finetune_mode == should_finetune trackers = train_trackers(default_domain, stories_path, augmentation_factor=20) @@ -221,37 +224,26 @@ def test_prediction_on_empty_tracker( ) def test_persist_and_load_empty_policy( self, - tmp_path: Path, default_domain: Domain, default_model_storage: ModelStorage, execution_context: ExecutionContext, ): resource = Resource(uuid.uuid4().hex) empty_policy = self.create_policy( - None, - DEFAULT_POLICY_PRIORITY, - default_model_storage, - resource, - execution_context, + None, default_model_storage, resource, execution_context, ) - empty_policy.train([], default_domain, RegexInterpreter()) - if isinstance(empty_policy, GraphComponent): - loaded = empty_policy.__class__.load( - self._config(DEFAULT_POLICY_PRIORITY), - default_model_storage, - resource, - execution_context, - ) - else: - # TODO: Drop after all policies are migrated to `GraphComponent` - empty_policy.persist(str(tmp_path)) - loaded = empty_policy.__class__.load(str(tmp_path)) + empty_policy.train([], default_domain) + loaded = empty_policy.__class__.load( + self._config(), default_model_storage, resource, execution_context, + ) assert loaded is not None @staticmethod - def _get_next_action(policy: Policy, events: List[Event], domain: Domain) -> Text: + def _get_next_action( + policy: PolicyGraphComponent, events: List[Event], domain: Domain + ) -> Text: tracker = get_tracker(events) scores = policy.predict_action_probabilities( @@ -263,13 +255,11 @@ def _get_next_action(policy: Policy, events: List[Event], domain: Domain) -> Tex @pytest.mark.parametrize( "featurizer_config, tracker_featurizer, state_featurizer", [ - (None, MaxHistoryTrackerFeaturizer(), SingleStateFeaturizer), - ([], MaxHistoryTrackerFeaturizer(), SingleStateFeaturizer), ( [ { "name": "MaxHistoryTrackerFeaturizer", - "max_history": 12, + POLICY_MAX_HISTORY: 12, "state_featurizer": [], } ], @@ -277,7 +267,7 @@ def _get_next_action(policy: Policy, events: List[Event], domain: Domain) -> Tex type(None), ), ( - [{"name": "MaxHistoryTrackerFeaturizer", "max_history": 12}], + [{"name": "MaxHistoryTrackerFeaturizer", POLICY_MAX_HISTORY: 12}], MaxHistoryTrackerFeaturizer(max_history=12), type(None), ), @@ -285,7 +275,7 @@ def _get_next_action(policy: Policy, events: List[Event], domain: Domain) -> Tex [ { "name": "IntentMaxHistoryTrackerFeaturizer", - "max_history": 12, + POLICY_MAX_HISTORY: 12, "state_featurizer": [ {"name": "IntentTokenizerSingleStateFeaturizer"} ], @@ -305,25 +295,25 @@ def test_different_featurizer_configs( tracker_featurizer: 
@@ -305,25 +295,25 @@ def test_different_featurizer_configs(
         tracker_featurizer: MaxHistoryTrackerFeaturizer,
         state_featurizer: Type[SingleStateFeaturizer],
     ):
+        featurizer_config_override = (
+            {"featurizer": featurizer_config} if featurizer_config else {}
+        )
         policy = self.create_policy(
             None,
-            priority=1,
             model_storage=model_storage,
             resource=resource,
             execution_context=execution_context,
-            config={"featurizer": featurizer_config},
+            config=self._config(featurizer_config_override),
         )
 
-        if not isinstance(policy, GraphComponent):
-            # TODO: Drop this after all policies have been migration to graph components
-            return
-
         featurizer = policy.featurizer
         assert isinstance(featurizer, tracker_featurizer.__class__)
 
-        expected_max_history = self._config(DEFAULT_POLICY_PRIORITY).get(
-            POLICY_MAX_HISTORY, tracker_featurizer.max_history
-        )
+        if featurizer_config:
+            expected_max_history = featurizer_config[0].get(POLICY_MAX_HISTORY)
+        else:
+            expected_max_history = self._config().get(POLICY_MAX_HISTORY)
+
         assert featurizer.max_history == expected_max_history
 
         assert isinstance(featurizer.state_featurizer, state_featurizer)
@@ -332,13 +322,13 @@ def test_different_featurizer_configs(
         "featurizer_config",
         [
             [
-                {"name": "MaxHistoryTrackerFeaturizer", "max_history": 12},
-                {"name": "MaxHistoryTrackerFeaturizer", "max_history": 12},
+                {"name": "MaxHistoryTrackerFeaturizer", POLICY_MAX_HISTORY: 12},
+                {"name": "MaxHistoryTrackerFeaturizer", POLICY_MAX_HISTORY: 12},
             ],
             [
                 {
                     "name": "IntentMaxHistoryTrackerFeaturizer",
-                    "max_history": 12,
+                    POLICY_MAX_HISTORY: 12,
                     "state_featurizer": [
                         {"name": "IntentTokenizerSingleStateFeaturizer"},
                         {"name": "IntentTokenizerSingleStateFeaturizer"},
@@ -349,20 +339,15 @@
     )
     def test_different_invalid_featurizer_configs(
         self,
-        trained_policy: Policy,
+        trained_policy: PolicyGraphComponent,
        featurizer_config: Optional[Dict[Text, Any]],
         model_storage: ModelStorage,
         resource: Resource,
         execution_context: ExecutionContext,
     ):
-        if not isinstance(trained_policy, GraphComponent):
-            # TODO: Drop this after all policies have been migration to graph components
-            return
-
         with pytest.raises(InvalidPolicyConfig):
             self.create_policy(
                 None,
-                priority=1,
                 model_storage=model_storage,
                 resource=resource,
                 execution_context=execution_context,
@@ -371,25 +356,31 @@
 
 
 class TestMemoizationPolicy(PolicyTestCollection):
-    def create_policy(
+    @staticmethod
+    def _policy_class_to_test() -> Type[PolicyGraphComponent]:
+        return MemoizationPolicy
+
+    @pytest.fixture(scope="class")
+    def featurizer(self) -> TrackerFeaturizer:
+        featurizer = MaxHistoryTrackerFeaturizer(None, max_history=self.max_history)
+        return featurizer
+
+    def test_featurizer(
         self,
-        featurizer: Optional[TrackerFeaturizer],
-        priority: int,
-        model_storage: ModelStorage,
+        trained_policy: PolicyGraphComponent,
         resource: Resource,
+        model_storage: ModelStorage,
+        tmp_path: Path,
         execution_context: ExecutionContext,
-        config: Optional[Dict[Text, Any]] = None,
-    ) -> Policy:
-        max_history = None
-        if isinstance(featurizer, MaxHistoryTrackerFeaturizer):
-            max_history = featurizer.max_history
-        return MemoizationPolicy(priority=priority, max_history=max_history)
-
-    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
+    ):
         assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
         assert trained_policy.featurizer.state_featurizer is None
-        trained_policy.persist(str(tmp_path))
-        loaded = trained_policy.__class__.load(str(tmp_path))
+        loaded = trained_policy.__class__.load(
+            self._config(trained_policy.config),
+            model_storage,
+            resource,
+            execution_context,
+        )
 
         assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
         assert loaded.featurizer.state_featurizer is None
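As `TestMemoizationPolicy` above shows, a concrete suite now only names the class under test; `_config` and `create_policy` in the shared collection derive everything else from the component's `get_default_config()`. A hypothetical subclass, purely for illustration:

class TestMyPolicy(PolicyTestCollection):
    """Inherits every shared test; only the policy class is supplied."""

    @staticmethod
    def _policy_class_to_test() -> Type[PolicyGraphComponent]:
        return MyPolicy  # hypothetical graph-component policy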
@@ -400,7 +391,7 @@ def test_memorise(
         stories_path: Text,
     ):
         trackers = train_trackers(default_domain, stories_path, augmentation_factor=20)
-        trained_policy.train(trackers, default_domain, RegexInterpreter())
+        trained_policy.train(trackers, default_domain)
         lookup_with_augmentation = trained_policy.lookup
 
         trackers = [
@@ -426,9 +417,8 @@ def test_memorise(
         trackers_no_augmentation = train_trackers(
             default_domain, stories_path, augmentation_factor=0
         )
-        trained_policy.train(
-            trackers_no_augmentation, default_domain, RegexInterpreter()
-        )
+        trained_policy.train(trackers_no_augmentation, default_domain)
+
         lookup_no_augmentation = trained_policy.lookup
 
         assert lookup_no_augmentation == lookup_with_augmentation
@@ -445,14 +435,17 @@ def test_memorise_with_nlu(
     def test_finetune_after_load(
         self,
         trained_policy: MemoizationPolicy,
+        resource: Resource,
+        model_storage: ModelStorage,
+        execution_context: ExecutionContext,
         default_domain: Domain,
-        tmp_path: Path,
         stories_path: Text,
     ):
-        trained_policy.persist(tmp_path)
-
-        loaded_policy = MemoizationPolicy.load(tmp_path, should_finetune=True)
+        execution_context = dataclasses.replace(execution_context, is_finetuning=True)
+        loaded_policy = MemoizationPolicy.load(
+            trained_policy.config, model_storage, resource, execution_context
+        )
 
         assert loaded_policy.finetune_mode
@@ -470,9 +463,7 @@ def test_finetune_after_load(
         original_train_data = train_trackers(
             default_domain, stories_path, augmentation_factor=20
         )
-        loaded_policy.train(
-            original_train_data + [new_story], default_domain, RegexInterpreter()
-        )
+        loaded_policy.train(original_train_data + [new_story], default_domain)
 
         # Get the hash of the tracker state of new story
         new_story_states, _ = loaded_policy.featurizer.training_states_and_labels(
@@ -543,21 +534,197 @@ def test_ignore_action_unlikely_intent(
             == prediction_without_action.probabilities
         )
 
+    @pytest.mark.parametrize(
+        "featurizer_config, tracker_featurizer, state_featurizer",
+        [
+            (None, MaxHistoryTrackerFeaturizer(), type(None)),
+            ([], MaxHistoryTrackerFeaturizer(), type(None)),
+        ],
+    )
+    def test_empty_featurizer_configs(
+        self,
+        featurizer_config: Optional[Dict[Text, Any]],
+        model_storage: ModelStorage,
+        resource: Resource,
+        execution_context: ExecutionContext,
+        tracker_featurizer: MaxHistoryTrackerFeaturizer,
+        state_featurizer: Type[SingleStateFeaturizer],
+    ):
+        featurizer_config_override = (
+            {"featurizer": featurizer_config} if featurizer_config else {}
+        )
+        policy = self.create_policy(
+            None,
+            model_storage=model_storage,
+            resource=resource,
+            execution_context=execution_context,
+            config=self._config(featurizer_config_override),
+        )
+
+        featurizer = policy.featurizer
+        assert isinstance(featurizer, tracker_featurizer.__class__)
+
+        if featurizer_config:
+            expected_max_history = featurizer_config[0].get(POLICY_MAX_HISTORY)
+        else:
+            expected_max_history = self._config().get(POLICY_MAX_HISTORY)
+
+        assert featurizer.max_history == expected_max_history
+
+        assert isinstance(featurizer.state_featurizer, state_featurizer)
+
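`test_prediction` below reads the winning action by taking the argmax over `prediction.probabilities` and indexing into `domain.action_names_or_texts`. The same readout as a stand-alone helper (a sketch; `predicted_action_name` is not part of this diff):

from typing import Text

from rasa.core.policies.policy import PolicyPrediction
from rasa.shared.core.domain import Domain


def predicted_action_name(prediction: PolicyPrediction, domain: Domain) -> Text:
    # The index of the highest probability maps back into the domain's
    # flat list of action names and texts.
    winning_index = prediction.probabilities.index(max(prediction.probabilities))
    return domain.action_names_or_texts[winning_index]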
+    @pytest.mark.parametrize("max_history", [1, 2, 3, 4, None])
+    def test_prediction(
+        self,
+        max_history: Optional[int],
+        model_storage: ModelStorage,
+        resource: Resource,
+        execution_context: ExecutionContext,
+    ):
+        policy = self.create_policy(
+            featurizer=MaxHistoryTrackerFeaturizer(max_history=max_history),
+            model_storage=model_storage,
+            resource=resource,
+            execution_context=execution_context,
+        )
+
+        GREET_INTENT_NAME = "greet"
+        UTTER_GREET_ACTION = "utter_greet"
+        UTTER_BYE_ACTION = "utter_goodbye"
+        domain = Domain.from_yaml(
+            f"""
+            intents:
+            - {GREET_INTENT_NAME}
+            actions:
+            - {UTTER_GREET_ACTION}
+            - {UTTER_BYE_ACTION}
+            slots:
+              slot_1:
+                type: bool
+              slot_2:
+                type: bool
+              slot_3:
+                type: bool
+              slot_4:
+                type: bool
+            """
+        )
+        events = [
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(UTTER_GREET_ACTION),
+            SlotSet("slot_1", True),
+            ActionExecuted(UTTER_GREET_ACTION),
+            SlotSet("slot_2", True),
+            SlotSet("slot_3", True),
+            ActionExecuted(UTTER_GREET_ACTION),
+            ActionExecuted(UTTER_GREET_ACTION),
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(UTTER_GREET_ACTION),
+            SlotSet("slot_4", True),
+            ActionExecuted(UTTER_BYE_ACTION),
+        ]
+        training_story = TrackerWithCachedStates.from_events(
+            "training story", evts=events, domain=domain, slots=domain.slots,
+        )
+        test_story = TrackerWithCachedStates.from_events(
+            "test story", events[:-1], domain=domain, slots=domain.slots,
+        )
+        policy.train([training_story], domain)
+        prediction = policy.predict_action_probabilities(
+            test_story, domain, RegexInterpreter()
+        )
+        assert (
+            domain.action_names_or_texts[
+                prediction.probabilities.index(max(prediction.probabilities))
+            ]
+            == UTTER_BYE_ACTION
+        )
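The augmented suite that follows trains on a short story and predicts on a longer tracker whose extra slots never occurred in training, so an exact memo lookup misses. `AugmentedMemoizationPolicy` is expected to recover by retrying the lookup on truncated dialogue histories; the sketch below captures that fallback idea only (the real implementation also re-featurizes the truncated tracker, which is omitted here, and `truncated_histories` is a hypothetical helper):

from typing import Iterable, List, Optional, Text

from rasa.shared.core.domain import State


def truncated_histories(states: List[State]) -> Iterable[List[State]]:
    # Drop the oldest turn, then the two oldest, and so on.
    for start in range(1, len(states)):
        yield states[start:]


def recall_with_truncation(policy, states: List[State]) -> Optional[Text]:
    # Exact lookup first; on a miss, retry with progressively shorter histories.
    predicted = policy._recall_states(states)
    if predicted is not None:
        return predicted
    for shorter in truncated_histories(states):
        predicted = policy._recall_states(shorter)
        if predicted is not None:
            return predicted
    return None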
 
 
 class TestAugmentedMemoizationPolicy(TestMemoizationPolicy):
-    def create_policy(
+    """Test suite for AugmentedMemoizationPolicy."""
+
+    @staticmethod
+    def _policy_class_to_test() -> Type[PolicyGraphComponent]:
+        return AugmentedMemoizationPolicy
+
+    @pytest.mark.parametrize("max_history", [1, 2, 3, 4, None])
+    def test_augmented_prediction(
         self,
-        featurizer: Optional[TrackerFeaturizer],
-        priority: int,
+        max_history: Optional[int],
         model_storage: ModelStorage,
         resource: Resource,
         execution_context: ExecutionContext,
-        config: Optional[Dict[Text, Any]] = None,
-    ) -> Policy:
-        max_history = None
-        if isinstance(featurizer, MaxHistoryTrackerFeaturizer):
-            max_history = featurizer.max_history
-        return AugmentedMemoizationPolicy(priority=priority, max_history=max_history)
+    ):
+        policy = self.create_policy(
+            featurizer=MaxHistoryTrackerFeaturizer(max_history=max_history),
+            model_storage=model_storage,
+            resource=resource,
+            execution_context=execution_context,
+        )
+
+        GREET_INTENT_NAME = "greet"
+        UTTER_GREET_ACTION = "utter_greet"
+        UTTER_BYE_ACTION = "utter_goodbye"
+        domain = Domain.from_yaml(
+            f"""
+            intents:
+            - {GREET_INTENT_NAME}
+            actions:
+            - {UTTER_GREET_ACTION}
+            - {UTTER_BYE_ACTION}
+            slots:
+              slot_1:
+                type: bool
+                initial_value: true
+              slot_2:
+                type: bool
+              slot_3:
+                type: bool
+            """
+        )
+        training_story = TrackerWithCachedStates.from_events(
+            "training story",
+            [
+                ActionExecuted(UTTER_GREET_ACTION),
+                UserUttered(intent={"name": GREET_INTENT_NAME}),
+                ActionExecuted(UTTER_GREET_ACTION),
+                SlotSet("slot_3", True),
+                ActionExecuted(UTTER_BYE_ACTION),
+            ],
+            domain=domain,
+            slots=domain.slots,
+        )
+        test_story = TrackerWithCachedStates.from_events(
+            "test story",
+            [
+                UserUttered(intent={"name": GREET_INTENT_NAME}),
+                ActionExecuted(UTTER_GREET_ACTION),
+                SlotSet("slot_1", False),
+                ActionExecuted(UTTER_GREET_ACTION),
+                ActionExecuted(UTTER_GREET_ACTION),
+                UserUttered(intent={"name": GREET_INTENT_NAME}),
+                ActionExecuted(UTTER_GREET_ACTION),
+                SlotSet("slot_2", True),
+                ActionExecuted(UTTER_GREET_ACTION),
+                UserUttered(intent={"name": GREET_INTENT_NAME}),
+                ActionExecuted(UTTER_GREET_ACTION),
+                SlotSet("slot_3", True),
+                # ActionExecuted(UTTER_BYE_ACTION),
+            ],
+            domain=domain,
+            slots=domain.slots,
+        )
+        policy.train([training_story], domain)
+        prediction = policy.predict_action_probabilities(
+            test_story, domain, RegexInterpreter()
+        )
+        assert (
+            domain.action_names_or_texts[
+                prediction.probabilities.index(max(prediction.probabilities))
+            ]
+            == UTTER_BYE_ACTION
+        )
 
     @pytest.mark.parametrize(