From cb5963764bd60c9afba07994d76c1e4fd27d50bc Mon Sep 17 00:00:00 2001 From: Alexander Khizov Date: Tue, 7 Jul 2020 15:22:17 +0200 Subject: [PATCH] Domain file changes for the 2.0 format - Add the possibility to split the domain into separate files - Change the default session expiration time to 60 minutes --- .github/scripts/mr_generate_summary.py | 3 +- changelog/6132.feature.rst | 5 + rasa/cli/arguments/default_arguments.py | 4 +- rasa/core/domain.py | 123 +++++++++++++++--------- rasa/data.py | 15 --- rasa/importers/multi_project.py | 2 +- tests/core/test_domain.py | 25 ++--- 7 files changed, 101 insertions(+), 76 deletions(-) create mode 100644 changelog/6132.feature.rst diff --git a/.github/scripts/mr_generate_summary.py b/.github/scripts/mr_generate_summary.py index c8d1bbc72c2d..ebfad6928ad9 100644 --- a/.github/scripts/mr_generate_summary.py +++ b/.github/scripts/mr_generate_summary.py @@ -32,6 +32,7 @@ def generate_json(file, task, data): return data + def read_results(file): with open(file) as json_file: data = json.load(json_file) @@ -53,7 +54,7 @@ def read_results(file): if f not in task_mapping.keys(): continue - data = generate_json(os.path.join(dirpath, f),task_mapping[f], data) + data = generate_json(os.path.join(dirpath, f), task_mapping[f], data) with open(SUMMARY_FILE, "w") as f: json.dump(data, f, sort_keys=True, indent=2) diff --git a/changelog/6132.feature.rst b/changelog/6132.feature.rst new file mode 100644 index 000000000000..b721782ca648 --- /dev/null +++ b/changelog/6132.feature.rst @@ -0,0 +1,5 @@ +Added the possibility to split the domain into separate files. All YAML files under the path specified with ``--domain`` will be scanned for domain information (e.g. intents, actions, etc.) and then combined into a single domain. + +The default value for ``--domain`` is still ``domain.yml``. + +Also, the default session expiration time is now set to 60 minutes. 
\ No newline at end of file diff --git a/rasa/cli/arguments/default_arguments.py b/rasa/cli/arguments/default_arguments.py index f4494f98146f..9431650c8b93 100644 --- a/rasa/cli/arguments/default_arguments.py +++ b/rasa/cli/arguments/default_arguments.py @@ -56,7 +56,9 @@ def add_domain_param( "--domain", type=str, default=DEFAULT_DOMAIN_PATH, - help="Domain specification (yml file).", + help="Domain specification. It can be a single 'yaml' file, or a directory " + "that contains several files with domain specification in it. The content " + "of these files will be read and merged together.", ) diff --git a/rasa/core/domain.py b/rasa/core/domain.py index 40953e3c99a5..8f1728b99d65 100644 --- a/rasa/core/domain.py +++ b/rasa/core/domain.py @@ -7,6 +7,8 @@ from pathlib import Path from typing import Any, Dict, List, NamedTuple, Optional, Set, Text, Tuple, Union +from ruamel.yaml import YAMLError + import rasa.core.constants from rasa.utils.common import ( raise_warning, @@ -19,6 +21,7 @@ DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION, DOMAIN_SCHEMA_FILE, DOCS_URL_DOMAINS, + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, ) from rasa.core import utils from rasa.core.actions import action # pytype: disable=pyi-error @@ -47,6 +50,23 @@ USE_ENTITIES_KEY = "use_entities" IGNORE_ENTITIES_KEY = "ignore_entities" +KEY_SLOTS = "slots" +KEY_INTENTS = "intents" +KEY_ENTITIES = "entities" +KEY_RESPONSES = "responses" +KEY_ACTIONS = "actions" +KEY_FORMS = "forms" + +ALL_DOMAIN_KEYS = [ + KEY_SLOTS, + KEY_FORMS, + KEY_ACTIONS, + KEY_ENTITIES, + KEY_INTENTS, + KEY_RESPONSES, +] + + if typing.TYPE_CHECKING: from rasa.core.trackers import DialogueStateTracker @@ -69,7 +89,10 @@ class SessionConfig(NamedTuple): @staticmethod def default() -> "SessionConfig": # TODO: 2.0, reconsider how to apply sessions to old projects - return SessionConfig(0, DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION) + return SessionConfig( + DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES, + DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION, 
+ ) def are_sessions_enabled(self) -> bool: return self.session_expiration_time > 0 @@ -134,7 +157,7 @@ def from_yaml(cls, yaml: Text) -> "Domain": @classmethod def from_dict(cls, data: Dict) -> "Domain": - utter_templates = cls.collect_templates(data.get("responses", {})) + utter_templates = cls.collect_templates(data.get(KEY_RESPONSES, {})) if "templates" in data: raise_warning( "Your domain file contains the key: 'templates'. This has been " @@ -146,54 +169,45 @@ def from_dict(cls, data: Dict) -> "Domain": ) utter_templates = cls.collect_templates(data.get("templates", {})) - slots = cls.collect_slots(data.get("slots", {})) + slots = cls.collect_slots(data.get(KEY_SLOTS, {})) additional_arguments = data.get("config", {}) session_config = cls._get_session_config(data.get(SESSION_CONFIG_KEY, {})) - intents = data.get("intents", {}) + intents = data.get(KEY_INTENTS, {}) return cls( intents, - data.get("entities", []), + data.get(KEY_ENTITIES, []), slots, utter_templates, - data.get("actions", []), - data.get("forms", []), + data.get(KEY_ACTIONS, []), + data.get(KEY_FORMS, []), session_config=session_config, **additional_arguments, ) @staticmethod def _get_session_config(session_config: Dict) -> SessionConfig: - session_expiration_time = session_config.get(SESSION_EXPIRATION_TIME_KEY) + session_expiration_time_min = session_config.get(SESSION_EXPIRATION_TIME_KEY) # TODO: 2.0 reconsider how to apply sessions to old projects and legacy trackers - if session_expiration_time is None: - raise_warning( - "No tracker session configuration was found in the loaded domain. 
" - "Domains without a session config will automatically receive a " - "session expiration time of 60 minutes in Rasa version 2.0 if not " - "configured otherwise.", - FutureWarning, - docs=DOCS_URL_DOMAINS + "#session-configuration", - ) - session_expiration_time = 0 + if session_expiration_time_min is None: + session_expiration_time_min = DEFAULT_SESSION_EXPIRATION_TIME_IN_MINUTES carry_over_slots = session_config.get( CARRY_OVER_SLOTS_KEY, DEFAULT_CARRY_OVER_SLOTS_TO_NEW_SESSION ) - return SessionConfig(session_expiration_time, carry_over_slots) + return SessionConfig(session_expiration_time_min, carry_over_slots) @classmethod def from_directory(cls, path: Text) -> "Domain": """Loads and merges multiple domain files recursively from a directory tree.""" - from rasa import data domain = Domain.empty() for root, _, files in os.walk(path, followlinks=True): for file in files: full_path = os.path.join(root, file) - if data.is_domain_file(full_path): + if Domain.is_domain_file(full_path): other = Domain.from_file(full_path) domain = other.merge(domain) @@ -236,20 +250,20 @@ def merge_lists(l1: List[Any], l2: List[Any]) -> List[Any]: combined[SESSION_CONFIG_KEY] = domain_dict[SESSION_CONFIG_KEY] # intents is list of dicts - intents_1 = {list(i.keys())[0]: i for i in combined["intents"]} - intents_2 = {list(i.keys())[0]: i for i in domain_dict["intents"]} + intents_1 = {list(i.keys())[0]: i for i in combined[KEY_INTENTS]} + intents_2 = {list(i.keys())[0]: i for i in domain_dict[KEY_INTENTS]} merged_intents = merge_dicts(intents_1, intents_2, override) - combined["intents"] = list(merged_intents.values()) + combined[KEY_INTENTS] = list(merged_intents.values()) # remove existing forms from new actions - for form in combined["forms"]: - if form in domain_dict["actions"]: - domain_dict["actions"].remove(form) + for form in combined[KEY_FORMS]: + if form in domain_dict[KEY_ACTIONS]: + domain_dict[KEY_ACTIONS].remove(form) - for key in ["entities", "actions", "forms"]: + for 
key in [KEY_ENTITIES, KEY_ACTIONS, KEY_FORMS]: combined[key] = merge_lists(combined[key], domain_dict[key]) - for key in ["responses", "slots"]: + for key in [KEY_RESPONSES, KEY_SLOTS]: combined[key] = merge_dicts(combined[key], domain_dict[key], override) return self.__class__.from_dict(combined) @@ -431,8 +445,8 @@ def __init__( def __hash__(self) -> int: self_as_dict = self.as_dict() - self_as_dict["intents"] = sort_list_of_dicts_by_first_key( - self_as_dict["intents"] + self_as_dict[KEY_INTENTS] = sort_list_of_dicts_by_first_key( + self_as_dict[KEY_INTENTS] ) self_as_string = json.dumps(self_as_dict, sort_keys=True) text_hash = utils.get_text_hash(self_as_string) @@ -774,12 +788,12 @@ def as_dict(self) -> Dict[Text, Any]: SESSION_EXPIRATION_TIME_KEY: self.session_config.session_expiration_time, CARRY_OVER_SLOTS_KEY: self.session_config.carry_over_slots, }, - "intents": self._transform_intents_for_file(), - "entities": self.entities, - "slots": self._slot_definitions(), - "responses": self.templates, - "actions": self.user_actions, # class names of the actions - "forms": self.form_names, + KEY_INTENTS: self._transform_intents_for_file(), + KEY_ENTITIES: self.entities, + KEY_SLOTS: self._slot_definitions(), + KEY_RESPONSES: self.templates, + KEY_ACTIONS: self.user_actions, # class names of the actions + KEY_FORMS: self.form_names, } def persist(self, filename: Union[Text, Path]) -> None: @@ -827,16 +841,16 @@ def cleaned_domain(self) -> Dict[Text, Any]: """ domain_data = self.as_dict() - for idx, intent_info in enumerate(domain_data["intents"]): + for idx, intent_info in enumerate(domain_data[KEY_INTENTS]): for name, intent in intent_info.items(): if intent.get(USE_ENTITIES_KEY) is True: del intent[USE_ENTITIES_KEY] if not intent.get(IGNORE_ENTITIES_KEY): intent.pop(IGNORE_ENTITIES_KEY, None) if len(intent) == 0: - domain_data["intents"][idx] = name + domain_data[KEY_INTENTS][idx] = name - for slot in domain_data["slots"].values(): # pytype: 
disable=attribute-error + for slot in domain_data[KEY_SLOTS].values(): # pytype: disable=attribute-error if slot["initial_value"] is None: del slot["initial_value"] if slot["auto_fill"]: @@ -1040,9 +1054,9 @@ def get_duplicate_exception_message( raise InvalidDomain( get_exception_message( [ - (duplicate_actions, "actions"), - (duplicate_slots, "slots"), - (duplicate_entities, "entities"), + (duplicate_actions, KEY_ACTIONS), + (duplicate_slots, KEY_SLOTS), + (duplicate_entities, KEY_ENTITIES), ], incorrect_mappings, ) @@ -1074,6 +1088,29 @@ def is_empty(self) -> bool: return self.as_dict() == Domain.empty().as_dict() + @staticmethod + def is_domain_file(filename: Text) -> bool: + """Checks whether the given file path is a Rasa domain file. + + Args: + filename: Path of the file which should be checked. + + Returns: + `True` if it is a YAML mapping with a domain key, otherwise `False`. + """ + from rasa.data import YAML_FILE_EXTENSIONS + + if Path(filename).suffix not in YAML_FILE_EXTENSIONS: + return False + try: + content = rasa.utils.io.read_yaml_file(filename) + except YAMLError: + return False + # Only a YAML mapping can hold top-level domain keys (not a scalar/list). + if not isinstance(content, dict): + return False + return any(key in content for key in ALL_DOMAIN_KEYS) + class TemplateDomain(Domain): pass diff --git a/rasa/data.py b/rasa/data.py index 2041ddf1a519..fbcf7c9d7e45 100644 --- a/rasa/data.py +++ b/rasa/data.py @@ -185,21 +185,6 @@ def is_end_to_end_conversation_test_file(file_path: Text) -> bool: ) -def is_domain_file(file_path: Text) -> bool: - """Checks whether the given file path is a Rasa domain file. - - Args: - file_path: Path of the file which should be checked. - - Returns: - `True` if it's a domain file, otherwise `False`. - """ - - file_name = os.path.basename(file_path) - - return file_name in ["domain.yml", "domain.yaml"] - - def is_config_file(file_path: Text) -> bool: """Checks whether the given file path is a Rasa config file. 
diff --git a/rasa/importers/multi_project.py b/rasa/importers/multi_project.py index 381e3a80cd05..17dff5af01af 100644 --- a/rasa/importers/multi_project.py +++ b/rasa/importers/multi_project.py @@ -97,7 +97,7 @@ def _init_from_directory(self, path: Text): if data.is_end_to_end_conversation_test_file(full_path): self._e2e_story_paths.append(full_path) - elif data.is_domain_file(full_path): + elif Domain.is_domain_file(full_path): self._domain_paths.append(full_path) elif data.is_nlu_file(full_path): self._nlu_paths.append(full_path) diff --git a/tests/core/test_domain.py b/tests/core/test_domain.py index 4953d7282374..7fbaec1e2f49 100644 --- a/tests/core/test_domain.py +++ b/tests/core/test_domain.py @@ -430,29 +430,24 @@ def test_collect_intent_properties(intents, entities, intent_properties): def test_load_domain_from_directory_tree(tmpdir_factory: TempdirFactory): root = tmpdir_factory.mktemp("Parent Bot") root_domain = {"actions": ["utter_root", "utter_root2"]} - utils.dump_obj_as_yaml_to_file(root / "domain.yml", root_domain) + utils.dump_obj_as_yaml_to_file(root / "domain_pt1.yml", root_domain) subdirectory_1 = root / "Skill 1" subdirectory_1.mkdir() skill_1_domain = {"actions": ["utter_skill_1"]} - utils.dump_obj_as_yaml_to_file(subdirectory_1 / "domain.yml", skill_1_domain) + utils.dump_obj_as_yaml_to_file(subdirectory_1 / "domain_pt2.yml", skill_1_domain) subdirectory_2 = root / "Skill 2" subdirectory_2.mkdir() skill_2_domain = {"actions": ["utter_skill_2"]} - utils.dump_obj_as_yaml_to_file(subdirectory_2 / "domain.yml", skill_2_domain) + utils.dump_obj_as_yaml_to_file(subdirectory_2 / "domain_pt3.yml", skill_2_domain) subsubdirectory = subdirectory_2 / "Skill 2-1" subsubdirectory.mkdir() skill_2_1_domain = {"actions": ["utter_subskill", "utter_root"]} # Check if loading from `.yaml` also works - utils.dump_obj_as_yaml_to_file(subsubdirectory / "domain.yaml", skill_2_1_domain) - - subsubdirectory_2 = subdirectory_2 / "Skill 2-2" - subsubdirectory_2.mkdir() 
- excluded_domain = {"actions": ["should not be loaded"]} utils.dump_obj_as_yaml_to_file( - subsubdirectory_2 / "other_name.yaml", excluded_domain + subsubdirectory / "domain_pt4.yaml", skill_2_1_domain ) actual = Domain.load(str(root)) @@ -644,7 +639,7 @@ def test_clean_domain_for_file(): "actions": ["utter_default", "utter_goodbye", "utter_greet"], "session_config": { "carry_over_slots_to_new_session": True, - "session_expiration_time": 0, + "session_expiration_time": 60, }, } @@ -706,16 +701,16 @@ def test_add_knowledge_base_slots(default_domain): [ ( """session_config: - session_expiration_time: 0 + session_expiration_time: 60 carry_over_slots_to_new_session: true""", - 0, + 60, True, ), - ("", 0, True), + ("", 60, True), ( """session_config: carry_over_slots_to_new_session: false""", - 0, + 60, False, ), ( @@ -725,7 +720,7 @@ def test_add_knowledge_base_slots(default_domain): 20.2, False, ), - ("""session_config: {}""", 0, True), + ("""session_config: {}""", 60, True), ], ) def test_session_config(