From 100a1c9cc6d189d44225c25985877433f3ddc107 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Wed, 4 Sep 2019 11:08:07 +0200 Subject: [PATCH 01/45] Avoid retraining Core model if only Templates section is changed --- rasa/core/agent.py | 10 +++- rasa/core/train.py | 4 +- rasa/model.py | 126 ++++++++++++++++++++------------------- rasa/train.py | 23 +++++-- tests/core/test_model.py | 106 ++++++++++++++++++++------------ 5 files changed, 162 insertions(+), 107 deletions(-) diff --git a/rasa/core/agent.py b/rasa/core/agent.py index f4410b7bf6df..8be118add638 100644 --- a/rasa/core/agent.py +++ b/rasa/core/agent.py @@ -753,7 +753,12 @@ def _clear_model_directory(model_path: Text) -> None: "overwritten.".format(model_path) ) - def persist(self, model_path: Text, dump_flattened_stories: bool = False) -> None: + def persist( + self, + model_path: Text, + dump_flattened_stories: bool = False, + replace_templates_only: bool = False, + ) -> None: """Persists this agent into a directory for later loading and usage.""" if not self.is_core_ready(): @@ -764,7 +769,8 @@ def persist(self, model_path: Text, dump_flattened_stories: bool = False) -> Non self._clear_model_directory(model_path) - self.policy_ensemble.persist(model_path, dump_flattened_stories) + if not replace_templates_only: + self.policy_ensemble.persist(model_path, dump_flattened_stories) self.domain.persist(os.path.join(model_path, DEFAULT_DOMAIN_PATH)) self.domain.persist_specification(model_path) diff --git a/rasa/core/train.py b/rasa/core/train.py index 9fe1c7c83a2f..f4f7942d547c 100644 --- a/rasa/core/train.py +++ b/rasa/core/train.py @@ -27,6 +27,7 @@ async def train( dump_stories: bool = False, policy_config: Optional[Union[Text, Dict]] = None, exclusion_percentage: int = None, + replace_templates_only: bool = False, kwargs: Optional[Dict] = None, ): from rasa.core.agent import Agent @@ -59,12 +60,11 @@ async def train( "debug_plots", }, ) - training_data = await agent.load_data( training_resource, exclusion_percentage=exclusion_percentage, **data_load_args ) agent.train(training_data, **kwargs) - agent.persist(output_path, dump_stories) + agent.persist(output_path, dump_stories, replace_templates_only) return agent diff --git a/rasa/model.py b/rasa/model.py index f4b1375e728a..4b2e2c58af29 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -4,6 +4,7 @@ import shutil import tempfile import typing +from collections import namedtuple from typing import Text, Tuple, Union, Optional, List, Dict import rasa.utils.io @@ -22,23 +23,50 @@ if typing.TYPE_CHECKING: from rasa.importers.importer import TrainingDataImporter -# Type alias for the fingerprint -Fingerprint = Dict[Text, Union[Text, List[Text], int, float]] logger = logging.getLogger(__name__) + +# Type alias for the fingerprint +Fingerprint = Dict[Text, Union[Text, List[Text], int, float]] + FINGERPRINT_FILE_PATH = "fingerprint.json" FINGERPRINT_CONFIG_KEY = "config" FINGERPRINT_CONFIG_CORE_KEY = "core-config" FINGERPRINT_CONFIG_NLU_KEY = "nlu-config" -FINGERPRINT_DOMAIN_KEY = "domain" +FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY = "domain" +FINGERPRINT_TEMPLATES_KEY = "templates" FINGERPRINT_RASA_VERSION_KEY = "version" FINGERPRINT_STORIES_KEY = "stories" FINGERPRINT_NLU_DATA_KEY = "messages" FINGERPRINT_TRAINED_AT_KEY = "trained_at" +Section = namedtuple("Section", ["name", "relevant_keys"]) + +SECTION_CORE = Section( + name="Core model", + relevant_keys=[ + FINGERPRINT_CONFIG_KEY, + FINGERPRINT_CONFIG_CORE_KEY, + FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY, + FINGERPRINT_STORIES_KEY, + FINGERPRINT_RASA_VERSION_KEY, + ], +) +SECTION_NLU = Section( + name="NLU model", + relevant_keys=[ + FINGERPRINT_CONFIG_KEY, + FINGERPRINT_CONFIG_NLU_KEY, + FINGERPRINT_NLU_DATA_KEY, + FINGERPRINT_RASA_VERSION_KEY, + ], +) +SECTION_TEMPLATES = Section(name="Templates", relevant_keys=[FINGERPRINT_TEMPLATES_KEY]) + + def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: """Gets a model and unpacks it. Raises a `ModelNotFound` exception if no model could be found at the provided path. @@ -200,6 +228,8 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin The fingerprint. """ + from rasa.core.domain import Domain + import rasa import time @@ -208,6 +238,10 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin stories = await file_importer.get_stories() nlu_data = await file_importer.get_nlu_data() + domain_dict = domain.as_dict() + templates = domain_dict.pop("templates") + domain_without_templates = Domain.from_dict(domain_dict) + return { FINGERPRINT_CONFIG_KEY: _get_hash_of_config( config, exclude_keys=CONFIG_MANDATORY_KEYS @@ -218,7 +252,8 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin FINGERPRINT_CONFIG_NLU_KEY: _get_hash_of_config( config, include_keys=CONFIG_MANDATORY_KEYS_NLU ), - FINGERPRINT_DOMAIN_KEY: hash(domain), + FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY: hash(domain_without_templates), + FINGERPRINT_TEMPLATES_KEY: get_dict_hash(templates), FINGERPRINT_NLU_DATA_KEY: hash(nlu_data), FINGERPRINT_STORIES_KEY: hash(stories), FINGERPRINT_TRAINED_AT_KEY: time.time(), @@ -275,58 +310,13 @@ def persist_fingerprint(output_path: Text, fingerprint: Fingerprint): dump_obj_as_json_to_file(path, fingerprint) -def core_fingerprint_changed( - fingerprint1: Fingerprint, fingerprint2: Fingerprint +def section_fingerprint_changed( + fingerprint1: Fingerprint, fingerprint2: Fingerprint, section: Section ) -> bool: - """Checks whether the fingerprints of the Core model changed. - - Args: - fingerprint1: A fingerprint. - fingerprint2: Another fingerprint. - - Returns: - `True` if the fingerprint for the Core model changed, else `False`. - - """ - relevant_keys = [ - FINGERPRINT_CONFIG_KEY, - FINGERPRINT_CONFIG_CORE_KEY, - FINGERPRINT_DOMAIN_KEY, - FINGERPRINT_STORIES_KEY, - FINGERPRINT_RASA_VERSION_KEY, - ] - - for k in relevant_keys: - if fingerprint1.get(k) != fingerprint2.get(k): - logger.info("Data ({}) for Core model changed.".format(k)) - return True - return False - - -def nlu_fingerprint_changed( - fingerprint1: Fingerprint, fingerprint2: Fingerprint -) -> bool: - """Checks whether the fingerprints of the NLU model changed. - - Args: - fingerprint1: A fingerprint. - fingerprint2: Another fingerprint. - - Returns: - `True` if the fingerprint for the NLU model changed, else `False`. - - """ - - relevant_keys = [ - FINGERPRINT_CONFIG_KEY, - FINGERPRINT_CONFIG_NLU_KEY, - FINGERPRINT_NLU_DATA_KEY, - FINGERPRINT_RASA_VERSION_KEY, - ] - - for k in relevant_keys: + """Check whether the fingerprint of a section has changed.""" + for k in section.relevant_keys: if fingerprint1.get(k) != fingerprint2.get(k): - logger.info("Data ({}) for NLU model changed.".format(k)) + logger.info("Data ({}) for {} section changed.".format(k, section.name)) return True return False @@ -346,7 +336,7 @@ def merge_model(source: Text, target: Text) -> bool: shutil.move(source, target) return True except Exception as e: - logging.debug(e) + logging.debug("Could not merge model: {}".format(e)) return False @@ -363,25 +353,37 @@ def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Te to be retrained or not. """ - retrain_nlu = retrain_core = True + retrain_nlu = retrain_core = replace_templates = True if old_model is None or not os.path.exists(old_model): - return retrain_core, retrain_nlu + return retrain_core, retrain_nlu, replace_templates with unpack_model(old_model) as unpacked: last_fingerprint = fingerprint_from_path(unpacked) - old_core, old_nlu = get_model_subdirectories(unpacked) - if not core_fingerprint_changed(last_fingerprint, new_fingerprint): + retrain_core = section_fingerprint_changed( + last_fingerprint, new_fingerprint, SECTION_CORE + ) + replace_templates = section_fingerprint_changed( + last_fingerprint, new_fingerprint, SECTION_TEMPLATES + ) + retrain_nlu = section_fingerprint_changed( + last_fingerprint, new_fingerprint, SECTION_NLU + ) + + # If merging directories fails, force retrain + if not retrain_core: target_path = os.path.join(train_path, "core") retrain_core = not merge_model(old_core, target_path) - - if not nlu_fingerprint_changed(last_fingerprint, new_fingerprint): + else: + target_path = os.path.join(train_path, "core") + replace_templates = not merge_model(old_core, target_path) + if not retrain_nlu: target_path = os.path.join(train_path, "nlu") retrain_nlu = not merge_model(old_nlu, target_path) - return retrain_core, retrain_nlu + return retrain_core, retrain_nlu, replace_templates def package_model( diff --git a/rasa/train.py b/rasa/train.py index a520b38fb108..51668ced4d21 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -159,11 +159,11 @@ async def _train_async_internal( ) old_model = model.get_latest_model(output_path) - retrain_core, retrain_nlu = model.should_retrain( + retrain_core, retrain_nlu, replace_templates = model.should_retrain( new_fingerprint, old_model, train_path ) - if force_training or retrain_core or retrain_nlu: + if force_training or retrain_core or retrain_nlu or replace_templates: await _do_training( file_importer, output_path=output_path, @@ -171,6 +171,7 @@ async def _train_async_internal( force_training=force_training, retrain_core=retrain_core, retrain_nlu=retrain_nlu, + replace_templates=replace_templates, fixed_model_name=fixed_model_name, persist_nlu_training_data=persist_nlu_training_data, kwargs=kwargs, @@ -197,17 +198,29 @@ async def _do_training( force_training: bool = False, retrain_core: bool = True, retrain_nlu: bool = True, + replace_templates: bool = True, fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, kwargs: Optional[Dict] = None, ): - if force_training or retrain_core: + if retrain_core or replace_templates or force_training: + replace_templates_only = ( + replace_templates and not retrain_core and not force_training + ) + if replace_templates_only: + print_color( + "Core stories/configuration did not change. " + "Only the templates section has been changed. A new model with " + "the updated templates will be created.", + color=bcolors.OKBLUE, + ) await _train_core_with_validated_data( file_importer, output=output_path, train_path=train_path, fixed_model_name=fixed_model_name, + replace_templates_only=replace_templates_only, kwargs=kwargs, ) else: @@ -216,7 +229,7 @@ async def _do_training( color=bcolors.OKBLUE, ) - if force_training or retrain_nlu: + if retrain_nlu or force_training: await _train_nlu_with_validated_data( file_importer, output=output_path, @@ -314,6 +327,7 @@ async def _train_core_with_validated_data( output: Text, train_path: Optional[Text] = None, fixed_model_name: Optional[Text] = None, + replace_templates_only: bool = False, kwargs: Optional[Dict] = None, ) -> Optional[Text]: """Train Core with validated training and config data.""" @@ -337,6 +351,7 @@ async def _train_core_with_validated_data( training_resource=file_importer, output_path=os.path.join(_train_path, "core"), policy_config=config, + replace_templates_only=replace_templates_only, kwargs=kwargs, ) print_color("Core model training completed.", color=bcolors.OKBLUE) diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 40824f596984..90530d303016 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -2,7 +2,7 @@ import tempfile import time import shutil -from typing import Text, Optional +from typing import Text, Optional, List, Any import pytest from _pytest.tmpdir import TempdirFactory @@ -11,27 +11,31 @@ import rasa.core import rasa.nlu from rasa.importers.rasa import RasaFileImporter -from rasa.constants import DEFAULT_CONFIG_PATH, DEFAULT_DATA_PATH +from rasa.constants import DEFAULT_CONFIG_PATH, DEFAULT_DATA_PATH, DEFAULT_DOMAIN_PATH from rasa.core.domain import Domain +from rasa.core.utils import get_dict_hash from rasa.model import ( FINGERPRINT_CONFIG_KEY, - FINGERPRINT_DOMAIN_KEY, + FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY, + FINGERPRINT_TEMPLATES_KEY, FINGERPRINT_FILE_PATH, FINGERPRINT_NLU_DATA_KEY, FINGERPRINT_RASA_VERSION_KEY, FINGERPRINT_STORIES_KEY, FINGERPRINT_TRAINED_AT_KEY, - core_fingerprint_changed, + FINGERPRINT_CONFIG_CORE_KEY, + FINGERPRINT_CONFIG_NLU_KEY, + SECTION_CORE, + SECTION_NLU, + SECTION_TEMPLATES, create_package_rasa, get_latest_model, get_model, get_model_subdirectories, model_fingerprint, - nlu_fingerprint_changed, Fingerprint, + section_fingerprint_changed, should_retrain, - FINGERPRINT_CONFIG_CORE_KEY, - FINGERPRINT_CONFIG_NLU_KEY, ) from rasa.exceptions import ModelNotFound @@ -91,13 +95,14 @@ def test_get_model_from_directory_nlu_only(trained_model): def _fingerprint( - config: Optional[Text] = None, - config_nlu: Optional[Text] = None, - config_core: Optional[Text] = None, - domain: Optional[int] = None, + config: Optional[Any] = None, + config_nlu: Optional[Any] = None, + config_core: Optional[Any] = None, + domain: Optional[Any] = None, + templates: Optional[Any] = None, + stories: Optional[Any] = None, + nlu: Optional[Any] = None, rasa_version: Text = "1.0", - stories: Optional[int] = None, - nlu: Optional[int] = None, ): return { FINGERPRINT_CONFIG_KEY: config if config is not None else ["test"], @@ -105,7 +110,10 @@ def _fingerprint( if config_core is not None else ["test"], FINGERPRINT_CONFIG_NLU_KEY: config_nlu if config_nlu is not None else ["test"], - FINGERPRINT_DOMAIN_KEY: domain if domain is not None else ["test"], + FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY: domain + if domain is not None + else ["test"], + FINGERPRINT_TEMPLATES_KEY: templates if templates is not None else ["test"], FINGERPRINT_TRAINED_AT_KEY: time.time(), FINGERPRINT_RASA_VERSION_KEY: rasa_version, FINGERPRINT_STORIES_KEY: stories if stories is not None else ["test"], @@ -126,39 +134,50 @@ def test_persist_and_load_fingerprint(): @pytest.mark.parametrize( - "fingerprint2", + "fingerprint2, changed", [ - _fingerprint(config=["other"]), - _fingerprint(domain=["other"]), - _fingerprint(domain=Domain.empty()), - _fingerprint(stories=["test", "other"]), - _fingerprint(rasa_version="100"), - _fingerprint(config=["other"], domain=["other"]), + (_fingerprint(config=["other"]), True), + (_fingerprint(config_core=["other"]), True), + (_fingerprint(domain=["other"]), True), + (_fingerprint(domain=Domain.empty()), True), + (_fingerprint(stories=["test", "other"]), True), + (_fingerprint(rasa_version="100"), True), + (_fingerprint(config=["other"], domain=["other"]), True), + (_fingerprint(templates=["other"]), False), + (_fingerprint(nlu=["test", "other"]), False), + (_fingerprint(config_nlu=["other"]), False), ], ) -def test_core_fingerprint_changed(fingerprint2): +def test_core_fingerprint_changed(fingerprint2, changed): fingerprint1 = _fingerprint() - assert core_fingerprint_changed(fingerprint1, fingerprint2) + assert ( + section_fingerprint_changed(fingerprint1, fingerprint2, SECTION_CORE) is changed + ) @pytest.mark.parametrize( - "fingerprint2", + "fingerprint2, changed", [ - _fingerprint(config=["other"]), - _fingerprint(nlu=["test", "other"]), - _fingerprint(rasa_version="100"), - _fingerprint(rasa_version="100", config=["other"]), + (_fingerprint(config=["other"]), True), + (_fingerprint(nlu=["test", "other"]), True), + (_fingerprint(rasa_version="100"), True), + (_fingerprint(rasa_version="100", config=["other"]), True), + (_fingerprint(templates=["other"]), False), + (_fingerprint(config_core=["other"]), False), + (_fingerprint(stories=["other"]), False), ], ) -def test_nlu_fingerprint_changed(fingerprint2): +def test_nlu_fingerprint_changed(fingerprint2, changed): fingerprint1 = _fingerprint() - assert nlu_fingerprint_changed(fingerprint1, fingerprint2) + assert ( + section_fingerprint_changed(fingerprint1, fingerprint2, SECTION_NLU) is changed + ) def _project_files( project, config_file=DEFAULT_CONFIG_PATH, - domain="domain.yml", + domain=DEFAULT_DOMAIN_PATH, training_files=DEFAULT_DATA_PATH, ): paths = { @@ -192,9 +211,10 @@ async def test_create_fingerprint_from_invalid_paths(project, project_files): config_nlu="", config_core="", domain=hash(Domain.empty()), - rasa_version=rasa.__version__, + templates=get_dict_hash(Domain.empty().templates), stories=hash(StoryGraph([])), nlu=hash(TrainingData()), + rasa_version=rasa.__version__, ) actual = await model_fingerprint(project_files) @@ -240,48 +260,62 @@ async def test_rasa_packaging(trained_model, project, use_fingerprint): "old": _fingerprint(stories=["others"]), "retrain_core": True, "retrain_nlu": False, + "replace_templates": False, }, { "new": _fingerprint(nlu=["others"]), "old": _fingerprint(), "retrain_core": False, "retrain_nlu": True, + "replace_templates": False, }, { "new": _fingerprint(config="others"), "old": _fingerprint(), "retrain_core": True, "retrain_nlu": True, + "replace_templates": False, }, { "new": _fingerprint(config_core="others"), "old": _fingerprint(), "retrain_core": True, "retrain_nlu": False, + "replace_templates": False, }, { "new": _fingerprint(), "old": _fingerprint(config_nlu="others"), "retrain_core": False, "retrain_nlu": True, + "replace_templates": False, }, { "new": _fingerprint(), "old": _fingerprint(), "retrain_core": False, "retrain_nlu": False, + "replace_templates": False, + }, + { + "new": _fingerprint(), + "old": _fingerprint(templates=["others"]), + "retrain_core": False, + "retrain_nlu": False, + "replace_templates": True, }, ], ) -def test_should_retrain(trained_model, fingerprint): +def test_should_retrain(trained_model: Text, fingerprint: Fingerprint): old_model = set_fingerprint(trained_model, fingerprint["old"]) - retrain_core, retrain_nlu = should_retrain( + retrain_core, retrain_nlu, replace_templates = should_retrain( fingerprint["new"], old_model, tempfile.mkdtemp() ) assert retrain_core == fingerprint["retrain_core"] assert retrain_nlu == fingerprint["retrain_nlu"] + assert replace_templates == fingerprint["replace_templates"] def set_fingerprint( @@ -290,9 +324,7 @@ def set_fingerprint( unpacked_model_path = get_model(trained_model) os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH)) - if use_fingerprint: - fingerprint = fingerprint - else: + if not use_fingerprint: fingerprint = None tempdir = tempfile.mkdtemp() From 482388b0d69bc0befdd3827e88a2f6f131729c0d Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Thu, 5 Sep 2019 01:08:29 +0200 Subject: [PATCH 02/45] Clean up documentation --- rasa/model.py | 27 +++++++++++++++------------ tests/core/test_model.py | 6 +----- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index 4b2e2c58af29..1f4437c74292 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -68,7 +68,7 @@ def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: - """Gets a model and unpacks it. Raises a `ModelNotFound` exception if + """Get a model and unpacks it. Raises a `ModelNotFound` exception if no model could be found at the provided path. Args: @@ -99,7 +99,7 @@ def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: def get_latest_model(model_path: Text = DEFAULT_MODELS_PATH) -> Optional[Text]: - """Gets the latest model from a path. + """Get the latest model from a path. Args: model_path: Path to a directory containing zipped models. @@ -122,7 +122,7 @@ def get_latest_model(model_path: Text = DEFAULT_MODELS_PATH) -> Optional[Text]: def unpack_model( model_file: Text, working_directory: Optional[Text] = None ) -> TempDirectoryPath: - """Unpacks a zipped Rasa model. + """Unpack a zipped Rasa model. Args: model_file: Path to zipped model. @@ -154,7 +154,7 @@ def unpack_model( def get_model_subdirectories( unpacked_model_path: Text ) -> Tuple[Optional[Text], Optional[Text]]: - """Returns paths for Core and NLU model directories, if they exist. + """Return paths for Core and NLU model directories, if they exist. If neither directories exist, a `ModelNotFound` exception is raised. Args: @@ -189,7 +189,7 @@ def create_package_rasa( output_filename: Text, fingerprint: Optional[Fingerprint] = None, ) -> Text: - """Creates a zipped Rasa model from trained model files. + """Create a zipped Rasa model from trained model files. Args: training_directory: Path to the directory which contains the trained @@ -219,7 +219,7 @@ def create_package_rasa( async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprint: - """Creates a model fingerprint from its used configuration and training data. + """Create a model fingerprint from its used configuration and training data. Args: file_importer: File importer which provides the training data and model config. @@ -277,7 +277,7 @@ def _get_hash_of_config( def fingerprint_from_path(model_path: Text) -> Fingerprint: - """Loads a persisted fingerprint. + """Load a persisted fingerprint. Args: model_path: Path to directory containing the fingerprint. @@ -297,7 +297,7 @@ def fingerprint_from_path(model_path: Text) -> Fingerprint: def persist_fingerprint(output_path: Text, fingerprint: Fingerprint): - """Persists a model fingerprint. + """Persist a model fingerprint. Args: output_path: Directory in which the fingerprint should be saved. @@ -322,7 +322,7 @@ def section_fingerprint_changed( def merge_model(source: Text, target: Text) -> bool: - """Merges two model directories. + """Merge two model directories. Args: source: The original folder which should be merged in another. @@ -341,7 +341,7 @@ def merge_model(source: Text, target: Text) -> bool: def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Text): - """Checks which component of a model should be retrained. + """Check which components of a model should be retrained. Args: new_fingerprint: The fingerprint of the new model to be trained. @@ -372,11 +372,14 @@ def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Te last_fingerprint, new_fingerprint, SECTION_NLU ) - # If merging directories fails, force retrain + # If merging directories fails, force retrain. if not retrain_core: target_path = os.path.join(train_path, "core") retrain_core = not merge_model(old_core, target_path) else: + # In the case of replace_templates, only do it if the whole + # of Core is not being retrained. If it is, then replacing of + # templates will be automatically taken care of during that process. target_path = os.path.join(train_path, "core") replace_templates = not merge_model(old_core, target_path) if not retrain_nlu: @@ -394,7 +397,7 @@ def package_model( model_prefix: Text = "", ): """ - Compresses a trained model. + Compress a trained model. Args: fingerprint: fingerprint of the model diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 90530d303016..0e282c84cc13 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -318,14 +318,10 @@ def test_should_retrain(trained_model: Text, fingerprint: Fingerprint): assert replace_templates == fingerprint["replace_templates"] -def set_fingerprint( - trained_model: Text, fingerprint: Fingerprint, use_fingerprint: bool = True -) -> Text: +def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: unpacked_model_path = get_model(trained_model) os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH)) - if not use_fingerprint: - fingerprint = None tempdir = tempfile.mkdtemp() output_path = os.path.join(tempdir, "test.tar.gz") From eaf077da2e0051b5c66f73e8c1646f1b00cf173a Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Thu, 5 Sep 2019 13:33:21 +0200 Subject: [PATCH 03/45] Update CHANGELOG --- CHANGELOG.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fbd0ebdcace3..d50a7b3b64c2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,13 @@ Rasa Change Log All notable changes to this project will be documented in this file. This project adheres to `Semantic Versioning`_ starting with version 1.0. +[Unreleased 1.3.1] +^^^^^^^^^^^^^^^^^^ + +Changed +----- +- Do not retrain the entire Core model if only the Templates section of the domain is changed. + [1.3.0] - 2019-09-05 ^^^^^^^^^^^^^^^^^^^^ From 1efa65570ede5d0d4fe1d15a66783ffc46262e0e Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Fri, 6 Sep 2019 09:09:13 +0200 Subject: [PATCH 04/45] Fix underline --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d50a7b3b64c2..6837695b2b7e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,7 +11,7 @@ This project adheres to `Semantic Versioning`_ starting with version 1.0. ^^^^^^^^^^^^^^^^^^ Changed ------ +------- - Do not retrain the entire Core model if only the Templates section of the domain is changed. [1.3.0] - 2019-09-05 From f43ee78b8bff1badf7a7b914b0941ecabfd27c08 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Fri, 4 Oct 2019 13:40:28 +0200 Subject: [PATCH 05/45] 1. Rename replace_templates to retrain_nlg 2. Short-circuit flag before reaching the actual train method --- rasa/core/agent.py | 4 +--- rasa/core/train.py | 3 +-- rasa/model.py | 26 +++++++++++++------------- rasa/train.py | 39 +++++++++++++++++++++------------------ tests/core/test_model.py | 26 +++++++++++++------------- 5 files changed, 49 insertions(+), 49 deletions(-) diff --git a/rasa/core/agent.py b/rasa/core/agent.py index 75d4892e2742..cf9f41763c2a 100644 --- a/rasa/core/agent.py +++ b/rasa/core/agent.py @@ -760,7 +760,6 @@ def persist( self, model_path: Text, dump_flattened_stories: bool = False, - replace_templates_only: bool = False, ) -> None: """Persists this agent into a directory for later loading and usage.""" @@ -772,8 +771,7 @@ def persist( self._clear_model_directory(model_path) - if not replace_templates_only: - self.policy_ensemble.persist(model_path, dump_flattened_stories) + self.policy_ensemble.persist(model_path, dump_flattened_stories) self.domain.persist(os.path.join(model_path, DEFAULT_DOMAIN_PATH)) self.domain.persist_specification(model_path) diff --git a/rasa/core/train.py b/rasa/core/train.py index f4f7942d547c..3455591f11d2 100644 --- a/rasa/core/train.py +++ b/rasa/core/train.py @@ -27,7 +27,6 @@ async def train( dump_stories: bool = False, policy_config: Optional[Union[Text, Dict]] = None, exclusion_percentage: int = None, - replace_templates_only: bool = False, kwargs: Optional[Dict] = None, ): from rasa.core.agent import Agent @@ -64,7 +63,7 @@ async def train( training_resource, exclusion_percentage=exclusion_percentage, **data_load_args ) agent.train(training_data, **kwargs) - agent.persist(output_path, dump_stories, replace_templates_only) + agent.persist(output_path, dump_stories) return agent diff --git a/rasa/model.py b/rasa/model.py index 1f4437c74292..20c1af2be577 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -35,8 +35,8 @@ FINGERPRINT_CONFIG_KEY = "config" FINGERPRINT_CONFIG_CORE_KEY = "core-config" FINGERPRINT_CONFIG_NLU_KEY = "nlu-config" -FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY = "domain" -FINGERPRINT_TEMPLATES_KEY = "templates" +FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY = "domain" +FINGERPRINT_NLG_KEY = "templates" FINGERPRINT_RASA_VERSION_KEY = "version" FINGERPRINT_STORIES_KEY = "stories" FINGERPRINT_NLU_DATA_KEY = "messages" @@ -50,7 +50,7 @@ relevant_keys=[ FINGERPRINT_CONFIG_KEY, FINGERPRINT_CONFIG_CORE_KEY, - FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY, + FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY, FINGERPRINT_STORIES_KEY, FINGERPRINT_RASA_VERSION_KEY, ], @@ -64,7 +64,7 @@ FINGERPRINT_RASA_VERSION_KEY, ], ) -SECTION_TEMPLATES = Section(name="Templates", relevant_keys=[FINGERPRINT_TEMPLATES_KEY]) +SECTION_TEMPLATES = Section(name="Templates", relevant_keys=[FINGERPRINT_NLG_KEY]) def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: @@ -240,7 +240,7 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin domain_dict = domain.as_dict() templates = domain_dict.pop("templates") - domain_without_templates = Domain.from_dict(domain_dict) + domain_without_nlg = Domain.from_dict(domain_dict) return { FINGERPRINT_CONFIG_KEY: _get_hash_of_config( @@ -252,8 +252,8 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin FINGERPRINT_CONFIG_NLU_KEY: _get_hash_of_config( config, include_keys=CONFIG_MANDATORY_KEYS_NLU ), - FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY: hash(domain_without_templates), - FINGERPRINT_TEMPLATES_KEY: get_dict_hash(templates), + FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY: hash(domain_without_nlg), + FINGERPRINT_NLG_KEY: get_dict_hash(templates), FINGERPRINT_NLU_DATA_KEY: hash(nlu_data), FINGERPRINT_STORIES_KEY: hash(stories), FINGERPRINT_TRAINED_AT_KEY: time.time(), @@ -353,10 +353,10 @@ def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Te to be retrained or not. """ - retrain_nlu = retrain_core = replace_templates = True + retrain_nlu = retrain_core = retrain_nlg = True if old_model is None or not os.path.exists(old_model): - return retrain_core, retrain_nlu, replace_templates + return retrain_core, retrain_nlu, retrain_nlg with unpack_model(old_model) as unpacked: last_fingerprint = fingerprint_from_path(unpacked) @@ -365,7 +365,7 @@ def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Te retrain_core = section_fingerprint_changed( last_fingerprint, new_fingerprint, SECTION_CORE ) - replace_templates = section_fingerprint_changed( + retrain_nlg = section_fingerprint_changed( last_fingerprint, new_fingerprint, SECTION_TEMPLATES ) retrain_nlu = section_fingerprint_changed( @@ -377,16 +377,16 @@ def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Te target_path = os.path.join(train_path, "core") retrain_core = not merge_model(old_core, target_path) else: - # In the case of replace_templates, only do it if the whole + # In the case of retrain_nlg, only do it if the whole # of Core is not being retrained. If it is, then replacing of # templates will be automatically taken care of during that process. target_path = os.path.join(train_path, "core") - replace_templates = not merge_model(old_core, target_path) + retrain_nlg = not merge_model(old_core, target_path) if not retrain_nlu: target_path = os.path.join(train_path, "nlu") retrain_nlu = not merge_model(old_nlu, target_path) - return retrain_core, retrain_nlu, replace_templates + return retrain_core, retrain_nlu, retrain_nlg def package_model( diff --git a/rasa/train.py b/rasa/train.py index e1111bfeb719..42a76b4472fd 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -6,6 +6,7 @@ from rasa.importers.importer import TrainingDataImporter from rasa import model +from rasa.constants import DEFAULT_DOMAIN_PATH from rasa.core.domain import Domain from rasa.utils.common import TempDirectoryPath @@ -158,11 +159,11 @@ async def _train_async_internal( ) old_model = model.get_latest_model(output_path) - retrain_core, retrain_nlu, replace_templates = model.should_retrain( + retrain_core, retrain_nlu, retrain_nlg = model.should_retrain( new_fingerprint, old_model, train_path ) - if force_training or retrain_core or retrain_nlu or replace_templates: + if force_training or retrain_core or retrain_nlu or retrain_nlg: await _do_training( file_importer, output_path=output_path, @@ -170,7 +171,7 @@ async def _train_async_internal( force_training=force_training, retrain_core=retrain_core, retrain_nlu=retrain_nlu, - replace_templates=replace_templates, + retrain_nlg=retrain_nlg, fixed_model_name=fixed_model_name, persist_nlu_training_data=persist_nlu_training_data, kwargs=kwargs, @@ -197,31 +198,35 @@ async def _do_training( force_training: bool = False, retrain_core: bool = True, retrain_nlu: bool = True, - replace_templates: bool = True, + retrain_nlg: bool = True, fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, kwargs: Optional[Dict] = None, ): - if retrain_core or replace_templates or force_training: - replace_templates_only = ( - replace_templates and not retrain_core and not force_training + if retrain_core or retrain_nlg or force_training: + retrain_nlg_only = ( + retrain_nlg and not retrain_core and not force_training ) - if replace_templates_only: + if retrain_nlg_only: print_color( "Core stories/configuration did not change. " "Only the templates section has been changed. A new model with " "the updated templates will be created.", color=bcolors.OKBLUE, ) - await _train_core_with_validated_data( - file_importer, - output=output_path, - train_path=train_path, - fixed_model_name=fixed_model_name, - replace_templates_only=replace_templates_only, - kwargs=kwargs, - ) + model_path = os.path.join(train_path, "core") + domain = await file_importer.get_domain() + domain.persist(os.path.join(model_path, DEFAULT_DOMAIN_PATH)) + domain.persist_specification(model_path) + else: + await _train_core_with_validated_data( + file_importer, + output=output_path, + train_path=train_path, + fixed_model_name=fixed_model_name, + kwargs=kwargs, + ) else: print_color( "Core stories/configuration did not change. No need to retrain Core model.", @@ -326,7 +331,6 @@ async def _train_core_with_validated_data( output: Text, train_path: Optional[Text] = None, fixed_model_name: Optional[Text] = None, - replace_templates_only: bool = False, kwargs: Optional[Dict] = None, ) -> Optional[Text]: """Train Core with validated training and config data.""" @@ -350,7 +354,6 @@ async def _train_core_with_validated_data( training_resource=file_importer, output_path=os.path.join(_train_path, "core"), policy_config=config, - replace_templates_only=replace_templates_only, kwargs=kwargs, ) print_color("Core model training completed.", color=bcolors.OKBLUE) diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 0e282c84cc13..0bb5f157920b 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -16,8 +16,8 @@ from rasa.core.utils import get_dict_hash from rasa.model import ( FINGERPRINT_CONFIG_KEY, - FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY, - FINGERPRINT_TEMPLATES_KEY, + FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY, + FINGERPRINT_NLG_KEY, FINGERPRINT_FILE_PATH, FINGERPRINT_NLU_DATA_KEY, FINGERPRINT_RASA_VERSION_KEY, @@ -110,10 +110,10 @@ def _fingerprint( if config_core is not None else ["test"], FINGERPRINT_CONFIG_NLU_KEY: config_nlu if config_nlu is not None else ["test"], - FINGERPRINT_DOMAIN_WITHOUT_TEMPLATES_KEY: domain + FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY: domain if domain is not None else ["test"], - FINGERPRINT_TEMPLATES_KEY: templates if templates is not None else ["test"], + FINGERPRINT_NLG_KEY: templates if templates is not None else ["test"], FINGERPRINT_TRAINED_AT_KEY: time.time(), FINGERPRINT_RASA_VERSION_KEY: rasa_version, FINGERPRINT_STORIES_KEY: stories if stories is not None else ["test"], @@ -260,62 +260,62 @@ async def test_rasa_packaging(trained_model, project, use_fingerprint): "old": _fingerprint(stories=["others"]), "retrain_core": True, "retrain_nlu": False, - "replace_templates": False, + "retrain_nlg": False, }, { "new": _fingerprint(nlu=["others"]), "old": _fingerprint(), "retrain_core": False, "retrain_nlu": True, - "replace_templates": False, + "retrain_nlg": False, }, { "new": _fingerprint(config="others"), "old": _fingerprint(), "retrain_core": True, "retrain_nlu": True, - "replace_templates": False, + "retrain_nlg": False, }, { "new": _fingerprint(config_core="others"), "old": _fingerprint(), "retrain_core": True, "retrain_nlu": False, - "replace_templates": False, + "retrain_nlg": False, }, { "new": _fingerprint(), "old": _fingerprint(config_nlu="others"), "retrain_core": False, "retrain_nlu": True, - "replace_templates": False, + "retrain_nlg": False, }, { "new": _fingerprint(), "old": _fingerprint(), "retrain_core": False, "retrain_nlu": False, - "replace_templates": False, + "retrain_nlg": False, }, { "new": _fingerprint(), "old": _fingerprint(templates=["others"]), "retrain_core": False, "retrain_nlu": False, - "replace_templates": True, + "retrain_nlg": True, }, ], ) def test_should_retrain(trained_model: Text, fingerprint: Fingerprint): old_model = set_fingerprint(trained_model, fingerprint["old"]) - retrain_core, retrain_nlu, replace_templates = should_retrain( + retrain_core, retrain_nlu, retrain_nlg = should_retrain( fingerprint["new"], old_model, tempfile.mkdtemp() ) assert retrain_core == fingerprint["retrain_core"] assert retrain_nlu == fingerprint["retrain_nlu"] - assert replace_templates == fingerprint["replace_templates"] + assert retrain_nlg == fingerprint["retrain_nlg"] def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: From 1aa84e3245ba3901d70ab837df5b65c15868afc5 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Fri, 4 Oct 2019 13:45:42 +0200 Subject: [PATCH 06/45] Rename section --- rasa/model.py | 6 +++--- tests/core/test_model.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index 20c1af2be577..4ca6b7410310 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -36,7 +36,7 @@ FINGERPRINT_CONFIG_CORE_KEY = "core-config" FINGERPRINT_CONFIG_NLU_KEY = "nlu-config" FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY = "domain" -FINGERPRINT_NLG_KEY = "templates" +FINGERPRINT_NLG_KEY = "nlg" FINGERPRINT_RASA_VERSION_KEY = "version" FINGERPRINT_STORIES_KEY = "stories" FINGERPRINT_NLU_DATA_KEY = "messages" @@ -64,7 +64,7 @@ FINGERPRINT_RASA_VERSION_KEY, ], ) -SECTION_TEMPLATES = Section(name="Templates", relevant_keys=[FINGERPRINT_NLG_KEY]) +SECTION_NLG = Section(name="NLG Templates", relevant_keys=[FINGERPRINT_NLG_KEY]) def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: @@ -366,7 +366,7 @@ def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Te last_fingerprint, new_fingerprint, SECTION_CORE ) retrain_nlg = section_fingerprint_changed( - last_fingerprint, new_fingerprint, SECTION_TEMPLATES + last_fingerprint, new_fingerprint, SECTION_NLG ) retrain_nlu = section_fingerprint_changed( last_fingerprint, new_fingerprint, SECTION_NLU diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 0bb5f157920b..9faa544950d2 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -27,7 +27,7 @@ FINGERPRINT_CONFIG_NLU_KEY, SECTION_CORE, SECTION_NLU, - SECTION_TEMPLATES, + SECTION_NLG, create_package_rasa, get_latest_model, get_model, From 7471e85278159a6a79de86f1d7506239bff19460 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Fri, 4 Oct 2019 13:47:04 +0200 Subject: [PATCH 07/45] Rename test variables to nlg --- tests/core/test_model.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 9faa544950d2..4bbc7f391d04 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -99,7 +99,7 @@ def _fingerprint( config_nlu: Optional[Any] = None, config_core: Optional[Any] = None, domain: Optional[Any] = None, - templates: Optional[Any] = None, + nlg: Optional[Any] = None, stories: Optional[Any] = None, nlu: Optional[Any] = None, rasa_version: Text = "1.0", @@ -113,7 +113,7 @@ def _fingerprint( FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY: domain if domain is not None else ["test"], - FINGERPRINT_NLG_KEY: templates if templates is not None else ["test"], + FINGERPRINT_NLG_KEY: nlg if nlg is not None else ["test"], FINGERPRINT_TRAINED_AT_KEY: time.time(), FINGERPRINT_RASA_VERSION_KEY: rasa_version, FINGERPRINT_STORIES_KEY: stories if stories is not None else ["test"], @@ -143,7 +143,7 @@ def test_persist_and_load_fingerprint(): (_fingerprint(stories=["test", "other"]), True), (_fingerprint(rasa_version="100"), True), (_fingerprint(config=["other"], domain=["other"]), True), - (_fingerprint(templates=["other"]), False), + (_fingerprint(nlg=["other"]), False), (_fingerprint(nlu=["test", "other"]), False), (_fingerprint(config_nlu=["other"]), False), ], @@ -162,7 +162,7 @@ def test_core_fingerprint_changed(fingerprint2, changed): (_fingerprint(nlu=["test", "other"]), True), (_fingerprint(rasa_version="100"), True), (_fingerprint(rasa_version="100", config=["other"]), True), - (_fingerprint(templates=["other"]), False), + (_fingerprint(nlg=["other"]), False), (_fingerprint(config_core=["other"]), False), (_fingerprint(stories=["other"]), False), ], @@ -211,7 +211,7 @@ async def test_create_fingerprint_from_invalid_paths(project, project_files): config_nlu="", config_core="", domain=hash(Domain.empty()), - templates=get_dict_hash(Domain.empty().templates), + nlg=get_dict_hash(Domain.empty().templates), stories=hash(StoryGraph([])), nlu=hash(TrainingData()), rasa_version=rasa.__version__, @@ -299,7 +299,7 @@ async def test_rasa_packaging(trained_model, project, use_fingerprint): }, { "new": _fingerprint(), - "old": _fingerprint(templates=["others"]), + "old": _fingerprint(nlg=["others"]), "retrain_core": False, "retrain_nlu": False, "retrain_nlg": True, From 986a3ac9b15e1919979d033afaea11a05872d94d Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Fri, 4 Oct 2019 14:10:44 +0200 Subject: [PATCH 08/45] Black formatting --- rasa/core/agent.py | 6 +----- rasa/train.py | 4 +--- tests/core/test_model.py | 4 +--- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/rasa/core/agent.py b/rasa/core/agent.py index cf9f41763c2a..85139d62da81 100644 --- a/rasa/core/agent.py +++ b/rasa/core/agent.py @@ -756,11 +756,7 @@ def _clear_model_directory(model_path: Text) -> None: "overwritten.".format(model_path) ) - def persist( - self, - model_path: Text, - dump_flattened_stories: bool = False, - ) -> None: + def persist(self, model_path: Text, dump_flattened_stories: bool = False) -> None: """Persists this agent into a directory for later loading and usage.""" if not self.is_core_ready(): diff --git a/rasa/train.py b/rasa/train.py index 42a76b4472fd..247975dbb613 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -205,9 +205,7 @@ async def _do_training( ): if retrain_core or retrain_nlg or force_training: - retrain_nlg_only = ( - retrain_nlg and not retrain_core and not force_training - ) + retrain_nlg_only = retrain_nlg and not retrain_core and not force_training if retrain_nlg_only: print_color( "Core stories/configuration did not change. " diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 4bbc7f391d04..76d80263e3e4 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -110,9 +110,7 @@ def _fingerprint( if config_core is not None else ["test"], FINGERPRINT_CONFIG_NLU_KEY: config_nlu if config_nlu is not None else ["test"], - FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY: domain - if domain is not None - else ["test"], + FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY: domain if domain is not None else ["test"], FINGERPRINT_NLG_KEY: nlg if nlg is not None else ["test"], FINGERPRINT_TRAINED_AT_KEY: time.time(), FINGERPRINT_RASA_VERSION_KEY: rasa_version, From dcc05ff35e09b4fff9c975366fc3d6d25b476066 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Wed, 9 Oct 2019 17:43:50 +0200 Subject: [PATCH 09/45] Apply suggestions from code review Co-Authored-By: Tobias Wochinger --- CHANGELOG.rst | 2 +- rasa/model.py | 2 +- rasa/train.py | 2 +- tests/core/test_model.py | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 37a172f30010..8ae0de303bd4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,7 +12,7 @@ This project adheres to `Semantic Versioning`_ starting with version 1.0. Fixed ----- -- Do not retrain the entire Core model if only the Templates section of the domain is changed. +- Do not retrain the entire Core model if only the ``templates`` section of the domain is changed. - Fixed error ``Object of type 'MaxHistoryTrackerFeaturizer' is not JSON serializable`` when running ``rasa train core`` diff --git a/rasa/model.py b/rasa/model.py index 4ca6b7410310..26f247d124ac 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -68,7 +68,7 @@ def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: - """Get a model and unpacks it. Raises a `ModelNotFound` exception if + """Get a model and unpack it. Raises a `ModelNotFound` exception if no model could be found at the provided path. Args: diff --git a/rasa/train.py b/rasa/train.py index 247975dbb613..2635db31405e 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -163,7 +163,7 @@ async def _train_async_internal( new_fingerprint, old_model, train_path ) - if force_training or retrain_core or retrain_nlu or retrain_nlg: + if any([force_training, retrain_core, retrain_nlu, retrain_nlg]): await _do_training( file_importer, output_path=output_path, diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 76d80263e3e4..4c107315b5cd 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -2,7 +2,7 @@ import tempfile import time import shutil -from typing import Text, Optional, List, Any +from typing import Text, Optional, Any import pytest from _pytest.tmpdir import TempdirFactory @@ -27,7 +27,6 @@ FINGERPRINT_CONFIG_NLU_KEY, SECTION_CORE, SECTION_NLU, - SECTION_NLG, create_package_rasa, get_latest_model, get_model, From 5cdf99c30c6328b8668fa1c0c8f45e75eab3793a Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Thu, 10 Oct 2019 13:54:41 +0200 Subject: [PATCH 10/45] Update rasa/model.py Co-Authored-By: Tobias Wochinger --- rasa/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/model.py b/rasa/model.py index 26f247d124ac..6acda0891b77 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -340,7 +340,7 @@ def merge_model(source: Text, target: Text) -> bool: return False -def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Text): +def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Text) -> Tuple[bool, bool, bool]: """Check which components of a model should be retrained. Args: From 406c4547f89fa6a50def825dad00642e086bbaf2 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Thu, 10 Oct 2019 14:50:34 +0200 Subject: [PATCH 11/45] Move into separate function --- rasa/train.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/rasa/train.py b/rasa/train.py index 247975dbb613..cf765277f6c3 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -191,6 +191,15 @@ async def _train_async_internal( return old_model +async def _update_domain( + file_importer: TrainingDataImporter, train_path: Text +) -> None: + model_path = os.path.join(train_path, "core") + domain = await file_importer.get_domain() + domain.persist(os.path.join(model_path, DEFAULT_DOMAIN_PATH)) + domain.persist_specification(model_path) + + async def _do_training( file_importer: TrainingDataImporter, output_path: Text, @@ -213,10 +222,7 @@ async def _do_training( "the updated templates will be created.", color=bcolors.OKBLUE, ) - model_path = os.path.join(train_path, "core") - domain = await file_importer.get_domain() - domain.persist(os.path.join(model_path, DEFAULT_DOMAIN_PATH)) - domain.persist_specification(model_path) + await _update_domain(file_importer, train_path) else: await _train_core_with_validated_data( file_importer, From 1d30aec0a6e784806bd2171c443ab4a2709de7bb Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Thu, 10 Oct 2019 15:01:26 +0200 Subject: [PATCH 12/45] Black formatting --- rasa/train.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rasa/train.py b/rasa/train.py index cf765277f6c3..abe4d52a7a68 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -191,9 +191,7 @@ async def _train_async_internal( return old_model -async def _update_domain( - file_importer: TrainingDataImporter, train_path: Text -) -> None: +async def _update_domain(file_importer: TrainingDataImporter, train_path: Text) -> None: model_path = os.path.join(train_path, "core") domain = await file_importer.get_domain() domain.persist(os.path.join(model_path, DEFAULT_DOMAIN_PATH)) From 7b5880bd8fc13d973aecb29231865746c403afd3 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Thu, 10 Oct 2019 15:26:34 +0200 Subject: [PATCH 13/45] Simplify condition --- rasa/train.py | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/rasa/train.py b/rasa/train.py index b3ab5b447811..55d96afa3e57 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -128,8 +128,6 @@ async def _train_async_internal( Returns: Path of the trained model archive. """ - new_fingerprint = await model.model_fingerprint(file_importer) - stories = await file_importer.get_stories() nlu_data = await file_importer.get_nlu_data() @@ -158,10 +156,13 @@ async def _train_async_internal( kwargs=kwargs, ) + new_fingerprint = await model.model_fingerprint(file_importer) old_model = model.get_latest_model(output_path) - retrain_core, retrain_nlu, retrain_nlg = model.should_retrain( - new_fingerprint, old_model, train_path - ) + retrain_core = retrain_nlu = retrain_nlg = False + if not force_training: + retrain_core, retrain_nlu, retrain_nlg = model.should_retrain( + new_fingerprint, old_model, train_path + ) if any([force_training, retrain_core, retrain_nlu, retrain_nlg]): await _do_training( @@ -211,24 +212,22 @@ async def _do_training( kwargs: Optional[Dict] = None, ): - if retrain_core or retrain_nlg or force_training: - retrain_nlg_only = retrain_nlg and not retrain_core and not force_training - if retrain_nlg_only: - print_color( - "Core stories/configuration did not change. " - "Only the templates section has been changed. A new model with " - "the updated templates will be created.", - color=bcolors.OKBLUE, - ) - await _update_domain(file_importer, train_path) - else: - await _train_core_with_validated_data( - file_importer, - output=output_path, - train_path=train_path, - fixed_model_name=fixed_model_name, - kwargs=kwargs, - ) + if force_training or retrain_core: + await _train_core_with_validated_data( + file_importer, + output=output_path, + train_path=train_path, + fixed_model_name=fixed_model_name, + kwargs=kwargs, + ) + elif retrain_nlg: + print_color( + "Core stories/configuration did not change. " + "Only the templates section has been changed. A new model with " + "the updated templates will be created.", + color=bcolors.OKBLUE, + ) + await _update_domain(file_importer, train_path) else: print_color( "Core stories/configuration did not change. No need to retrain Core model.", From ab91d4786134060343302cf43681ded573876c7e Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Tue, 22 Oct 2019 14:31:42 +0200 Subject: [PATCH 14/45] Black formatting --- rasa/model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rasa/model.py b/rasa/model.py index 9d99da54cf81..47966c154a77 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -334,7 +334,9 @@ def merge_model(source: Text, target: Text) -> bool: return False -def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Text) -> Tuple[bool, bool, bool]: +def should_retrain( + new_fingerprint: Fingerprint, old_model: Text, train_path: Text +) -> Tuple[bool, bool, bool]: """Check which components of a model should be retrained. Args: From 484f79f5f605c1ae295522303ad713f0f7b743ae Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Tue, 22 Oct 2019 22:56:28 +0200 Subject: [PATCH 15/45] Some refactoring --- rasa/model.py | 71 +++++++++++++++++++++++++++------------- rasa/train.py | 29 +++++++--------- tests/core/test_model.py | 13 ++++---- 3 files changed, 67 insertions(+), 46 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index 9d99da54cf81..6b618af5050f 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -67,6 +67,29 @@ SECTION_NLG = Section(name="NLG Templates", relevant_keys=[FINGERPRINT_NLG_KEY]) +class ShouldRetrain(object): + def __init__(self, nlu: bool, core: bool, nlg: bool, force_train: bool = False): + self.nlu = nlu + self.core = core + self.nlg = nlg + self.force_train = force_train + + def any(self) -> bool: + return any([self.nlu, self.core, self.nlg, self.force_train]) + + def __eq__(self, obj) -> bool: + if not isinstance(obj, ShouldRetrain): + return False + return all( + [ + self.nlu == obj.nlu, + self.core == obj.core, + self.nlg == obj.nlg, + self.force_train == obj.force_train, + ] + ) + + def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: """Get a model and unpack it. Raises a `ModelNotFound` exception if no model could be found at the provided path. @@ -334,7 +357,9 @@ def merge_model(source: Text, target: Text) -> bool: return False -def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Text) -> Tuple[bool, bool, bool]: +def should_retrain( + new_fingerprint: Fingerprint, old_model: Text, train_path: Text +) -> ShouldRetrain: """Check which components of a model should be retrained. Args: @@ -347,40 +372,40 @@ def should_retrain(new_fingerprint: Fingerprint, old_model: Text, train_path: Te to be retrained or not. """ - retrain_nlu = retrain_core = retrain_nlg = True + retrain = ShouldRetrain(core=True, nlu=True, nlg=True) if old_model is None or not os.path.exists(old_model): - return retrain_core, retrain_nlu, retrain_nlg + return retrain with unpack_model(old_model) as unpacked: last_fingerprint = fingerprint_from_path(unpacked) old_core, old_nlu = get_model_subdirectories(unpacked) - retrain_core = section_fingerprint_changed( - last_fingerprint, new_fingerprint, SECTION_CORE - ) - retrain_nlg = section_fingerprint_changed( - last_fingerprint, new_fingerprint, SECTION_NLG - ) - retrain_nlu = section_fingerprint_changed( - last_fingerprint, new_fingerprint, SECTION_NLU + retrain = ShouldRetrain( + **{ + key: section_fingerprint_changed( + last_fingerprint, new_fingerprint, section + ) + for (key, section) in [ + ("core", SECTION_CORE), + ("nlu", SECTION_NLU), + ("nlg", SECTION_NLG), + ] + } ) - # If merging directories fails, force retrain. - if not retrain_core: - target_path = os.path.join(train_path, "core") - retrain_core = not merge_model(old_core, target_path) - else: - # In the case of retrain_nlg, only do it if the whole - # of Core is not being retrained. If it is, then replacing of - # templates will be automatically taken care of during that process. + core_merge_failed = False + if not retrain.core: target_path = os.path.join(train_path, "core") - retrain_nlg = not merge_model(old_core, target_path) - if not retrain_nlu: + core_merge_failed = not merge_model(old_core, target_path) + if not retrain.nlg: + retrain.nlg = core_merge_failed + + if not retrain.nlu: target_path = os.path.join(train_path, "nlu") - retrain_nlu = not merge_model(old_nlu, target_path) + retrain.nlu = not merge_model(old_nlu, target_path) - return retrain_core, retrain_nlu, retrain_nlg + return retrain def package_model( diff --git a/rasa/train.py b/rasa/train.py index 517429d9e626..1694f8ea23dd 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -6,6 +6,7 @@ from rasa.importers.importer import TrainingDataImporter from rasa import model +from rasa.model import ShouldRetrain from rasa.constants import DEFAULT_DOMAIN_PATH from rasa.core.domain import Domain from rasa.utils.common import TempDirectoryPath @@ -165,21 +166,18 @@ async def _train_async_internal( new_fingerprint = await model.model_fingerprint(file_importer) old_model = model.get_latest_model(output_path) - retrain_core = retrain_nlu = retrain_nlg = False - if not force_training: - retrain_core, retrain_nlu, retrain_nlg = model.should_retrain( - new_fingerprint, old_model, train_path - ) + retrain = ShouldRetrain( + core=False, nlu=False, nlg=False, force_train=force_training + ) + if not retrain.force_train: + retrain = model.should_retrain(new_fingerprint, old_model, train_path) - if any([force_training, retrain_core, retrain_nlu, retrain_nlg]): + if retrain.any(): await _do_training( file_importer, output_path=output_path, train_path=train_path, - force_training=force_training, - retrain_core=retrain_core, - retrain_nlu=retrain_nlu, - retrain_nlg=retrain_nlg, + retrain=retrain, fixed_model_name=fixed_model_name, persist_nlu_training_data=persist_nlu_training_data, kwargs=kwargs, @@ -210,16 +208,13 @@ async def _do_training( file_importer: TrainingDataImporter, output_path: Text, train_path: Text, - force_training: bool = False, - retrain_core: bool = True, - retrain_nlu: bool = True, - retrain_nlg: bool = True, + retrain: ShouldRetrain = None, fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, kwargs: Optional[Dict] = None, ): - if force_training or retrain_core: + if any([retrain.force_train, retrain.core]): await _train_core_with_validated_data( file_importer, output=output_path, @@ -227,7 +222,7 @@ async def _do_training( fixed_model_name=fixed_model_name, kwargs=kwargs, ) - elif retrain_nlg: + elif retrain.nlg: print_color( "Core stories/configuration did not change. " "Only the templates section has been changed. A new model with " @@ -241,7 +236,7 @@ async def _do_training( color=bcolors.OKBLUE, ) - if retrain_nlu or force_training: + if any([retrain.nlu, retrain.force_train]): await _train_nlu_with_validated_data( file_importer, output=output_path, diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 4c107315b5cd..0881786bcf01 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -35,6 +35,7 @@ Fingerprint, section_fingerprint_changed, should_retrain, + ShouldRetrain, ) from rasa.exceptions import ModelNotFound @@ -306,13 +307,13 @@ async def test_rasa_packaging(trained_model, project, use_fingerprint): def test_should_retrain(trained_model: Text, fingerprint: Fingerprint): old_model = set_fingerprint(trained_model, fingerprint["old"]) - retrain_core, retrain_nlu, retrain_nlg = should_retrain( - fingerprint["new"], old_model, tempfile.mkdtemp() - ) + retrain = should_retrain(fingerprint["new"], old_model, tempfile.mkdtemp()) - assert retrain_core == fingerprint["retrain_core"] - assert retrain_nlu == fingerprint["retrain_nlu"] - assert retrain_nlg == fingerprint["retrain_nlg"] + assert retrain == ShouldRetrain( + core=fingerprint["retrain_core"], + nlu=fingerprint["retrain_nlu"], + nlg=fingerprint["retrain_nlg"], + ) def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: From 25d2f85c53eea0b3e18497fcafdf73795c21fff0 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Tue, 22 Oct 2019 22:59:37 +0200 Subject: [PATCH 16/45] Add empty line and update docstring --- rasa/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rasa/model.py b/rasa/model.py index 6b618af5050f..294dbde8fa58 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -368,7 +368,7 @@ def should_retrain( train_path: Path to the directory in which the new model will be trained. Returns: - A tuple of boolean values indicating whether Rasa Core and/or Rasa NLU needs + A ShouldRetrain object indicating whether Rasa Core and/or Rasa NLU needs to be retrained or not. """ @@ -398,6 +398,7 @@ def should_retrain( if not retrain.core: target_path = os.path.join(train_path, "core") core_merge_failed = not merge_model(old_core, target_path) + if not retrain.nlg: retrain.nlg = core_merge_failed From 7d3931a96408c0c37e408bc6696f99a53525922c Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Tue, 22 Oct 2019 23:35:02 +0200 Subject: [PATCH 17/45] Assign core_merge_failed to retrain.core --- rasa/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rasa/model.py b/rasa/model.py index 294dbde8fa58..c07ad2252529 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -398,6 +398,7 @@ def should_retrain( if not retrain.core: target_path = os.path.join(train_path, "core") core_merge_failed = not merge_model(old_core, target_path) + retrain.core = core_merge_failed if not retrain.nlg: retrain.nlg = core_merge_failed From 87f88e346d2fcc08d30ba81916546f4988574728 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Tue, 22 Oct 2019 23:37:59 +0200 Subject: [PATCH 18/45] Initialize using straightforward syntax --- rasa/model.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index c07ad2252529..a80aae435daa 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -382,16 +382,15 @@ def should_retrain( old_core, old_nlu = get_model_subdirectories(unpacked) retrain = ShouldRetrain( - **{ - key: section_fingerprint_changed( - last_fingerprint, new_fingerprint, section - ) - for (key, section) in [ - ("core", SECTION_CORE), - ("nlu", SECTION_NLU), - ("nlg", SECTION_NLG), - ] - } + core=section_fingerprint_changed( + last_fingerprint, new_fingerprint, SECTION_CORE + ), + nlu=section_fingerprint_changed( + last_fingerprint, new_fingerprint, SECTION_NLU + ), + nlg=section_fingerprint_changed( + last_fingerprint, new_fingerprint, SECTION_NLG + ), ) core_merge_failed = False From f4c12fb203ee39d25d2e2b6427d222b4c77a8a21 Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Wed, 23 Oct 2019 09:30:17 +0200 Subject: [PATCH 19/45] Initialize default ShouldRetrain --- rasa/train.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rasa/train.py b/rasa/train.py index 1694f8ea23dd..58088e08bc9e 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -213,6 +213,8 @@ async def _do_training( persist_nlu_training_data: bool = False, kwargs: Optional[Dict] = None, ): + if not retrain: + retrain = ShouldRetrain() if any([retrain.force_train, retrain.core]): await _train_core_with_validated_data( From 22154b76fe6b0cf644414d84fa98dcb4ad3e53fe Mon Sep 17 00:00:00 2001 From: Amogh Mannekote Date: Wed, 23 Oct 2019 13:53:46 +0200 Subject: [PATCH 20/45] Pass parameters --- rasa/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/train.py b/rasa/train.py index 58088e08bc9e..cd46d36ec024 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -214,7 +214,7 @@ async def _do_training( kwargs: Optional[Dict] = None, ): if not retrain: - retrain = ShouldRetrain() + retrain = ShouldRetrain(nlu=True, core=True, nlg=True) if any([retrain.force_train, retrain.core]): await _train_core_with_validated_data( From dab7861acaf8677d33e6118b6a82bc14b525b5c2 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 14:40:01 +0100 Subject: [PATCH 21/45] correct changelog --- CHANGELOG.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f9ed0f9f4cd6..950e981c3221 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,7 @@ Added Changed ------- +- Do not retrain the entire Core model if only the ``templates`` section of the domain is changed. Removed ------- @@ -132,7 +133,6 @@ Changed Fixed ----- -- Do not retrain the entire Core model if only the ``templates`` section of the domain is changed. - Fixed error ``Object of type 'MaxHistoryTrackerFeaturizer' is not JSON serializable`` when running ``rasa train core`` - Default channel ``send_`` methods no longer support kwargs as they caused issues in incompatible channels @@ -174,9 +174,6 @@ Added Fixed ----- -- asyncio warnings are now only printed if the callback takes more than 100ms - (up from 1ms) -- ``agent.load_model_from_server`` no longer affects logging - Added the ability to properly deal with spaCy ``Doc``-objects created on empty strings as discussed `here `_. Only training samples that actually bear content are sent to ``self.nlp.pipe`` From 1ae93501d0baf69c31952710e2c4f53ea60ed0b5 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 14:42:31 +0100 Subject: [PATCH 22/45] rename `ShouldRetrain` to `FingerprintComparisonResult` --- rasa/model.py | 14 +++++++------- rasa/train.py | 8 ++++---- tests/core/test_model.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index a80aae435daa..db1df6c6d1e1 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -5,7 +5,7 @@ import tempfile import typing from collections import namedtuple -from typing import Text, Tuple, Union, Optional, List, Dict +from typing import Text, Tuple, Union, Optional, List, Dict, Any import rasa.utils.io from rasa.cli.utils import print_success, create_output_path @@ -67,7 +67,7 @@ SECTION_NLG = Section(name="NLG Templates", relevant_keys=[FINGERPRINT_NLG_KEY]) -class ShouldRetrain(object): +class FingerprintComparisonResult: def __init__(self, nlu: bool, core: bool, nlg: bool, force_train: bool = False): self.nlu = nlu self.core = core @@ -77,8 +77,8 @@ def __init__(self, nlu: bool, core: bool, nlg: bool, force_train: bool = False): def any(self) -> bool: return any([self.nlu, self.core, self.nlg, self.force_train]) - def __eq__(self, obj) -> bool: - if not isinstance(obj, ShouldRetrain): + def __eq__(self, obj: Any) -> bool: + if not isinstance(obj, FingerprintComparisonResult): return False return all( [ @@ -359,7 +359,7 @@ def merge_model(source: Text, target: Text) -> bool: def should_retrain( new_fingerprint: Fingerprint, old_model: Text, train_path: Text -) -> ShouldRetrain: +) -> FingerprintComparisonResult: """Check which components of a model should be retrained. Args: @@ -372,7 +372,7 @@ def should_retrain( to be retrained or not. """ - retrain = ShouldRetrain(core=True, nlu=True, nlg=True) + retrain = FingerprintComparisonResult(core=True, nlu=True, nlg=True) if old_model is None or not os.path.exists(old_model): return retrain @@ -381,7 +381,7 @@ def should_retrain( last_fingerprint = fingerprint_from_path(unpacked) old_core, old_nlu = get_model_subdirectories(unpacked) - retrain = ShouldRetrain( + retrain = FingerprintComparisonResult( core=section_fingerprint_changed( last_fingerprint, new_fingerprint, SECTION_CORE ), diff --git a/rasa/train.py b/rasa/train.py index cd46d36ec024..0b12597e263d 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -6,7 +6,7 @@ from rasa.importers.importer import TrainingDataImporter from rasa import model -from rasa.model import ShouldRetrain +from rasa.model import FingerprintComparisonResult from rasa.constants import DEFAULT_DOMAIN_PATH from rasa.core.domain import Domain from rasa.utils.common import TempDirectoryPath @@ -166,7 +166,7 @@ async def _train_async_internal( new_fingerprint = await model.model_fingerprint(file_importer) old_model = model.get_latest_model(output_path) - retrain = ShouldRetrain( + retrain = FingerprintComparisonResult( core=False, nlu=False, nlg=False, force_train=force_training ) if not retrain.force_train: @@ -208,13 +208,13 @@ async def _do_training( file_importer: TrainingDataImporter, output_path: Text, train_path: Text, - retrain: ShouldRetrain = None, + retrain: FingerprintComparisonResult = None, fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, kwargs: Optional[Dict] = None, ): if not retrain: - retrain = ShouldRetrain(nlu=True, core=True, nlg=True) + retrain = FingerprintComparisonResult(nlu=True, core=True, nlg=True) if any([retrain.force_train, retrain.core]): await _train_core_with_validated_data( diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 0881786bcf01..b468fa151247 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -35,7 +35,7 @@ Fingerprint, section_fingerprint_changed, should_retrain, - ShouldRetrain, + FingerprintComparisonResult, ) from rasa.exceptions import ModelNotFound @@ -309,7 +309,7 @@ def test_should_retrain(trained_model: Text, fingerprint: Fingerprint): retrain = should_retrain(fingerprint["new"], old_model, tempfile.mkdtemp()) - assert retrain == ShouldRetrain( + assert retrain == FingerprintComparisonResult( core=fingerprint["retrain_core"], nlu=fingerprint["retrain_nlu"], nlg=fingerprint["retrain_nlg"], From 487fa49103d673b333f6d873eca612568ac3cf9f Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 15:00:24 +0100 Subject: [PATCH 23/45] move functions to check if training is needed to `FingerprintComparisonResult` --- rasa/model.py | 15 ++++++++++++--- rasa/train.py | 16 +++++++++------- tests/core/test_model.py | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index db1df6c6d1e1..d58f1020bc58 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -74,9 +74,6 @@ def __init__(self, nlu: bool, core: bool, nlg: bool, force_train: bool = False): self.nlg = nlg self.force_train = force_train - def any(self) -> bool: - return any([self.nlu, self.core, self.nlg, self.force_train]) - def __eq__(self, obj: Any) -> bool: if not isinstance(obj, FingerprintComparisonResult): return False @@ -89,6 +86,18 @@ def __eq__(self, obj: Any) -> bool: ] ) + def is_training_required(self) -> bool: + return any([self.nlg, self.nlu, self.core, self.force_train]) + + def should_retrain_core(self) -> bool: + return self.force_train or self.core + + def should_retrain_nlg(self) -> bool: + return self.force_train or self.nlg + + def should_retrain_nlu(self) -> bool: + return self.force_train or self.nlu + def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: """Get a model and unpack it. Raises a `ModelNotFound` exception if diff --git a/rasa/train.py b/rasa/train.py index 0b12597e263d..0e2da4aa605b 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -177,7 +177,7 @@ async def _train_async_internal( file_importer, output_path=output_path, train_path=train_path, - retrain=retrain, + fingerprint_comparison_result=retrain, fixed_model_name=fixed_model_name, persist_nlu_training_data=persist_nlu_training_data, kwargs=kwargs, @@ -208,15 +208,17 @@ async def _do_training( file_importer: TrainingDataImporter, output_path: Text, train_path: Text, - retrain: FingerprintComparisonResult = None, + fingerprint_comparison_result: FingerprintComparisonResult = None, fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, kwargs: Optional[Dict] = None, ): - if not retrain: - retrain = FingerprintComparisonResult(nlu=True, core=True, nlg=True) + if not fingerprint_comparison_result: + fingerprint_comparison_result = FingerprintComparisonResult( + nlu=True, core=True, nlg=True + ) - if any([retrain.force_train, retrain.core]): + if fingerprint_comparison_result.should_retrain_core(): await _train_core_with_validated_data( file_importer, output=output_path, @@ -224,7 +226,7 @@ async def _do_training( fixed_model_name=fixed_model_name, kwargs=kwargs, ) - elif retrain.nlg: + elif fingerprint_comparison_result.should_retrain_nlg(): print_color( "Core stories/configuration did not change. " "Only the templates section has been changed. A new model with " @@ -238,7 +240,7 @@ async def _do_training( color=bcolors.OKBLUE, ) - if any([retrain.nlu, retrain.force_train]): + if fingerprint_comparison_result.should_retrain_nlu(): await _train_nlu_with_validated_data( file_importer, output=output_path, diff --git a/tests/core/test_model.py b/tests/core/test_model.py index b468fa151247..c980e1e864f1 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -327,3 +327,43 @@ def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: create_package_rasa(unpacked_model_path, output_path, fingerprint) return output_path + + +@pytest.mark.parametrize( + "comparison_result,retrain_all,retrain_core,retrain_nlg,retrain_nlu", + [ + (FingerprintComparisonResult(force_train=True), True, True, True, True), + ( + FingerprintComparisonResult(core=True, nlu=False, nlg=False), + True, + True, + False, + False, + ), + ( + FingerprintComparisonResult(core=False, nlu=True, nlg=False), + True, + False, + False, + True, + ), + ( + FingerprintComparisonResult(core=True, nlu=True, nlg=False), + True, + True, + False, + True, + ), + ], +) +def test_fingerprint_comparison_result( + comparison_result: FingerprintComparisonResult, + retrain_all: bool, + retrain_core: bool, + retrain_nlg: bool, + retrain_nlu: bool, +): + assert comparison_result.is_training_required() == retrain_all + assert comparison_result.should_retrain_core() == retrain_core + assert comparison_result.should_retrain_nlg() == retrain_nlg + assert comparison_result.should_retrain_nlu() == retrain_nlu From b8a23c33540028186d3a1833b0818370f133da56 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 15:02:46 +0100 Subject: [PATCH 24/45] use f-strings --- rasa/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index d58f1020bc58..f20805f0ac78 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -173,7 +173,7 @@ def unpack_model( # All files are in a subdirectory. with tarfile.open(model_file, mode="r:gz") as tar: tar.extractall(working_directory) - logger.debug("Extracted model to '{}'.".format(working_directory)) + logger.debug(f"Extracted model to '{working_directory}'.") return TempDirectoryPath(working_directory) @@ -342,7 +342,7 @@ def section_fingerprint_changed( """Check whether the fingerprint of a section has changed.""" for k in section.relevant_keys: if fingerprint1.get(k) != fingerprint2.get(k): - logger.info("Data ({}) for {} section changed.".format(k, section.name)) + logger.info(f"Data ({k}) for {section.name} section changed.") return True return False @@ -362,7 +362,7 @@ def merge_model(source: Text, target: Text) -> bool: shutil.move(source, target) return True except Exception as e: - logging.debug("Could not merge model: {}".format(e)) + logging.debug(f"Could not merge model: {e}") return False From af38b9871a68c5b7b505fb92aad0a6edfb08a92f Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 15:33:17 +0100 Subject: [PATCH 25/45] test domain merging --- rasa/core/domain.py | 3 ++- rasa/model.py | 31 ++++++++++++++++++++++++++++--- rasa/train.py | 18 ++++-------------- tests/core/test_model.py | 22 ++++++++++++++++++++++ 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/rasa/core/domain.py b/rasa/core/domain.py index 572ad3d70f29..61b3544aee04 100644 --- a/rasa/core/domain.py +++ b/rasa/core/domain.py @@ -3,6 +3,7 @@ import logging import os import typing +from pathlib import Path from typing import Any, Dict, List, Optional, Text, Tuple, Union, Set import rasa.core.constants @@ -664,7 +665,7 @@ def as_dict(self) -> Dict[Text, Any]: "forms": self.form_names, } - def persist(self, filename: Text) -> None: + def persist(self, filename: Union[Text, Path]) -> None: """Write domain to a file.""" domain_data = self.as_dict() diff --git a/rasa/model.py b/rasa/model.py index f20805f0ac78..be5154e6c1ef 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -5,6 +5,7 @@ import tempfile import typing from collections import namedtuple +from pathlib import Path from typing import Text, Tuple, Union, Optional, List, Dict, Any import rasa.utils.io @@ -14,6 +15,7 @@ CONFIG_MANDATORY_KEYS_CORE, CONFIG_MANDATORY_KEYS_NLU, CONFIG_MANDATORY_KEYS, + DEFAULT_DOMAIN_PATH, ) from rasa.core.utils import get_dict_hash @@ -68,7 +70,13 @@ class FingerprintComparisonResult: - def __init__(self, nlu: bool, core: bool, nlg: bool, force_train: bool = False): + def __init__( + self, + nlu: bool = True, + core: bool = True, + nlg: bool = True, + force_train: bool = False, + ): self.nlu = nlu self.core = core self.nlg = nlg @@ -152,7 +160,7 @@ def get_latest_model(model_path: Text = DEFAULT_MODELS_PATH) -> Optional[Text]: def unpack_model( - model_file: Text, working_directory: Optional[Text] = None + model_file: Text, working_directory: Optional[Union[Path, Text]] = None ) -> TempDirectoryPath: """Unpack a zipped Rasa model. @@ -381,7 +389,7 @@ def should_retrain( to be retrained or not. """ - retrain = FingerprintComparisonResult(core=True, nlu=True, nlg=True) + retrain = FingerprintComparisonResult() if old_model is None or not os.path.exists(old_model): return retrain @@ -449,3 +457,20 @@ def package_model( ) return output_directory + + +async def update_with_new_domain( + importer: "TrainingDataImporter", unpacked_model_path: Union[Path, Text] +) -> None: + """Overwrites the domain of an unpacked model with a new domain. + + Args: + importer: Importer which provides the new domain. + unpacked_model_path: Path to the unpacked model. + """ + + model_path = Path(unpacked_model_path) / "core" + domain = await importer.get_domain() + + domain.persist(model_path / DEFAULT_DOMAIN_PATH) + domain.persist_specification(str(model_path)) diff --git a/rasa/train.py b/rasa/train.py index 0e2da4aa605b..d856078082be 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -7,7 +7,6 @@ from rasa.importers.importer import TrainingDataImporter from rasa import model from rasa.model import FingerprintComparisonResult -from rasa.constants import DEFAULT_DOMAIN_PATH from rasa.core.domain import Domain from rasa.utils.common import TempDirectoryPath @@ -166,13 +165,11 @@ async def _train_async_internal( new_fingerprint = await model.model_fingerprint(file_importer) old_model = model.get_latest_model(output_path) - retrain = FingerprintComparisonResult( - core=False, nlu=False, nlg=False, force_train=force_training - ) + retrain = FingerprintComparisonResult(force_train=force_training) if not retrain.force_train: retrain = model.should_retrain(new_fingerprint, old_model, train_path) - if retrain.any(): + if retrain.is_training_required(): await _do_training( file_importer, output_path=output_path, @@ -197,18 +194,11 @@ async def _train_async_internal( return old_model -async def _update_domain(file_importer: TrainingDataImporter, train_path: Text) -> None: - model_path = os.path.join(train_path, "core") - domain = await file_importer.get_domain() - domain.persist(os.path.join(model_path, DEFAULT_DOMAIN_PATH)) - domain.persist_specification(model_path) - - async def _do_training( file_importer: TrainingDataImporter, output_path: Text, train_path: Text, - fingerprint_comparison_result: FingerprintComparisonResult = None, + fingerprint_comparison_result: Optional[FingerprintComparisonResult] = None, fixed_model_name: Optional[Text] = None, persist_nlu_training_data: bool = False, kwargs: Optional[Dict] = None, @@ -233,7 +223,7 @@ async def _do_training( "the updated templates will be created.", color=bcolors.OKBLUE, ) - await _update_domain(file_importer, train_path) + await model.update_with_new_domain(file_importer, train_path) else: print_color( "Core stories/configuration did not change. No need to retrain Core model.", diff --git a/tests/core/test_model.py b/tests/core/test_model.py index c980e1e864f1..848cb8ffca72 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -2,7 +2,9 @@ import tempfile import time import shutil +from pathlib import Path from typing import Text, Optional, Any +from unittest.mock import Mock import pytest from _pytest.tmpdir import TempdirFactory @@ -14,6 +16,7 @@ from rasa.constants import DEFAULT_CONFIG_PATH, DEFAULT_DATA_PATH, DEFAULT_DOMAIN_PATH from rasa.core.domain import Domain from rasa.core.utils import get_dict_hash +from rasa import model from rasa.model import ( FINGERPRINT_CONFIG_KEY, FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY, @@ -367,3 +370,22 @@ def test_fingerprint_comparison_result( assert comparison_result.should_retrain_core() == retrain_core assert comparison_result.should_retrain_nlg() == retrain_nlg assert comparison_result.should_retrain_nlu() == retrain_nlu + + +async def test_update_with_new_domain(trained_model: Text, tmpdir: Path): + _ = model.unpack_model(trained_model, tmpdir) + + new_domain = Domain.empty() + + mocked_importer = Mock() + + async def get_domain() -> Domain: + return new_domain + + mocked_importer.get_domain = get_domain + + await model.update_with_new_domain(mocked_importer, tmpdir) + + actual = Domain.load(tmpdir / "core" / DEFAULT_DOMAIN_PATH) + + assert actual.is_empty() From de8a07edb1bab2ce48d265706838fd097e745c73 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 15:37:18 +0100 Subject: [PATCH 26/45] use constant for 'core' subfolder --- rasa/constants.py | 1 + rasa/core/agent.py | 11 ++++++++--- rasa/model.py | 7 ++++--- rasa/train.py | 4 ++-- tests/core/test_model.py | 15 ++++++++++----- tests/importers/test_multi_project.py | 4 ++-- 6 files changed, 27 insertions(+), 15 deletions(-) diff --git a/rasa/constants.py b/rasa/constants.py index 7538ed3d56fa..a270ab695e18 100644 --- a/rasa/constants.py +++ b/rasa/constants.py @@ -9,6 +9,7 @@ DEFAULT_DATA_PATH = "data" DEFAULT_RESULTS_PATH = "results" DEFAULT_NLU_RESULTS_PATH = "nlu_comparison_results" +DEFAULT_CORE_SUBDIRECTORY_NAME = "core" DEFAULT_REQUEST_TIMEOUT = 60 * 5 # 5 minutes TEST_DATA_FILE = "test.md" diff --git a/rasa/core/agent.py b/rasa/core/agent.py index 4d42593adef9..49fe4eb39ad7 100644 --- a/rasa/core/agent.py +++ b/rasa/core/agent.py @@ -12,7 +12,12 @@ import rasa import rasa.utils.io import rasa.core.utils -from rasa.constants import DEFAULT_DOMAIN_PATH, LEGACY_DOCS_BASE_URL, ENV_SANIC_BACKLOG +from rasa.constants import ( + DEFAULT_DOMAIN_PATH, + LEGACY_DOCS_BASE_URL, + ENV_SANIC_BACKLOG, + DEFAULT_CORE_SUBDIRECTORY_NAME, +) from rasa.core import constants, jobs, training from rasa.core.channels.channel import InputChannel, OutputChannel, UserMessage from rasa.core.constants import DEFAULT_REQUEST_TIMEOUT @@ -764,8 +769,8 @@ def persist(self, model_path: Text, dump_flattened_stories: bool = False) -> Non if not self.is_core_ready(): raise AgentNotReady("Can't persist without a policy ensemble.") - if not model_path.endswith("core"): - model_path = os.path.join(model_path, "core") + if not model_path.endswith(DEFAULT_CORE_SUBDIRECTORY_NAME): + model_path = os.path.join(model_path, DEFAULT_CORE_SUBDIRECTORY_NAME) self._clear_model_directory(model_path) diff --git a/rasa/model.py b/rasa/model.py index be5154e6c1ef..1765da4d8fce 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -16,6 +16,7 @@ CONFIG_MANDATORY_KEYS_NLU, CONFIG_MANDATORY_KEYS, DEFAULT_DOMAIN_PATH, + DEFAULT_CORE_SUBDIRECTORY_NAME, ) from rasa.core.utils import get_dict_hash @@ -200,7 +201,7 @@ def get_model_subdirectories( path to NLU subdirectory if it exists or `None` otherwise). """ - core_path = os.path.join(unpacked_model_path, "core") + core_path = os.path.join(unpacked_model_path, DEFAULT_CORE_SUBDIRECTORY_NAME) nlu_path = os.path.join(unpacked_model_path, "nlu") if not os.path.isdir(core_path): @@ -412,7 +413,7 @@ def should_retrain( core_merge_failed = False if not retrain.core: - target_path = os.path.join(train_path, "core") + target_path = os.path.join(train_path, DEFAULT_CORE_SUBDIRECTORY_NAME) core_merge_failed = not merge_model(old_core, target_path) retrain.core = core_merge_failed @@ -469,7 +470,7 @@ async def update_with_new_domain( unpacked_model_path: Path to the unpacked model. """ - model_path = Path(unpacked_model_path) / "core" + model_path = Path(unpacked_model_path) / DEFAULT_CORE_SUBDIRECTORY_NAME domain = await importer.get_domain() domain.persist(model_path / DEFAULT_DOMAIN_PATH) diff --git a/rasa/train.py b/rasa/train.py index d856078082be..723902b97331 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -17,7 +17,7 @@ bcolors, print_color, ) -from rasa.constants import DEFAULT_MODELS_PATH +from rasa.constants import DEFAULT_MODELS_PATH, DEFAULT_CORE_SUBDIRECTORY_NAME def train( @@ -349,7 +349,7 @@ async def _train_core_with_validated_data( await rasa.core.train( domain_file=domain, training_resource=file_importer, - output_path=os.path.join(_train_path, "core"), + output_path=os.path.join(_train_path, DEFAULT_CORE_SUBDIRECTORY_NAME), policy_config=config, kwargs=kwargs, ) diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 848cb8ffca72..0d6df7762664 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -13,7 +13,12 @@ import rasa.core import rasa.nlu from rasa.importers.rasa import RasaFileImporter -from rasa.constants import DEFAULT_CONFIG_PATH, DEFAULT_DATA_PATH, DEFAULT_DOMAIN_PATH +from rasa.constants import ( + DEFAULT_CONFIG_PATH, + DEFAULT_DATA_PATH, + DEFAULT_DOMAIN_PATH, + DEFAULT_CORE_SUBDIRECTORY_NAME, +) from rasa.core.domain import Domain from rasa.core.utils import get_dict_hash from rasa import model @@ -57,7 +62,7 @@ def test_get_latest_model(trained_model): def test_get_model_from_directory(trained_model): unpacked = get_model(trained_model) - assert os.path.exists(os.path.join(unpacked, "core")) + assert os.path.exists(os.path.join(unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME)) assert os.path.exists(os.path.join(unpacked, "nlu")) @@ -90,7 +95,7 @@ def test_get_model_from_directory_with_subdirectories( def test_get_model_from_directory_nlu_only(trained_model): unpacked = get_model(trained_model) - shutil.rmtree(os.path.join(unpacked, "core")) + shutil.rmtree(os.path.join(unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME)) unpacked_core, unpacked_nlu = get_model_subdirectories(unpacked) assert not unpacked_core @@ -247,7 +252,7 @@ async def test_rasa_packaging(trained_model, project, use_fingerprint): assert ( os.path.exists(os.path.join(unpacked, FINGERPRINT_FILE_PATH)) == use_fingerprint ) - assert os.path.exists(os.path.join(unpacked, "core")) + assert os.path.exists(os.path.join(unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME)) assert os.path.exists(os.path.join(unpacked, "nlu")) assert not os.path.exists(unpacked_model_path) @@ -386,6 +391,6 @@ async def get_domain() -> Domain: await model.update_with_new_domain(mocked_importer, tmpdir) - actual = Domain.load(tmpdir / "core" / DEFAULT_DOMAIN_PATH) + actual = Domain.load(tmpdir / DEFAULT_CORE_SUBDIRECTORY_NAME / DEFAULT_DOMAIN_PATH) assert actual.is_empty() diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py index df37c794ad51..f7f703e9e68a 100644 --- a/tests/importers/test_multi_project.py +++ b/tests/importers/test_multi_project.py @@ -4,12 +4,12 @@ from _pytest.tmpdir import TempdirFactory import os +from rasa.constants import DEFAULT_CORE_SUBDIRECTORY_NAME from rasa.nlu.training_data.formats import RasaReader from rasa import model from rasa.core import utils from rasa.core.domain import Domain from rasa.importers.multi_project import MultiProjectImporter -from rasa.train import train_async def test_load_imports_from_directory_tree(tmpdir_factory: TempdirFactory): @@ -242,7 +242,7 @@ async def test_multi_project_training(trained_async): unpacked = model.unpack_model(trained_stack_model_path) - domain_file = os.path.join(unpacked, "core", "domain.yml") + domain_file = os.path.join(unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME, "domain.yml") domain = Domain.load(domain_file) expected_intents = { From f17faf85c56e42b7c00600aaf268e44fda3c947d Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 15:38:41 +0100 Subject: [PATCH 27/45] rename `section_fingerprint_changed` to `did_section_fingerprint_change` --- rasa/model.py | 8 ++++---- tests/core/test_model.py | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index 1765da4d8fce..6b8c1688b884 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -345,7 +345,7 @@ def persist_fingerprint(output_path: Text, fingerprint: Fingerprint): rasa.utils.io.dump_obj_as_json_to_file(path, fingerprint) -def section_fingerprint_changed( +def did_section_fingerprint_change( fingerprint1: Fingerprint, fingerprint2: Fingerprint, section: Section ) -> bool: """Check whether the fingerprint of a section has changed.""" @@ -400,13 +400,13 @@ def should_retrain( old_core, old_nlu = get_model_subdirectories(unpacked) retrain = FingerprintComparisonResult( - core=section_fingerprint_changed( + core=did_section_fingerprint_change( last_fingerprint, new_fingerprint, SECTION_CORE ), - nlu=section_fingerprint_changed( + nlu=did_section_fingerprint_change( last_fingerprint, new_fingerprint, SECTION_NLU ), - nlg=section_fingerprint_changed( + nlg=did_section_fingerprint_change( last_fingerprint, new_fingerprint, SECTION_NLG ), ) diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 0d6df7762664..962edc365ae2 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -41,7 +41,7 @@ get_model_subdirectories, model_fingerprint, Fingerprint, - section_fingerprint_changed, + did_section_fingerprint_change, should_retrain, FingerprintComparisonResult, ) @@ -157,7 +157,8 @@ def test_persist_and_load_fingerprint(): def test_core_fingerprint_changed(fingerprint2, changed): fingerprint1 = _fingerprint() assert ( - section_fingerprint_changed(fingerprint1, fingerprint2, SECTION_CORE) is changed + did_section_fingerprint_change(fingerprint1, fingerprint2, SECTION_CORE) + is changed ) @@ -176,7 +177,8 @@ def test_core_fingerprint_changed(fingerprint2, changed): def test_nlu_fingerprint_changed(fingerprint2, changed): fingerprint1 = _fingerprint() assert ( - section_fingerprint_changed(fingerprint1, fingerprint2, SECTION_NLU) is changed + did_section_fingerprint_change(fingerprint1, fingerprint2, SECTION_NLU) + is changed ) From 388b45cf6a79bc86b21444ecfd28c01610bba748 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 15:43:01 +0100 Subject: [PATCH 28/45] add docstring --- rasa/model.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index 6b8c1688b884..4ad25e0ff49a 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -78,33 +78,37 @@ def __init__( nlg: bool = True, force_train: bool = False, ): + """Creates a `FingerprintComparisonResult` instance. + + Args: + nlu: `True` if the NLU model should be retrained. + core: `True` if the Core model should be retrained. + nlg: `True` if the templates in the domain should be updated. + force_train: `True` if a training of all parts is forced. + """ self.nlu = nlu self.core = core self.nlg = nlg self.force_train = force_train - def __eq__(self, obj: Any) -> bool: - if not isinstance(obj, FingerprintComparisonResult): - return False - return all( - [ - self.nlu == obj.nlu, - self.core == obj.core, - self.nlg == obj.nlg, - self.force_train == obj.force_train, - ] - ) - def is_training_required(self) -> bool: + """Check if anything has to be retrained.""" + return any([self.nlg, self.nlu, self.core, self.force_train]) def should_retrain_core(self) -> bool: + """Check if the Core model has to be updated.""" + return self.force_train or self.core def should_retrain_nlg(self) -> bool: + """Check if the templates have to be updated.""" + return self.force_train or self.nlg def should_retrain_nlu(self) -> bool: + """Check if the NLU model has to be updated.""" + return self.force_train or self.nlu From 860339bd470ef29ca92d1acaab19cd5eb99bbbab Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 16:37:45 +0100 Subject: [PATCH 29/45] rename --- rasa/model.py | 2 +- rasa/train.py | 2 +- tests/core/test_model.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index 4ad25e0ff49a..e4128f0dbaa5 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -464,7 +464,7 @@ def package_model( return output_directory -async def update_with_new_domain( +async def update_model_with_new_domain( importer: "TrainingDataImporter", unpacked_model_path: Union[Path, Text] ) -> None: """Overwrites the domain of an unpacked model with a new domain. diff --git a/rasa/train.py b/rasa/train.py index 723902b97331..0e49356368da 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -223,7 +223,7 @@ async def _do_training( "the updated templates will be created.", color=bcolors.OKBLUE, ) - await model.update_with_new_domain(file_importer, train_path) + await model.update_model_with_new_domain(file_importer, train_path) else: print_color( "Core stories/configuration did not change. No need to retrain Core model.", diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 962edc365ae2..9ef3bf0c8ed5 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -391,7 +391,7 @@ async def get_domain() -> Domain: mocked_importer.get_domain = get_domain - await model.update_with_new_domain(mocked_importer, tmpdir) + await model.update_model_with_new_domain(mocked_importer, tmpdir) actual = Domain.load(tmpdir / DEFAULT_CORE_SUBDIRECTORY_NAME / DEFAULT_DOMAIN_PATH) From 7a1cc9cfc6bdc118d944af5f90e8b8ef93f2bda1 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 16:38:34 +0100 Subject: [PATCH 30/45] remove dumping of states --- rasa/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rasa/model.py b/rasa/model.py index e4128f0dbaa5..aefebf672a08 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -478,4 +478,3 @@ async def update_model_with_new_domain( domain = await importer.get_domain() domain.persist(model_path / DEFAULT_DOMAIN_PATH) - domain.persist_specification(str(model_path)) From e33536314d735fdee84182bb3769cd0f6f5dc7af Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 16:40:37 +0100 Subject: [PATCH 31/45] set `should_retrain_nlg` to `True` in case Core is retrained --- rasa/model.py | 2 +- tests/core/test_model.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index aefebf672a08..e9762b73f26c 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -104,7 +104,7 @@ def should_retrain_core(self) -> bool: def should_retrain_nlg(self) -> bool: """Check if the templates have to be updated.""" - return self.force_train or self.nlg + return self.should_retrain_core() or self.nlg def should_retrain_nlu(self) -> bool: """Check if the NLU model has to be updated.""" diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 9ef3bf0c8ed5..ae0a26224717 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -347,7 +347,7 @@ def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: FingerprintComparisonResult(core=True, nlu=False, nlg=False), True, True, - False, + True, False, ), ( @@ -361,7 +361,7 @@ def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: FingerprintComparisonResult(core=True, nlu=True, nlg=False), True, True, - False, + True, True, ), ], From 7a7798511b5658a9e2621b4f05cf7ebcc0be2f62 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 16:45:27 +0100 Subject: [PATCH 32/45] fix failing tests --- tests/core/test_model.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/core/test_model.py b/tests/core/test_model.py index ae0a26224717..c4e304a067ba 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -268,7 +268,7 @@ async def test_rasa_packaging(trained_model, project, use_fingerprint): "old": _fingerprint(stories=["others"]), "retrain_core": True, "retrain_nlu": False, - "retrain_nlg": False, + "retrain_nlg": True, }, { "new": _fingerprint(nlu=["others"]), @@ -282,14 +282,14 @@ async def test_rasa_packaging(trained_model, project, use_fingerprint): "old": _fingerprint(), "retrain_core": True, "retrain_nlu": True, - "retrain_nlg": False, + "retrain_nlg": True, }, { "new": _fingerprint(config_core="others"), "old": _fingerprint(), "retrain_core": True, "retrain_nlu": False, - "retrain_nlg": False, + "retrain_nlg": True, }, { "new": _fingerprint(), @@ -319,11 +319,9 @@ def test_should_retrain(trained_model: Text, fingerprint: Fingerprint): retrain = should_retrain(fingerprint["new"], old_model, tempfile.mkdtemp()) - assert retrain == FingerprintComparisonResult( - core=fingerprint["retrain_core"], - nlu=fingerprint["retrain_nlu"], - nlg=fingerprint["retrain_nlg"], - ) + assert retrain.should_retrain_core() == fingerprint["retrain_core"] + assert retrain.should_retrain_nlg() == fingerprint["retrain_nlg"] + assert retrain.should_retrain_nlu() == fingerprint["retrain_nlu"] def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: From a06350e3e85b54a953287438d430c5fc743d62dc Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 16:59:04 +0100 Subject: [PATCH 33/45] make naming more consistent --- rasa/model.py | 12 ++++++------ rasa/train.py | 14 +++++++++----- tests/core/test_model.py | 2 +- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index e9762b73f26c..dd239d8a9ab7 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -76,7 +76,7 @@ def __init__( nlu: bool = True, core: bool = True, nlg: bool = True, - force_train: bool = False, + force_trainining: bool = False, ): """Creates a `FingerprintComparisonResult` instance. @@ -84,22 +84,22 @@ def __init__( nlu: `True` if the NLU model should be retrained. core: `True` if the Core model should be retrained. nlg: `True` if the templates in the domain should be updated. - force_train: `True` if a training of all parts is forced. + force_trainining: `True` if a training of all parts is forced. """ self.nlu = nlu self.core = core self.nlg = nlg - self.force_train = force_train + self.force_training = force_trainining def is_training_required(self) -> bool: """Check if anything has to be retrained.""" - return any([self.nlg, self.nlu, self.core, self.force_train]) + return any([self.nlg, self.nlu, self.core, self.force_training]) def should_retrain_core(self) -> bool: """Check if the Core model has to be updated.""" - return self.force_train or self.core + return self.force_training or self.core def should_retrain_nlg(self) -> bool: """Check if the templates have to be updated.""" @@ -109,7 +109,7 @@ def should_retrain_nlg(self) -> bool: def should_retrain_nlu(self) -> bool: """Check if the NLU model has to be updated.""" - return self.force_train or self.nlu + return self.force_training or self.nlu def get_model(model_path: Text = DEFAULT_MODELS_PATH) -> TempDirectoryPath: diff --git a/rasa/train.py b/rasa/train.py index 0e49356368da..5ca187e31793 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -165,16 +165,20 @@ async def _train_async_internal( new_fingerprint = await model.model_fingerprint(file_importer) old_model = model.get_latest_model(output_path) - retrain = FingerprintComparisonResult(force_train=force_training) - if not retrain.force_train: - retrain = model.should_retrain(new_fingerprint, old_model, train_path) + fingerprint_comparison = FingerprintComparisonResult( + force_trainining=force_training + ) + if not force_training: + fingerprint_comparison = model.should_retrain( + new_fingerprint, old_model, train_path + ) - if retrain.is_training_required(): + if fingerprint_comparison.is_training_required(): await _do_training( file_importer, output_path=output_path, train_path=train_path, - fingerprint_comparison_result=retrain, + fingerprint_comparison_result=fingerprint_comparison, fixed_model_name=fixed_model_name, persist_nlu_training_data=persist_nlu_training_data, kwargs=kwargs, diff --git a/tests/core/test_model.py b/tests/core/test_model.py index c4e304a067ba..11216f8a7c4f 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -340,7 +340,7 @@ def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: @pytest.mark.parametrize( "comparison_result,retrain_all,retrain_core,retrain_nlg,retrain_nlu", [ - (FingerprintComparisonResult(force_train=True), True, True, True, True), + (FingerprintComparisonResult(force_trainining=True), True, True, True, True), ( FingerprintComparisonResult(core=True, nlu=False, nlg=False), True, From a94137d5d3e8fc010cd680155a20a19956da54b9 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 17:03:07 +0100 Subject: [PATCH 34/45] polishing --- rasa/train.py | 4 +--- tests/importers/test_multi_project.py | 6 ++++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rasa/train.py b/rasa/train.py index 5ca187e31793..23fbe729a62a 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -208,9 +208,7 @@ async def _do_training( kwargs: Optional[Dict] = None, ): if not fingerprint_comparison_result: - fingerprint_comparison_result = FingerprintComparisonResult( - nlu=True, core=True, nlg=True - ) + fingerprint_comparison_result = FingerprintComparisonResult() if fingerprint_comparison_result.should_retrain_core(): await _train_core_with_validated_data( diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py index f7f703e9e68a..7fc223188178 100644 --- a/tests/importers/test_multi_project.py +++ b/tests/importers/test_multi_project.py @@ -4,7 +4,7 @@ from _pytest.tmpdir import TempdirFactory import os -from rasa.constants import DEFAULT_CORE_SUBDIRECTORY_NAME +from rasa.constants import DEFAULT_CORE_SUBDIRECTORY_NAME, DEFAULT_DOMAIN_PATH from rasa.nlu.training_data.formats import RasaReader from rasa import model from rasa.core import utils @@ -242,7 +242,9 @@ async def test_multi_project_training(trained_async): unpacked = model.unpack_model(trained_stack_model_path) - domain_file = os.path.join(unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME, "domain.yml") + domain_file = os.path.join( + unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME, DEFAULT_DOMAIN_PATH + ) domain = Domain.load(domain_file) expected_intents = { From 2e9fcd785ac9dfb3fb34f975ef4387c27aa556b4 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Oct 2019 18:01:12 +0100 Subject: [PATCH 35/45] remove leading whitespace --- rasa/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/model.py b/rasa/model.py index dd239d8a9ab7..f94d3904896e 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -468,7 +468,7 @@ async def update_model_with_new_domain( importer: "TrainingDataImporter", unpacked_model_path: Union[Path, Text] ) -> None: """Overwrites the domain of an unpacked model with a new domain. - + Args: importer: Importer which provides the new domain. unpacked_model_path: Path to the unpacked model. From 48725df608920ca27ab2b23c4b45bd33037de0ed Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 11:12:38 +0100 Subject: [PATCH 36/45] Apply suggestions from code review Co-Authored-By: Tanja --- rasa/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index f94d3904896e..729f277b34bd 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -67,7 +67,7 @@ FINGERPRINT_RASA_VERSION_KEY, ], ) -SECTION_NLG = Section(name="NLG Templates", relevant_keys=[FINGERPRINT_NLG_KEY]) +SECTION_NLG = Section(name="NLG templates", relevant_keys=[FINGERPRINT_NLG_KEY]) class FingerprintComparisonResult: @@ -76,7 +76,7 @@ def __init__( nlu: bool = True, core: bool = True, nlg: bool = True, - force_trainining: bool = False, + force_training: bool = False, ): """Creates a `FingerprintComparisonResult` instance. @@ -390,7 +390,7 @@ def should_retrain( train_path: Path to the directory in which the new model will be trained. Returns: - A ShouldRetrain object indicating whether Rasa Core and/or Rasa NLU needs + A FingerprintComparisonResult object indicating whether Rasa Core and/or Rasa NLU needs to be retrained or not. """ From a811b3de30db472cc1e44a9849a72e3e57ea3a98 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 11:19:14 +0100 Subject: [PATCH 37/45] use class based syntax for namedtuple --- rasa/model.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index f94d3904896e..f6c8dc034ee3 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -4,9 +4,8 @@ import shutil import tempfile import typing -from collections import namedtuple from pathlib import Path -from typing import Text, Tuple, Union, Optional, List, Dict, Any +from typing import Text, Tuple, Union, Optional, List, Dict, NamedTuple import rasa.utils.io from rasa.cli.utils import print_success, create_output_path @@ -46,7 +45,13 @@ FINGERPRINT_TRAINED_AT_KEY = "trained_at" -Section = namedtuple("Section", ["name", "relevant_keys"]) +class Section(NamedTuple): + """Defines relevant fingerprint sections which are used to decide whether a model + should be retrained.""" + + name: Text + relevant_keys: List[Text] + SECTION_CORE = Section( name="Core model", From 822d7c9f2225dea5d78d9b68a207bee7bdd886c9 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 11:25:58 +0100 Subject: [PATCH 38/45] use functions instead of directly accessing attributes --- rasa/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index f6c8dc034ee3..f194cce6dd47 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -421,15 +421,15 @@ def should_retrain( ) core_merge_failed = False - if not retrain.core: + if not retrain.should_retrain_core(): target_path = os.path.join(train_path, DEFAULT_CORE_SUBDIRECTORY_NAME) core_merge_failed = not merge_model(old_core, target_path) retrain.core = core_merge_failed - if not retrain.nlg: - retrain.nlg = core_merge_failed + if not retrain.should_retrain_nlg() and core_merge_failed: + retrain.nlg = True - if not retrain.nlu: + if not retrain.should_retrain_nlu(): target_path = os.path.join(train_path, "nlu") retrain.nlu = not merge_model(old_nlu, target_path) From f0dd0951b03c6d93e282b6cd9fbf8096c8c512ce Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 11:26:57 +0100 Subject: [PATCH 39/45] rename 'merge_model' to 'move_model' --- rasa/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index f194cce6dd47..360c22b2834a 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -365,8 +365,8 @@ def did_section_fingerprint_change( return False -def merge_model(source: Text, target: Text) -> bool: - """Merge two model directories. +def move_model(source: Text, target: Text) -> bool: + """Move two model directories. Args: source: The original folder which should be merged in another. @@ -423,7 +423,7 @@ def should_retrain( core_merge_failed = False if not retrain.should_retrain_core(): target_path = os.path.join(train_path, DEFAULT_CORE_SUBDIRECTORY_NAME) - core_merge_failed = not merge_model(old_core, target_path) + core_merge_failed = not move_model(old_core, target_path) retrain.core = core_merge_failed if not retrain.should_retrain_nlg() and core_merge_failed: @@ -431,7 +431,7 @@ def should_retrain( if not retrain.should_retrain_nlu(): target_path = os.path.join(train_path, "nlu") - retrain.nlu = not merge_model(old_nlu, target_path) + retrain.nlu = not move_model(old_nlu, target_path) return retrain From b8f3cab3a0ede9659cacfc3c5a3aca284b511b04 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 11:31:24 +0100 Subject: [PATCH 40/45] add clarfication comment --- rasa/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rasa/model.py b/rasa/model.py index 360c22b2834a..d3ecee91c087 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -427,6 +427,7 @@ def should_retrain( retrain.core = core_merge_failed if not retrain.should_retrain_nlg() and core_merge_failed: + # If merging the Core model failed, we should also retrain NLG retrain.nlg = True if not retrain.should_retrain_nlu(): From 49229a4f1329bb00db4349aa0fc407163bf1a4ed Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 13:43:27 +0100 Subject: [PATCH 41/45] black formatting --- rasa/cli/x.py | 2 +- rasa/core/brokers/utils.py | 6 +++--- rasa/core/interpreter.py | 2 +- rasa/core/policies/ensemble.py | 7 ++++--- rasa/core/test.py | 6 +++++- rasa/core/trackers.py | 2 +- rasa/core/training/structures.py | 4 ++-- rasa/core/training/visualization.py | 2 +- rasa/model.py | 2 +- rasa/nlu/test.py | 4 ++-- rasa/nlu/utils/mitie_utils.py | 2 +- tests/conftest.py | 2 +- tests/importers/test_multi_project.py | 2 +- 13 files changed, 24 insertions(+), 19 deletions(-) diff --git a/rasa/cli/x.py b/rasa/cli/x.py index d49ab2224895..d24ea01fc6ef 100644 --- a/rasa/cli/x.py +++ b/rasa/cli/x.py @@ -347,7 +347,7 @@ def run_in_production(args: argparse.Namespace): def _get_credentials_and_endpoints_paths( - args: argparse.Namespace + args: argparse.Namespace, ) -> Tuple[Optional[Text], Optional[Text]]: config_endpoint = args.config_endpoint if config_endpoint: diff --git a/rasa/core/brokers/utils.py b/rasa/core/brokers/utils.py index 7ff67f9c7536..addd85a3d809 100644 --- a/rasa/core/brokers/utils.py +++ b/rasa/core/brokers/utils.py @@ -14,7 +14,7 @@ def from_endpoint_config( - broker_config: Optional[EndpointConfig] + broker_config: Optional[EndpointConfig], ) -> Optional["EventChannel"]: """Instantiate an event channel based on its configuration.""" @@ -41,7 +41,7 @@ def from_endpoint_config( def load_event_channel_from_module_string( - broker_config: EndpointConfig + broker_config: EndpointConfig, ) -> Optional["EventChannel"]: """Instantiate an event channel based on its class name.""" @@ -57,7 +57,7 @@ def load_event_channel_from_module_string( def create_rabbitmq_ssl_options( - rabbitmq_host: Optional[Text] = None + rabbitmq_host: Optional[Text] = None, ) -> Optional["pika.SSLOptions"]: """Create RabbitMQ SSL options. diff --git a/rasa/core/interpreter.py b/rasa/core/interpreter.py index a0d32700eed7..57fe85c2ca8d 100644 --- a/rasa/core/interpreter.py +++ b/rasa/core/interpreter.py @@ -135,7 +135,7 @@ def _starts_with_intent_prefix(self, text: Text) -> bool: @staticmethod def extract_intent_and_entities( - user_input: Text + user_input: Text, ) -> Tuple[Optional[Text], float, List[Dict[Text, Any]]]: """Parse the user input using regexes to extract intent & entities.""" diff --git a/rasa/core/policies/ensemble.py b/rasa/core/policies/ensemble.py index 2bd0e6bc0776..a32fe62ffd9c 100644 --- a/rasa/core/policies/ensemble.py +++ b/rasa/core/policies/ensemble.py @@ -303,9 +303,10 @@ def from_dict(cls, policy_configuration: Dict[Text, Any]) -> List[Policy]: ) if featurizer_config.get("state_featurizer"): - state_featurizer_func, state_featurizer_config = cls.get_state_featurizer_from_dict( - featurizer_config - ) + ( + state_featurizer_func, + state_featurizer_config, + ) = cls.get_state_featurizer_from_dict(featurizer_config) # override featurizer's state_featurizer # with real state_featurizer class diff --git a/rasa/core/test.py b/rasa/core/test.py index ef57b3645744..562b42e9068f 100644 --- a/rasa/core/test.py +++ b/rasa/core/test.py @@ -345,7 +345,11 @@ def _predict_tracker_actions( for event in events[1:]: if isinstance(event, ActionExecuted): - action_executed_result, policy, confidence = _collect_action_executed_predictions( + ( + action_executed_result, + policy, + confidence, + ) = _collect_action_executed_predictions( processor, partial_tracker, event, fail_on_prediction_errors ) tracker_eval_store.merge_store(action_executed_result) diff --git a/rasa/core/trackers.py b/rasa/core/trackers.py index 0f921f72bf20..2f90ac1bfe7e 100644 --- a/rasa/core/trackers.py +++ b/rasa/core/trackers.py @@ -262,7 +262,7 @@ def init_copy(self) -> "DialogueStateTracker": ) def generate_all_prior_trackers( - self + self, ) -> Generator["DialogueStateTracker", None, None]: """Returns a generator of the previous trackers of this tracker. diff --git a/rasa/core/training/structures.py b/rasa/core/training/structures.py index e0458d9ea90f..bebe9794cf79 100644 --- a/rasa/core/training/structures.py +++ b/rasa/core/training/structures.py @@ -653,7 +653,7 @@ def as_story_string(self) -> Text: @staticmethod def order_steps( - story_steps: List[StoryStep] + story_steps: List[StoryStep], ) -> Tuple[deque, List[Tuple[Text, Text]]]: """Topological sort of the steps returning the ids of the steps.""" @@ -668,7 +668,7 @@ def order_steps( @staticmethod def _group_by_start_checkpoint( - story_steps: List[StoryStep] + story_steps: List[StoryStep], ) -> Dict[Text, List[StoryStep]]: """Returns all the start checkpoint of the steps""" diff --git a/rasa/core/training/visualization.py b/rasa/core/training/visualization.py index 85c7bd84721b..4e4fef34abdf 100644 --- a/rasa/core/training/visualization.py +++ b/rasa/core/training/visualization.py @@ -30,7 +30,7 @@ def __init__(self, nlu_training_data): @staticmethod def _create_reverse_mapping( - data: "TrainingData" + data: "TrainingData", ) -> Dict[Dict[Text, Any], List["Message"]]: """Create a mapping from intent to messages diff --git a/rasa/model.py b/rasa/model.py index 0e443d7fa2c1..ce80e9085142 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -197,7 +197,7 @@ def unpack_model( def get_model_subdirectories( - unpacked_model_path: Text + unpacked_model_path: Text, ) -> Tuple[Optional[Text], Optional[Text]]: """Return paths for Core and NLU model directories, if they exist. If neither directories exist, a `ModelNotFound` exception is raised. diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index 9143638590c4..7145912e67d5 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -197,7 +197,7 @@ def get_unique_labels( def remove_empty_intent_examples( - intent_results: List[IntentEvaluationResult] + intent_results: List[IntentEvaluationResult], ) -> List[IntentEvaluationResult]: """Remove those examples without an intent.""" @@ -215,7 +215,7 @@ def remove_empty_intent_examples( def remove_empty_response_examples( - response_results: List[ResponseSelectionEvaluationResult] + response_results: List[ResponseSelectionEvaluationResult], ) -> List[ResponseSelectionEvaluationResult]: """Remove those examples without a response.""" diff --git a/rasa/nlu/utils/mitie_utils.py b/rasa/nlu/utils/mitie_utils.py index 66416c3b145e..0c2c64338c15 100644 --- a/rasa/nlu/utils/mitie_utils.py +++ b/rasa/nlu/utils/mitie_utils.py @@ -76,7 +76,7 @@ def provide_context(self) -> Dict[Text, Any]: @staticmethod def ensure_proper_language_model( - extractor: Optional["mitie.total_word_feature_extractor"] + extractor: Optional["mitie.total_word_feature_extractor"], ) -> None: if extractor is None: diff --git a/tests/conftest.py b/tests/conftest.py index 8a1bb709b2ca..578c6ef52edc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -69,7 +69,7 @@ async def trained_moodbot_path() -> Text: @pytest.fixture(scope="session") async def unpacked_trained_moodbot_path( - trained_moodbot_path: Text + trained_moodbot_path: Text, ) -> TempDirectoryPath: return get_model(trained_moodbot_path) diff --git a/tests/importers/test_multi_project.py b/tests/importers/test_multi_project.py index fe32d8e86bd8..5b7bb10fdc33 100644 --- a/tests/importers/test_multi_project.py +++ b/tests/importers/test_multi_project.py @@ -83,7 +83,7 @@ def test_load_from_none(input_dict: Dict, tmpdir_factory: TempdirFactory): def test_load_if_subproject_is_more_specific_than_parent( - tmpdir_factory: TempdirFactory + tmpdir_factory: TempdirFactory, ): root = tmpdir_factory.mktemp("Parent Bot") config_path = str(root / "config.yml") From 1d2c79a2525e543627f1c2313471e452ff2cfdca Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 13:44:39 +0100 Subject: [PATCH 42/45] fix variable naming --- rasa/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index ce80e9085142..a2d097713e27 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -89,12 +89,12 @@ def __init__( nlu: `True` if the NLU model should be retrained. core: `True` if the Core model should be retrained. nlg: `True` if the templates in the domain should be updated. - force_trainining: `True` if a training of all parts is forced. + force_training: `True` if a training of all parts is forced. """ self.nlu = nlu self.core = core self.nlg = nlg - self.force_training = force_trainining + self.force_training = force_training def is_training_required(self) -> bool: """Check if anything has to be retrained.""" From 7da43db1067cf1e815165fa924373622d837aa3f Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 13:45:52 +0100 Subject: [PATCH 43/45] Update rasa/model.py Co-Authored-By: Tanja --- rasa/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/model.py b/rasa/model.py index a2d097713e27..86ee32580869 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -427,7 +427,7 @@ def should_retrain( retrain.core = core_merge_failed if not retrain.should_retrain_nlg() and core_merge_failed: - # If merging the Core model failed, we should also retrain NLG + # If moving the Core model failed, we should also retrain NLG retrain.nlg = True if not retrain.should_retrain_nlu(): From db9835663647f9f70df86b9e22f5f2007aeb51fe Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 13:46:15 +0100 Subject: [PATCH 44/45] retrain 'retrain' to 'fingerprint_comparison' --- rasa/model.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/rasa/model.py b/rasa/model.py index a2d097713e27..aba54e5d9fe4 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -399,16 +399,16 @@ def should_retrain( to be retrained or not. """ - retrain = FingerprintComparisonResult() + fingerprint_comparison = FingerprintComparisonResult() if old_model is None or not os.path.exists(old_model): - return retrain + return fingerprint_comparison with unpack_model(old_model) as unpacked: last_fingerprint = fingerprint_from_path(unpacked) old_core, old_nlu = get_model_subdirectories(unpacked) - retrain = FingerprintComparisonResult( + fingerprint_comparison = FingerprintComparisonResult( core=did_section_fingerprint_change( last_fingerprint, new_fingerprint, SECTION_CORE ), @@ -421,20 +421,20 @@ def should_retrain( ) core_merge_failed = False - if not retrain.should_retrain_core(): + if not fingerprint_comparison.should_retrain_core(): target_path = os.path.join(train_path, DEFAULT_CORE_SUBDIRECTORY_NAME) core_merge_failed = not move_model(old_core, target_path) - retrain.core = core_merge_failed + fingerprint_comparison.core = core_merge_failed - if not retrain.should_retrain_nlg() and core_merge_failed: + if not fingerprint_comparison.should_retrain_nlg() and core_merge_failed: # If merging the Core model failed, we should also retrain NLG - retrain.nlg = True + fingerprint_comparison.nlg = True - if not retrain.should_retrain_nlu(): + if not fingerprint_comparison.should_retrain_nlu(): target_path = os.path.join(train_path, "nlu") - retrain.nlu = not move_model(old_nlu, target_path) + fingerprint_comparison.nlu = not move_model(old_nlu, target_path) - return retrain + return fingerprint_comparison def package_model( From 8956846748011465b78a6c7162889493f52e3eb9 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 30 Oct 2019 15:33:11 +0100 Subject: [PATCH 45/45] fix typo in variable name --- rasa/train.py | 4 +--- tests/core/test_model.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/rasa/train.py b/rasa/train.py index 23fbe729a62a..c00a5c571412 100644 --- a/rasa/train.py +++ b/rasa/train.py @@ -165,9 +165,7 @@ async def _train_async_internal( new_fingerprint = await model.model_fingerprint(file_importer) old_model = model.get_latest_model(output_path) - fingerprint_comparison = FingerprintComparisonResult( - force_trainining=force_training - ) + fingerprint_comparison = FingerprintComparisonResult(force_training=force_training) if not force_training: fingerprint_comparison = model.should_retrain( new_fingerprint, old_model, train_path diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 11216f8a7c4f..e837ced5e929 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -340,7 +340,7 @@ def set_fingerprint(trained_model: Text, fingerprint: Fingerprint) -> Text: @pytest.mark.parametrize( "comparison_result,retrain_all,retrain_core,retrain_nlg,retrain_nlu", [ - (FingerprintComparisonResult(force_trainining=True), True, True, True, True), + (FingerprintComparisonResult(force_training=True), True, True, True, True), ( FingerprintComparisonResult(core=True, nlu=False, nlg=False), True,