From cd8cca82dae82fddbf9a485dab7b122bf24f8618 Mon Sep 17 00:00:00 2001 From: Ignacio Paricio <54770971+ignacioparicio@users.noreply.github.com> Date: Thu, 12 Aug 2021 15:18:24 +0200 Subject: [PATCH] [KED-2768] Fix kedro install flow (#1203) --- RELEASE.md | 1 + kedro/framework/cli/jupyter.py | 5 + kedro/framework/project/__init__.py | 25 ++--- kedro/framework/session/session.py | 18 ++-- tests/extras/extensions/test_ipython.py | 3 +- tests/framework/context/test_context.py | 1 - .../project/test_pipeline_registry.py | 6 +- tests/framework/session/conftest.py | 26 +++-- tests/framework/session/test_session.py | 95 +++++++++++-------- tests/framework/test_startup.py | 4 +- 10 files changed, 109 insertions(+), 75 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 1997f79cce..e9f9cefc2d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -9,6 +9,7 @@ ## Bug fixes and other changes * Bumped minimum required `fsspec` version to 2021.04. +* Fixed the `kedro install` and `kedro build-reqs` flows when uninstalled dependencies are present in a project's `settings.py`, `context.py` or `hooks.py` ([Issue #829](https://github.com/quantumblacklabs/kedro/issues/829)). ## Minor breaking changes to the API diff --git a/kedro/framework/cli/jupyter.py b/kedro/framework/cli/jupyter.py index 367b1fbe07..3e45165d46 100644 --- a/kedro/framework/cli/jupyter.py +++ b/kedro/framework/cli/jupyter.py @@ -54,6 +54,7 @@ load_entry_points, python_call, ) +from kedro.framework.project import validate_settings from kedro.framework.startup import ProjectMetadata JUPYTER_IP_HELP = "IP address of the Jupyter server." @@ -142,6 +143,8 @@ def jupyter_notebook( """Open Jupyter Notebook with project specific variables loaded.""" _check_module_importable("jupyter_core") + validate_settings() + if "-h" not in args and "--help" not in args: ipython_message(all_kernels) @@ -179,6 +182,8 @@ def jupyter_lab( """Open Jupyter Lab with project specific variables loaded.""" _check_module_importable("jupyter_core") + validate_settings() + if "-h" not in args and "--help" not in args: ipython_message(all_kernels) diff --git a/kedro/framework/project/__init__.py b/kedro/framework/project/__init__.py index a1f8f9a87d..77dd6d1e29 100644 --- a/kedro/framework/project/__init__.py +++ b/kedro/framework/project/__init__.py @@ -203,20 +203,11 @@ def _clear(self, pipelines_module: str) -> None: pipelines = _ProjectPipelines() -def _validate_module(settings_module): - """Eagerly validate that the module is importable. - This ensures that the settings module is syntactically - correct so that any import errors are surfaced early. - """ - importlib.import_module(settings_module) - - def configure_project(package_name: str): """Configure a Kedro project by populating its settings with values defined in user's settings.py and pipeline_registry.py. """ settings_module = f"{package_name}.settings" - _validate_module(settings_module) settings.configure(settings_module) # set up all hooks so we can discover all pipelines @@ -228,7 +219,19 @@ def configure_project(package_name: str): pipelines.configure(pipelines_module) # Once the project is successfully configured once, store PACKAGE_NAME as a - # global variable to make it easily accessible. This is used by ParallelRunner on - # Windows, as package_name is required every time a new subprocess is spawned. + # global variable to make it easily accessible. This is used by validate_settings() + # below, and also by ParallelRunner on Windows, as package_name is required every + # time a new subprocess is spawned. global PACKAGE_NAME PACKAGE_NAME = package_name + + +def validate_settings(): + """Eagerly validate that the settings module is importable. This is desirable to + surface any syntax or import errors early. In particular, without eagerly importing + the settings module, dynaconf would silence any import error (e.g. missing + dependency, missing/mislabelled pipeline), and users would instead get a cryptic + error message ``Expected an instance of `ConfigLoader`, got `NoneType` instead``. + More info on the dynaconf issue: https://github.com/rochacbruno/dynaconf/issues/460 + """ + importlib.import_module(f"{PACKAGE_NAME}.settings") diff --git a/kedro/framework/session/session.py b/kedro/framework/session/session.py index aa2700fb0e..d80c177acf 100644 --- a/kedro/framework/session/session.py +++ b/kedro/framework/session/session.py @@ -27,7 +27,6 @@ # limitations under the License. # pylint: disable=invalid-name,global-statement """This module implements Kedro session responsible for project lifecycle.""" - import logging import logging.config import os @@ -46,7 +45,12 @@ _convert_paths_to_absolute_posix, ) from kedro.framework.hooks import get_hook_manager -from kedro.framework.project import configure_project, pipelines, settings +from kedro.framework.project import ( + configure_project, + pipelines, + settings, + validate_settings, +) from kedro.framework.session.store import BaseSessionStore from kedro.io.core import generate_timestamp from kedro.runner import AbstractRunner, SequentialRunner @@ -177,13 +181,15 @@ def create( # pylint: disable=too-many-arguments A new ``KedroSession`` instance. """ - # this is to make sure that for workflows that manually create session - # without going through one of our known entrypoints, e.g. some plugins like kedro-airflow, - # the project is still properly configured. This is for backward compatibility - # and should be removed in 0.18. + # This is to make sure that for workflows that manually create session + # without going through one of our known entrypoints, e.g. some plugins + # like kedro-airflow, the project is still properly configured. This + # is for backward compatibility and should be removed in 0.18. if package_name is not None: configure_project(package_name) + validate_settings() + session = cls( package_name=package_name, project_path=project_path, diff --git a/tests/extras/extensions/test_ipython.py b/tests/extras/extensions/test_ipython.py index 6f969fde19..03d3c0f3fe 100644 --- a/tests/extras/extensions/test_ipython.py +++ b/tests/extras/extensions/test_ipython.py @@ -101,6 +101,7 @@ def test_load_kedro_objects(self, tmp_path, mocker): project_path=tmp_path, ) mocker.patch("kedro.framework.session.session.configure_project") + mocker.patch("kedro.framework.session.session.validate_settings") mocker.patch( "kedro.framework.startup.bootstrap_project", return_value=fake_metadata, @@ -125,7 +126,7 @@ def test_load_kedro_objects(self, tmp_path, mocker): "session": mocker.ANY, } ) - assert mock_register_line_magic.call_count == 1 + mock_register_line_magic.assert_called_once() def test_load_kedro_objects_extra_args(self, tmp_path, mocker): fake_metadata = ProjectMetadata( diff --git a/tests/framework/context/test_context.py b/tests/framework/context/test_context.py index c868affa51..3e71ff5544 100644 --- a/tests/framework/context/test_context.py +++ b/tests/framework/context/test_context.py @@ -282,7 +282,6 @@ def mocked_logging(mocker): def dummy_context( tmp_path, prepare_project_dir, env, extra_params, mocker ): # pylint: disable=unused-argument - mocker.patch("kedro.framework.project._validate_module") configure_project(MOCK_PACKAGE_NAME) context = KedroContext( MOCK_PACKAGE_NAME, str(tmp_path), env=env, extra_params=extra_params diff --git a/tests/framework/project/test_pipeline_registry.py b/tests/framework/project/test_pipeline_registry.py index 933df7b800..bd7e2bc5a7 100644 --- a/tests/framework/project/test_pipeline_registry.py +++ b/tests/framework/project/test_pipeline_registry.py @@ -78,17 +78,15 @@ def register_pipelines(): def test_pipelines_after_configuring_project_shows_updated_values( - mock_package_name_with_pipelines_file, mocker + mock_package_name_with_pipelines_file, ): - mocker.patch("kedro.framework.project._validate_module") configure_project(mock_package_name_with_pipelines_file) assert isinstance(pipelines["new_pipeline"], Pipeline) def test_configure_project_should_not_raise_for_unimportable_pipelines( - mock_package_name_with_unimportable_pipelines_file, mocker + mock_package_name_with_unimportable_pipelines_file, ): - mocker.patch("kedro.framework.project._validate_module") # configure_project should not raise error for unimportable pipelines # since pipelines loading is lazy configure_project(mock_package_name_with_unimportable_pipelines_file) diff --git a/tests/framework/session/conftest.py b/tests/framework/session/conftest.py index f20637244b..1e12087418 100644 --- a/tests/framework/session/conftest.py +++ b/tests/framework/session/conftest.py @@ -25,6 +25,7 @@ # # See the License for the specific language governing permissions and # limitations under the License. +import importlib import logging from logging.handlers import QueueHandler, QueueListener from multiprocessing import Queue @@ -50,10 +51,12 @@ logger = logging.getLogger(__name__) +MOCK_PACKAGE_NAME = "fake_package" + @pytest.fixture def mock_package_name() -> str: - return "mock_package_name" + return MOCK_PACKAGE_NAME @pytest.fixture @@ -402,14 +405,6 @@ def register_catalog( ) -@pytest.fixture(autouse=True) -def patched_validate_module(mocker): - """Patching this so KedroSession could be created for testing purpose - since KedroSession.create is still calling configure_project at the moment - """ - mocker.patch("kedro.framework.project._validate_module") - - @pytest.fixture def project_hooks(): """A set of project hook implementations that log to stdout whenever it is invoked.""" @@ -464,3 +459,16 @@ def mock_session( return KedroSession.create( mock_package_name, tmp_path, extra_params={"params:key": "value"} ) + + +@pytest.fixture(autouse=True) +def mock_import(mocker): + # KedroSession eagerly validates that a project's settings.py is correct by + # importing it. settings.py does not actually exists as part of this test suite, + # so its import is mocked. + settings_module = f"{MOCK_PACKAGE_NAME}.settings" + mocker.patch( + "importlib.import_module", + wraps=importlib.import_module, + side_effect=(lambda x: None if x == settings_module else mocker.DEFAULT), + ) diff --git a/tests/framework/session/test_session.py b/tests/framework/session/test_session.py index 73446b55ca..749117f8a5 100644 --- a/tests/framework/session/test_session.py +++ b/tests/framework/session/test_session.py @@ -49,7 +49,6 @@ from kedro.framework.session import KedroSession, get_current_session from kedro.framework.session.store import BaseSessionStore, ShelveStore -_FAKE_PACKAGE_NAME = "fake_package" _FAKE_PROJECT_NAME = "fake_project" _FAKE_PIPELINE_NAME = "fake_pipeline" @@ -208,7 +207,7 @@ def local_logging_config(): @pytest.fixture -def fake_project(mocker, tmp_path, local_logging_config): +def fake_project(tmp_path, local_logging_config, mock_package_name): fake_project_dir = Path(tmp_path) / "fake_project" (fake_project_dir / "src").mkdir(parents=True) @@ -218,7 +217,7 @@ def fake_project(mocker, tmp_path, local_logging_config): "kedro": { "project_version": kedro_version, "project_name": _FAKE_PROJECT_NAME, - "package_name": _FAKE_PACKAGE_NAME, + "package_name": mock_package_name, } } } @@ -229,8 +228,6 @@ def fake_project(mocker, tmp_path, local_logging_config): env_logging.parent.mkdir(parents=True) env_logging.write_text(json.dumps(local_logging_config)) (fake_project_dir / "conf" / "local").mkdir() - - mocker.patch("kedro.framework.project._validate_module") return fake_project_dir @@ -251,13 +248,14 @@ def test_create( fake_project, mock_context_class, fake_session_id, + mock_package_name, mocker, env, extra_params, ): mock_click_ctx = mocker.patch("click.get_current_context").return_value session = KedroSession.create( - _FAKE_PACKAGE_NAME, fake_project, env=env, extra_params=extra_params + mock_package_name, fake_project, env=env, extra_params=extra_params ) expected_cli_data = { @@ -269,7 +267,7 @@ def test_create( expected_store = { "project_path": fake_project, "session_id": fake_session_id, - "package_name": _FAKE_PACKAGE_NAME, + "package_name": mock_package_name, "cli": expected_cli_data, } if env: @@ -281,7 +279,7 @@ def test_create( # called for logging setup mock_context_class.assert_called_once_with( project_path=fake_project, - package_name=_FAKE_PACKAGE_NAME, + package_name=mock_package_name, env=env, extra_params=extra_params, ) @@ -290,10 +288,15 @@ def test_create( @pytest.mark.usefixtures("mock_settings_context_class") def test_create_no_env_extra_params( - self, fake_project, mock_context_class, fake_session_id, mocker + self, + fake_project, + mock_context_class, + fake_session_id, + mock_package_name, + mocker, ): mock_click_ctx = mocker.patch("click.get_current_context").return_value - session = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + session = KedroSession.create(mock_package_name, fake_project) expected_cli_data = { "args": mock_click_ctx.args, @@ -304,14 +307,14 @@ def test_create_no_env_extra_params( expected_store = { "project_path": fake_project, "session_id": fake_session_id, - "package_name": _FAKE_PACKAGE_NAME, + "package_name": mock_package_name, "cli": expected_cli_data, } assert session.store == expected_store mock_context_class.assert_called_once_with( project_path=fake_project, - package_name=_FAKE_PACKAGE_NAME, + package_name=mock_package_name, env=None, extra_params=None, ) @@ -319,11 +322,13 @@ def test_create_no_env_extra_params( assert session.load_context() is mock_context_class.return_value @pytest.mark.usefixtures("mock_settings") - def test_load_context_with_envvar(self, fake_project, monkeypatch, mocker): + def test_load_context_with_envvar( + self, fake_project, monkeypatch, mock_package_name, mocker + ): mocker.patch("kedro.config.config.ConfigLoader.get") monkeypatch.setenv("KEDRO_ENV", "my_fake_env") - session = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + session = KedroSession.create(mock_package_name, fake_project) result = session.load_context() assert isinstance(result, KedroContext) @@ -331,16 +336,18 @@ def test_load_context_with_envvar(self, fake_project, monkeypatch, mocker): assert result.env == "my_fake_env" @pytest.mark.usefixtures("mock_settings_custom_context_class") - def test_load_context_custom_context_class(self, fake_project): - session = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + def test_load_context_custom_context_class(self, fake_project, mock_package_name): + session = KedroSession.create(mock_package_name, fake_project) result = session.load_context() assert isinstance(result, KedroContext) assert result.__class__.__name__ == "MyContext" @pytest.mark.usefixtures("mock_settings_context_class") - def test_default_store(self, fake_project, fake_session_id, caplog): - session = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + def test_default_store( + self, fake_project, fake_session_id, caplog, mock_package_name + ): + session = KedroSession.create(mock_package_name, fake_project) assert isinstance(session.store, dict) assert session._store.__class__ is BaseSessionStore assert session._store._path == (fake_project / "sessions").as_posix() @@ -358,10 +365,12 @@ def test_default_store(self, fake_project, fake_session_id, caplog): assert actual_log_messages == expected_log_messages @pytest.mark.usefixtures("mock_settings_shelve_session_store") - def test_shelve_store(self, fake_project, fake_session_id, caplog, mocker): + def test_shelve_store( + self, fake_project, fake_session_id, caplog, mock_package_name, mocker + ): mocker.patch("pathlib.Path.is_file", return_value=True) shelve_location = fake_project / "nested" / "sessions" - other = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + other = KedroSession.create(mock_package_name, fake_project) assert other._store.__class__ is ShelveStore assert other._store._path == shelve_location.as_posix() assert other._store._location == shelve_location / fake_session_id / "store" @@ -391,25 +400,26 @@ def test_wrong_store_type(self, mock_settings_file_bad_session_store_class): assert mock_settings.SESSION_STORE_CLASS @pytest.mark.usefixtures("mock_settings_bad_session_store_args") - def test_wrong_store_args(self, fake_project): + def test_wrong_store_args(self, fake_project, mock_package_name): classpath = f"{BaseSessionStore.__module__}.{BaseSessionStore.__qualname__}" pattern = ( f"Store config must only contain arguments valid for " f"the constructor of `{classpath}`." ) with pytest.raises(ValueError, match=re.escape(pattern)): - KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + KedroSession.create(mock_package_name, fake_project) def test_store_uncaught_error( self, fake_project, fake_session_id, mock_settings_uncaught_session_store_exception, + mock_package_name, ): classpath = f"{BaseSessionStore.__module__}.{BaseSessionStore.__qualname__}" pattern = f"Failed to instantiate session store of type `{classpath}`." with pytest.raises(ValueError, match=re.escape(pattern)): - KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + KedroSession.create(mock_package_name, fake_project) mock_settings_uncaught_session_store_exception.assert_called_once_with( path="path", session_id=fake_session_id @@ -419,7 +429,7 @@ def test_store_uncaught_error( @pytest.mark.parametrize("fake_git_status", ["dirty", ""]) @pytest.mark.parametrize("fake_commit_hash", ["fake_commit_hash"]) def test_git_describe( - self, fake_project, fake_commit_hash, fake_git_status, mocker + self, fake_project, fake_commit_hash, fake_git_status, mock_package_name, mocker ): """Test that git information is added to the session store""" mocker.patch( @@ -427,7 +437,7 @@ def test_git_describe( side_effect=[fake_commit_hash.encode(), fake_git_status.encode()], ) - session = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + session = KedroSession.create(mock_package_name, fake_project) expected_git_info = { "commit_sha": fake_commit_hash, "dirty": bool(fake_git_status), @@ -443,12 +453,14 @@ def test_git_describe( NotADirectoryError, ], ) - def test_git_describe_error(self, fake_project, exception, mocker, caplog): + def test_git_describe_error( + self, fake_project, exception, mock_package_name, mocker, caplog + ): """Test that git information is not added to the session store if call to git fails """ mocker.patch("subprocess.check_output", side_effect=exception) - session = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + session = KedroSession.create(mock_package_name, fake_project) assert "git" not in session.store expected_log_messages = [f"Unable to git describe {fake_project}"] @@ -460,11 +472,11 @@ def test_git_describe_error(self, fake_project, exception, mocker, caplog): assert actual_log_messages == expected_log_messages @pytest.mark.usefixtures("mock_settings") - def test_log_error(self, fake_project): + def test_log_error(self, fake_project, mock_package_name): """Test logging the error by the session""" # test that the error is not swallowed by the session with pytest.raises(FakeException), KedroSession.create( - _FAKE_PACKAGE_NAME, fake_project + mock_package_name, fake_project ) as session: raise FakeException @@ -476,17 +488,16 @@ def test_log_error(self, fake_project): ) @pytest.mark.usefixtures("mock_settings") - def test_get_current_session(self, fake_project, mocker): + def test_get_current_session(self, fake_project, mock_package_name): assert get_current_session(silent=True) is None # no sessions yet pattern = "There is no active Kedro session" with pytest.raises(RuntimeError, match=pattern): get_current_session() - mocker.patch("kedro.framework.project._validate_module") - configure_project(_FAKE_PACKAGE_NAME) - session1 = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) - session2 = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) + configure_project(mock_package_name) + session1 = KedroSession.create(mock_package_name, fake_project) + session2 = KedroSession.create(mock_package_name, fake_project) with session1: assert get_current_session() is session1 @@ -511,6 +522,7 @@ def test_run( fake_session_id, fake_pipeline_name, mock_context_class, + mock_package_name, mocker, ): """Test running the project via the session""" @@ -530,7 +542,7 @@ def test_run( mock_runner = mocker.Mock() mock_pipeline = mock_context._filter_pipeline.return_value - with KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) as session: + with KedroSession.create(mock_package_name, fake_project) as session: session.run(runner=mock_runner, pipeline_name=fake_pipeline_name) record_data = { @@ -563,7 +575,7 @@ def test_run( ) @pytest.mark.usefixtures("mock_settings_context_class") - def test_run_non_existent_pipeline(self, fake_project, mocker): + def test_run_non_existent_pipeline(self, fake_project, mock_package_name, mocker): mock_runner = mocker.Mock() pattern = ( @@ -572,7 +584,7 @@ def test_run_non_existent_pipeline(self, fake_project, mocker): "by the 'register_pipelines' function." ) with pytest.raises(KedroContextError, match=re.escape(pattern)): - with KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) as session: + with KedroSession.create(mock_package_name, fake_project) as session: session.run(runner=mock_runner, pipeline_name="doesnotexist") @pytest.mark.usefixtures("mock_settings_context_class") @@ -583,6 +595,7 @@ def test_run_exception( # pylint: disable=too-many-locals fake_session_id, fake_pipeline_name, mock_context_class, + mock_package_name, mocker, ): """Test exception being raise during the run""" @@ -604,7 +617,7 @@ def test_run_exception( # pylint: disable=too-many-locals mock_pipeline = mock_context._filter_pipeline.return_value with pytest.raises(FakeException), KedroSession.create( - _FAKE_PACKAGE_NAME, fake_project + mock_package_name, fake_project ) as session: session.run(runner=mock_runner, pipeline_name=fake_pipeline_name) @@ -640,8 +653,10 @@ def test_run_exception( # pylint: disable=too-many-locals @pytest.mark.usefixtures("mock_settings") -def test_setup_logging_using_absolute_path(fake_project, mocked_logging): - KedroSession.create(_FAKE_PACKAGE_NAME, fake_project) +def test_setup_logging_using_absolute_path( + fake_project, mocked_logging, mock_package_name +): + KedroSession.create(mock_package_name, fake_project) mocked_logging.assert_called_once() call_args = mocked_logging.call_args[0][0] diff --git a/tests/framework/test_startup.py b/tests/framework/test_startup.py index e731be5757..c88f48a8c0 100644 --- a/tests/framework/test_startup.py +++ b/tests/framework/test_startup.py @@ -241,10 +241,8 @@ def test_non_existent_source_path(self, tmp_path): class TestBootstrapProject: - def test_bootstrap_project(self, mocker, monkeypatch, tmp_path): + def test_bootstrap_project(self, monkeypatch, tmp_path): monkeypatch.delenv("PYTHONPATH", raising=False) - # assume settings.py is okay - mocker.patch("kedro.framework.project._validate_module") pyproject_toml_payload = { "tool": { "kedro": {