From 6dad5f5db62174a9af4f7f880efbabd8ab09382f Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Wed, 5 Jul 2023 15:48:59 +0100 Subject: [PATCH 1/2] Add warning for catch-all patterns Signed-off-by: Ankita Katiyar --- kedro/io/data_catalog.py | 7 +++++++ tests/io/test_data_catalog.py | 8 +++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index b21f90a8cf..b0f6a05986 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -386,6 +386,13 @@ def _get_dataset( self._load_versions.get(data_set_name), self._save_version, ) + if self._specificity(matched_pattern) == 0: + self._logger.warning( + "The dataset '%s' is using the catch-all pattern '%s'", + data_set_name, + matched_pattern, + ) + self.add(data_set_name, data_set) if data_set_name not in self._data_sets: error_msg = f"Dataset '{data_set_name}' not found in the catalog" diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 6752eeac04..6d05fe8a8c 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -818,11 +818,17 @@ def test_sorting_order_patterns(self, config_with_dataset_factories_only_pattern ] assert list(catalog._dataset_patterns.keys()) == sorted_keys_expected - def test_default_dataset(self, config_with_dataset_factories_with_default): + def test_default_dataset(self, config_with_dataset_factories_with_default, caplog): """Check that default dataset is used when no other pattern matches""" catalog = DataCatalog.from_config(**config_with_dataset_factories_with_default) assert "jet@planes" not in catalog._data_sets jet_dataset = catalog._get_dataset("jet@planes") + log_record = caplog.records[0] + assert log_record.levelname == "WARNING" + assert ( + "The dataset 'jet@planes' is using the catch-all pattern '{default_dataset}'" + in log_record.message + ) assert isinstance(jet_dataset, CSVDataSet) def test_unmatched_key_error_when_parsing_config( From 28d7ca43c5eb0f83d498e59649a1f7b87674a551 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Thu, 6 Jul 2023 13:34:06 +0100 Subject: [PATCH 2/2] Update warning message Signed-off-by: Ankita Katiyar --- kedro/io/data_catalog.py | 5 +++-- tests/io/test_data_catalog.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index b0f6a05986..40d079e6ac 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -388,9 +388,10 @@ def _get_dataset( ) if self._specificity(matched_pattern) == 0: self._logger.warning( - "The dataset '%s' is using the catch-all pattern '%s'", - data_set_name, + "Config from the dataset factory pattern '%s' in the catalog will be used to " + "override the default MemoryDataset creation for the dataset '%s'", matched_pattern, + data_set_name, ) self.add(data_set_name, data_set) diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 6d05fe8a8c..74858702bf 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -826,8 +826,9 @@ def test_default_dataset(self, config_with_dataset_factories_with_default, caplo log_record = caplog.records[0] assert log_record.levelname == "WARNING" assert ( - "The dataset 'jet@planes' is using the catch-all pattern '{default_dataset}'" - in log_record.message + "Config from the dataset factory pattern '{default_dataset}' " + "in the catalog will be used to override the default " + "MemoryDataset creation for the dataset 'jet@planes'" in log_record.message ) assert isinstance(jet_dataset, CSVDataSet)