From bcca77ee608da62745820a6e1519a962cd52e75d Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Thu, 4 Jul 2024 09:15:33 +0100 Subject: [PATCH] Key completion for dataset access (#3973) * Added key completions example Signed-off-by: Elena Khaustova * Implemented autocompletion by original name Signed-off-by: Elena Khaustova * Added test for key completions Signed-off-by: Elena Khaustova * Updated RELEASE.md Signed-off-by: Elena Khaustova * Added return types Signed-off-by: Elena Khaustova * Added variable types Signed-off-by: Elena Khaustova * Added variable types Signed-off-by: Elena Khaustova --------- Signed-off-by: Elena Khaustova --- RELEASE.md | 2 ++ kedro/io/data_catalog.py | 20 ++++++++++++++------ tests/io/test_data_catalog.py | 24 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index f6b37a2606..512edb019d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,6 +6,8 @@ * Updated error message for invalid catalog entries. * Updated error message for catalog entries when the dataset class is not found with hints on how to resolve the issue. * Fixed a bug in the `DataCatalog` `shallow_copy()` method to ensure it returns the type of the used catalog and doesn't cast it to `DataCatalog`. +* Implemented key completion support for accessing datasets in the `DataCatalog`. + ## Breaking changes to the API diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index 7778cfd70f..f9f74f8fbe 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -105,21 +105,23 @@ def __init__( """Return a _FrozenDatasets instance from some datasets collections. Each collection could either be another _FrozenDatasets or a dictionary. """ + self._original_names: set[str] = set() for collection in datasets_collections: if isinstance(collection, _FrozenDatasets): self.__dict__.update(collection.__dict__) + self._original_names.update(collection._original_names) else: # Non-word characters in dataset names are replaced with `__` # for easy access to transcoded/prefixed datasets. - self.__dict__.update( - { - _sub_nonword_chars(dataset_name): dataset - for dataset_name, dataset in collection.items() - } - ) + for dataset_name, dataset in collection.items(): + self.__dict__[_sub_nonword_chars(dataset_name)] = dataset + self._original_names.add(dataset_name) # Don't allow users to add/change attributes on the fly def __setattr__(self, key: str, value: Any) -> None: + if key == "_original_names": + super().__setattr__(key, value) + return msg = "Operation not allowed! " if key in self.__dict__: msg += "Please change datasets through configuration." @@ -127,6 +129,12 @@ def __setattr__(self, key: str, value: Any) -> None: msg += "Please use DataCatalog.add() instead." raise AttributeError(msg) + def _ipython_key_completions_(self) -> list[str]: + return list(self._original_names) + + def __getitem__(self, key: str) -> Any: + return self.__dict__[_sub_nonword_chars(key)] + class DataCatalog: """``DataCatalog`` stores instances of ``AbstractDataset`` implementations diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 1deecb7a0f..aaa1fed15d 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -442,6 +442,30 @@ class MyDataCatalog(DataCatalog): copy = data_catalog.shallow_copy() assert isinstance(copy, MyDataCatalog) + def test_key_completions(self, data_catalog_from_config): + """Test catalog.datasets key completions""" + assert isinstance(data_catalog_from_config.datasets["boats"], CSVDataset) + assert isinstance(data_catalog_from_config.datasets["cars"], CSVDataset) + data_catalog_from_config.add_feed_dict( + { + "params:model_options": [1, 2, 4], + "params:model_options.random_state": [0, 42, 67], + } + ) + assert isinstance( + data_catalog_from_config.datasets["params:model_options"], MemoryDataset + ) + assert isinstance( + data_catalog_from_config.datasets["params__model_options.random_state"], + MemoryDataset, + ) + assert set(data_catalog_from_config.datasets._ipython_key_completions_()) == { + "boats", + "cars", + "params:model_options", + "params:model_options.random_state", + } + class TestDataCatalogFromConfig: def test_from_sane_config(self, data_catalog_from_config, dummy_dataframe):