Skip to content

Commit

Permalink
Key completion for dataset access (#3973)
Browse files Browse the repository at this point in the history
* Added key completions example

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Implemented autocompletion by original name

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Added test for key completions

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Updated RELEASE.md

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Added return types

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Added variable types

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

* Added variable types

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>

---------

Signed-off-by: Elena Khaustova <ymax70rus@gmail.com>
  • Loading branch information
ElenaKhaustova committed Jul 4, 2024
1 parent c269cde commit bcca77e
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 6 deletions.
2 changes: 2 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* Updated error message for invalid catalog entries.
* Updated error message for catalog entries when the dataset class is not found with hints on how to resolve the issue.
* Fixed a bug in the `DataCatalog` `shallow_copy()` method to ensure it returns the type of the used catalog and doesn't cast it to `DataCatalog`.
* Implemented key completion support for accessing datasets in the `DataCatalog`.


## Breaking changes to the API

Expand Down
20 changes: 14 additions & 6 deletions kedro/io/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,36 @@ def __init__(
"""Return a _FrozenDatasets instance from some datasets collections.
Each collection could either be another _FrozenDatasets or a dictionary.
"""
self._original_names: set[str] = set()
for collection in datasets_collections:
if isinstance(collection, _FrozenDatasets):
self.__dict__.update(collection.__dict__)
self._original_names.update(collection._original_names)
else:
# Non-word characters in dataset names are replaced with `__`
# for easy access to transcoded/prefixed datasets.
self.__dict__.update(
{
_sub_nonword_chars(dataset_name): dataset
for dataset_name, dataset in collection.items()
}
)
for dataset_name, dataset in collection.items():
self.__dict__[_sub_nonword_chars(dataset_name)] = dataset
self._original_names.add(dataset_name)

# Don't allow users to add/change attributes on the fly
def __setattr__(self, key: str, value: Any) -> None:
    """Reject attribute assignment on this object.

    The only attribute that may be assigned is the internal
    ``_original_names`` one; that assignment is forwarded to the parent
    class implementation.

    Args:
        key: Name of the attribute being assigned.
        value: Value being assigned.

    Raises:
        AttributeError: For every ``key`` other than ``_original_names``.
            The message differs depending on whether ``key`` is already
            present in the instance ``__dict__``.
    """
    if key != "_original_names":
        if key in self.__dict__:
            hint = "Please change datasets through configuration."
        else:
            hint = "Please use DataCatalog.add() instead."
        raise AttributeError(f"Operation not allowed! {hint}")
    super().__setattr__(key, value)

def _ipython_key_completions_(self) -> list[str]:
    """Return the original dataset names offered for key completion.

    The names are read from ``self._original_names`` (a set) and returned
    sorted, so the candidate order is the same on every call and every
    interpreter run instead of following set iteration order.

    Returns:
        The original (unmodified) dataset names, in ascending order.
    """
    return sorted(self._original_names)

def __getitem__(self, key: str) -> Any:
    """Look up an entry by name using subscript syntax.

    ``key`` is first passed through ``_sub_nonword_chars`` and the result
    is used to index the instance ``__dict__``.

    Args:
        key: Name to look up.

    Returns:
        The object stored in the instance ``__dict__`` under the
        converted name.

    Raises:
        KeyError: If the converted name is not in the instance ``__dict__``.
    """
    attribute_name = _sub_nonword_chars(key)
    return vars(self)[attribute_name]


class DataCatalog:
"""``DataCatalog`` stores instances of ``AbstractDataset`` implementations
Expand Down
24 changes: 24 additions & 0 deletions tests/io/test_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,30 @@ class MyDataCatalog(DataCatalog):
copy = data_catalog.shallow_copy()
assert isinstance(copy, MyDataCatalog)

def test_key_completions(self, data_catalog_from_config):
    """Check item access and key completions on ``catalog.datasets``."""
    # ``datasets`` is re-read from the catalog on every access rather than
    # cached, exactly as each individual lookup did before.
    for configured_name in ("boats", "cars"):
        assert isinstance(
            data_catalog_from_config.datasets[configured_name], CSVDataset
        )

    feed = {
        "params:model_options": [1, 2, 4],
        "params:model_options.random_state": [0, 42, 67],
    }
    data_catalog_from_config.add_feed_dict(feed)

    # One lookup uses the original name, the other a name whose ``:`` has
    # already been replaced with ``__``.
    for lookup_name in (
        "params:model_options",
        "params__model_options.random_state",
    ):
        assert isinstance(
            data_catalog_from_config.datasets[lookup_name], MemoryDataset
        )

    completions = data_catalog_from_config.datasets._ipython_key_completions_()
    expected_names = {
        "boats",
        "cars",
        "params:model_options",
        "params:model_options.random_state",
    }
    assert set(completions) == expected_names


class TestDataCatalogFromConfig:
def test_from_sane_config(self, data_catalog_from_config, dummy_dataframe):
Expand Down

0 comments on commit bcca77e

Please sign in to comment.