From cbd73ee05f24e1d8b740dd6b8733aca57dc097ef Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Mon, 15 Jul 2024 15:23:52 +0100 Subject: [PATCH 1/6] Updated docstring text Signed-off-by: Elena Khaustova --- kedro/io/data_catalog.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index 6c0c87f522..713961acaa 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -681,13 +681,15 @@ def add_all( self.add(name, dataset, replace) def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> None: - """Adds instances of ``MemoryDataset``, containing the data provided - through feed_dict. + """This function adds datasets to the ``DataCatalog`` using the data provided through the `feed_dict`. + + `feed_dict` dictionary key is used as a dataset name, and a value is used to create an instance of + ``MemoryDataset`` before adding to the ``DataCatalog`` for all the value types except of ``AbstractDataset``. + In the last case, the ``AbstractDataset`` is added as it is. Args: - feed_dict: A feed dict with data to be added in memory. - replace: Specifies whether to replace an existing dataset - with the same name is allowed. + feed_dict: A feed dict with data to be added to the ``DataCatalog``. + replace: Specifies whether to replace an existing dataset with the same name in the ``DataCatalog``. Example: :: @@ -698,12 +700,12 @@ def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> Non >>> 'col2': [4, 5], >>> 'col3': [5, 6]}) >>> - >>> io = DataCatalog() - >>> io.add_feed_dict({ + >>> catalog = DataCatalog() + >>> catalog.add_feed_dict({ >>> 'data': df >>> }, replace=True) >>> - >>> assert io.load("data").equals(df) + >>> assert catalog.load("data").equals(df) """ for dataset_name in feed_dict: if isinstance(feed_dict[dataset_name], AbstractDataset): From 0e6e60bd6de1425a781de3da1791774d5a57f7df Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Mon, 15 Jul 2024 15:37:54 +0100 Subject: [PATCH 2/6] Updated example Signed-off-by: Elena Khaustova --- kedro/io/data_catalog.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index 713961acaa..b671844e6b 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -694,18 +694,25 @@ def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> Non Example: :: + >>> from kedro_datasets.pandas import CSVDataset >>> import pandas as pd >>> - >>> df = pd.DataFrame({'col1': [1, 2], - >>> 'col2': [4, 5], - >>> 'col3': [5, 6]}) + >>> df = pd.DataFrame({"col1": [1, 2], + >>> "col2": [4, 5], + >>> "col3": [5, 6]}) >>> >>> catalog = DataCatalog() >>> catalog.add_feed_dict({ - >>> 'data': df + >>> "data_df": df >>> }, replace=True) >>> - >>> assert catalog.load("data").equals(df) + >>> assert catalog.load("data_df").equals(df) + >>> + >>> csv_dataset = CSVDataset(filepath="test.csv") + >>> csv_dataset.save(df) + >>> catalog.add_feed_dict({"data_csv_dataset": csv_dataset}) + >>> + >>> assert catalog.load("data_csv_dataset").equals(df) """ for dataset_name in feed_dict: if isinstance(feed_dict[dataset_name], AbstractDataset): From d0420f1f3bfe2a9700e6b67430244159e81b746e Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Mon, 15 Jul 2024 15:52:37 +0100 Subject: [PATCH 3/6] Added alias Signed-off-by: Elena Khaustova --- kedro/io/data_catalog.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index b671844e6b..9a6ccedf2f 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -687,6 +687,9 @@ def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> Non ``MemoryDataset`` before adding to the ``DataCatalog`` for all the value types except of ``AbstractDataset``. In the last case, the ``AbstractDataset`` is added as it is. + Has `from_dict` alias, so it can be called using `from_dict` name instead. + Will be fully renamed to `from_dict` in the `0.20.0` version. + Args: feed_dict: A feed dict with data to be added to the ``DataCatalog``. replace: Specifies whether to replace an existing dataset with the same name in the ``DataCatalog``. @@ -722,6 +725,9 @@ def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> Non self.add(dataset_name, dataset, replace) + # Alias for add_feed_dict method + from_dict = add_feed_dict + def list(self, regex_search: str | None = None) -> list[str]: """ List of all dataset names registered in the catalog. From ee563c112ac4cba0557da247e45ccf9a5fa23590 Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Tue, 16 Jul 2024 10:26:47 +0100 Subject: [PATCH 4/6] Removed alias Signed-off-by: Elena Khaustova --- kedro/io/data_catalog.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index 9a6ccedf2f..b671844e6b 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -687,9 +687,6 @@ def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> Non ``MemoryDataset`` before adding to the ``DataCatalog`` for all the value types except of ``AbstractDataset``. In the last case, the ``AbstractDataset`` is added as it is. - Has `from_dict` alias, so it can be called using `from_dict` name instead. - Will be fully renamed to `from_dict` in the `0.20.0` version. - Args: feed_dict: A feed dict with data to be added to the ``DataCatalog``. replace: Specifies whether to replace an existing dataset with the same name in the ``DataCatalog``. @@ -725,9 +722,6 @@ def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> Non self.add(dataset_name, dataset, replace) - # Alias for add_feed_dict method - from_dict = add_feed_dict - def list(self, regex_search: str | None = None) -> list[str]: """ List of all dataset names registered in the catalog. From dada3d49e20186778de30269d70de88666ba3fe7 Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Tue, 16 Jul 2024 10:30:08 +0100 Subject: [PATCH 5/6] Applied suggested changes Signed-off-by: Elena Khaustova --- kedro/io/data_catalog.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index b671844e6b..4556791765 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -681,11 +681,11 @@ def add_all( self.add(name, dataset, replace) def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> None: - """This function adds datasets to the ``DataCatalog`` using the data provided through the `feed_dict`. + """Add datasets to the ``DataCatalog`` using the data provided through the `feed_dict`. - `feed_dict` dictionary key is used as a dataset name, and a value is used to create an instance of - ``MemoryDataset`` before adding to the ``DataCatalog`` for all the value types except of ``AbstractDataset``. - In the last case, the ``AbstractDataset`` is added as it is. + `feed_dict` keys are used as dataset names, and values can either be raw data or instances of + ``AbstractDataset``. In the former case, instances of ``MemoryDataset`` are automatically created before adding + to the ``DataCatalog``. Args: feed_dict: A feed dict with data to be added to the ``DataCatalog``. From 390dfd14bee24256e187a644ab667473cb79b1c0 Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Tue, 16 Jul 2024 10:57:22 +0100 Subject: [PATCH 6/6] Updated docstring based on comments Signed-off-by: Elena Khaustova --- kedro/io/data_catalog.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kedro/io/data_catalog.py b/kedro/io/data_catalog.py index 4556791765..465d7ae09e 100644 --- a/kedro/io/data_catalog.py +++ b/kedro/io/data_catalog.py @@ -683,12 +683,13 @@ def add_all( def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> None: """Add datasets to the ``DataCatalog`` using the data provided through the `feed_dict`. - `feed_dict` keys are used as dataset names, and values can either be raw data or instances of - ``AbstractDataset``. In the former case, instances of ``MemoryDataset`` are automatically created before adding - to the ``DataCatalog``. + `feed_dict` is a dictionary where the keys represent dataset names and the values can either be raw data or + Kedro datasets - instances of classes that inherit from ``AbstractDataset``. If raw data is provided, + it will be automatically wrapped in a ``MemoryDataset`` before being added to the ``DataCatalog``. Args: - feed_dict: A feed dict with data to be added to the ``DataCatalog``. + feed_dict: A dictionary with data to be added to the ``DataCatalog``. Keys are dataset names and + values can be raw data or instances of classes that inherit from ``AbstractDataset``. replace: Specifies whether to replace an existing dataset with the same name in the ``DataCatalog``. Example: