
Pretty printing catalog #3990

Merged Jul 18, 2024 · 48 commits

Commits
502f982
Implemented basic __repr__
ElenaKhaustova Jul 3, 2024
9aa704f
Updated __repr__
ElenaKhaustova Jul 3, 2024
45b51e8
Removed __str__
ElenaKhaustova Jul 3, 2024
94e6465
Updated _describe() for CachedDataset
ElenaKhaustova Jul 3, 2024
e2884e7
Made pretty_repr protected
ElenaKhaustova Jul 3, 2024
248a2c0
Merge branch 'main' into feature/3980-pretty-printing-dataset
ElenaKhaustova Jul 4, 2024
aea8ec0
Reverted width parameter to default
ElenaKhaustova Jul 4, 2024
413481a
Implemented repr for catalog
ElenaKhaustova Jul 4, 2024
2c9a7bb
Disable sorting
ElenaKhaustova Jul 4, 2024
7f3019f
Replace set with dict to keep original datasets order when printing
ElenaKhaustova Jul 4, 2024
659de36
Updated printing params
ElenaKhaustova Jul 4, 2024
d1cb03f
Updated printing width
ElenaKhaustova Jul 4, 2024
a3a0c38
Merge branch 'feature/3980-pretty-printing-dataset' into feature/3981…
ElenaKhaustova Jul 4, 2024
c85c92e
Removed params_repr
ElenaKhaustova Jul 4, 2024
4de32c4
Disable sorting
ElenaKhaustova Jul 4, 2024
9e6bae9
Disable sorting
ElenaKhaustova Jul 4, 2024
eb6f148
Merge branch 'feature/3980-pretty-printing-dataset' into feature/3981…
ElenaKhaustova Jul 4, 2024
87dd387
Merge branch 'main' into feature/3981-pretty-printing-catalog
ElenaKhaustova Jul 8, 2024
478df5c
Disabled compact
ElenaKhaustova Jul 8, 2024
57f9a3f
Merge branch 'main' into feature/3980-pretty-printing-dataset
ElenaKhaustova Jul 8, 2024
e61a775
Updated test_str_representation
ElenaKhaustova Jul 8, 2024
0f84735
Updated cached dataset tests
ElenaKhaustova Jul 8, 2024
3a7e748
Updated data catalog tests
ElenaKhaustova Jul 8, 2024
216cb42
Updated core tests
ElenaKhaustova Jul 8, 2024
ca42da1
Updated versioned dataset tests
ElenaKhaustova Jul 8, 2024
924b53e
Updated tests for lambda dataset
ElenaKhaustova Jul 8, 2024
f529c4b
Updated tests for memory dataset
ElenaKhaustova Jul 8, 2024
08a5f0f
Merge branch 'feature/3980-pretty-printing-dataset' into feature/3981…
ElenaKhaustova Jul 8, 2024
967c43f
Updated release notes
ElenaKhaustova Jul 8, 2024
7754078
Set width to maxsize
ElenaKhaustova Jul 9, 2024
14f237f
Removed top-level keys sorting
ElenaKhaustova Jul 9, 2024
cfac9d6
Updated tests
ElenaKhaustova Jul 9, 2024
bfc0841
Merge branch 'main' into feature/3980-pretty-printing-dataset
ElenaKhaustova Jul 9, 2024
69a2229
Merge branch 'feature/3980-pretty-printing-dataset' into feature/3981…
ElenaKhaustova Jul 9, 2024
7190b3c
Merge branch 'main' into feature/3980-pretty-printing-dataset
ElenaKhaustova Jul 9, 2024
54964f8
Updated release notes
ElenaKhaustova Jul 9, 2024
410c150
Merge branch 'feature/3980-pretty-printing-dataset' into feature/3981…
ElenaKhaustova Jul 9, 2024
1b0ae7a
Decoupled describe from pretty printing
ElenaKhaustova Jul 12, 2024
c6a8a31
Merge branch 'main' into feature/3980-pretty-printing-dataset
ElenaKhaustova Jul 12, 2024
22aec50
Returned old __str__ to avoid a breaking change
ElenaKhaustova Jul 12, 2024
28a961d
Updated tests
ElenaKhaustova Jul 12, 2024
6edc5b2
Merge branch 'feature/3980-pretty-printing-dataset' into feature/3981…
ElenaKhaustova Jul 12, 2024
d5cd26b
Merge branch 'main' into feature/3980-pretty-printing-dataset
ElenaKhaustova Jul 16, 2024
d7f50aa
Replaced deprecation comment with TODO
ElenaKhaustova Jul 16, 2024
f1b1435
Merge branch 'feature/3980-pretty-printing-dataset' into feature/3981…
ElenaKhaustova Jul 16, 2024
16f04b4
Merge branch 'main' into feature/3981-pretty-printing-catalog
ElenaKhaustova Jul 17, 2024
43d569d
Merge branch 'main' into feature/3981-pretty-printing-catalog
ElenaKhaustova Jul 18, 2024
ef3b2c0
Merge branch 'main' into feature/3981-pretty-printing-catalog
ElenaKhaustova Jul 18, 2024
2 changes: 1 addition & 1 deletion RELEASE.md
@@ -7,7 +7,7 @@
* Updated error message for catalog entries when the dataset class is not found with hints on how to resolve the issue.
* Fixed a bug in the `DataCatalog` `shallow_copy()` method to ensure it returns the type of the used catalog and doesn't cast it to `DataCatalog`.
* Implemented key completion support for accessing datasets in the `DataCatalog`.

* Implemented dataset and `DataCatalog` pretty printing.

## Breaking changes to the API

4 changes: 2 additions & 2 deletions kedro/io/cached_dataset.py
@@ -94,8 +94,8 @@ def _from_config(config: dict, version: Version | None) -> AbstractDataset:

def _describe(self) -> dict[str, Any]:
return {
"dataset": self._dataset._describe(),
"cache": self._cache._describe(),
"dataset": self._dataset._pretty_repr(self._dataset._describe()),
"cache": self._cache._pretty_repr(self._cache._describe()),
}

def _load(self) -> Any:
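The `CachedDataset` change above embeds the already-rendered string repr of the child datasets in `_describe()`. A minimal sketch (not the PR's exact code; the class and dataset names are taken from the diff) of why the outer repr then shows the children as quoted strings — the inner repr contains single quotes, so Python's string repr switches to double quotes around it:

```python
import pprint
import sys

# Pre-rendered child reprs, as CachedDataset._describe() now returns them.
inner = "kedro.io.memory_dataset.MemoryDataset(data='<int>')"
cache = "kedro.io.memory_dataset.MemoryDataset()"
description = {"dataset": inner, "cache": cache}

# Formatting each value with pprint quotes the embedded repr strings;
# a string containing single quotes is rendered with double-quote delimiters.
parts = [
    f"{name}={pprint.pformat(value, width=sys.maxsize)}"
    for name, value in description.items()
    if value is not None
]
print(f"CachedDataset({', '.join(parts)})")
```

This is what the updated `test_str` below checks: the `dataset=` argument ends up wrapped in double quotes, while `cache=` keeps single quotes.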
39 changes: 16 additions & 23 deletions kedro/io/core.py
@@ -7,7 +7,9 @@
import copy
import logging
import os
import pprint
import re
import sys
import warnings
from collections import namedtuple
from datetime import datetime, timezone
@@ -227,32 +229,23 @@ def save(self, data: _DI) -> None:
message = f"Failed while saving data to data set {str(self)}.\n{str(exc)}"
raise DatasetError(message) from exc

def __str__(self) -> str:
def _to_str(obj: Any, is_root: bool = False) -> str:
"""Returns a string representation where
1. The root level (i.e. the Dataset.__init__ arguments) are
formatted like Dataset(key=value).
2. Dictionaries have the keys alphabetically sorted recursively.
3. None values are not shown.
"""

fmt = "{}={}" if is_root else "'{}': {}" # 1

if isinstance(obj, dict):
sorted_dict = sorted(obj.items(), key=lambda pair: str(pair[0])) # 2

text = ", ".join(
fmt.format(key, _to_str(value)) # 2
for key, value in sorted_dict
if value is not None # 3
def _pretty_repr(self, object_description: dict[str, Any]) -> str:
str_keys = []
for arg_name, arg_descr in object_description.items():
if arg_descr is not None:
descr = pprint.pformat(
arg_descr,
sort_dicts=False,
compact=True,
depth=2,
width=sys.maxsize,
)
str_keys.append(f"{arg_name}={descr}")

return text if is_root else "{" + text + "}" # 1

# not a dictionary
return str(obj)
return f"{type(self).__module__}.{type(self).__name__}({', '.join(str_keys)})"

return f"{type(self).__name__}({_to_str(self._describe(), True)})"
def __repr__(self) -> str:
return self._pretty_repr(self._describe())

@abc.abstractmethod
def _load(self) -> _DO:
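The new `_pretty_repr` replaces the hand-rolled `_to_str` with `pprint.pformat`. A standalone sketch of its behavior, assuming a hypothetical `MyDataset` description (insertion order preserved via `sort_dicts=False`, `None`-valued arguments dropped, nesting below two levels collapsed to `{...}`, and `width=sys.maxsize` keeping everything on one line):

```python
import pprint
import sys
from typing import Any


def pretty_repr(name: str, description: dict[str, Any]) -> str:
    """Sketch of the PR's _pretty_repr, with the class name passed in."""
    parts = []
    for arg_name, arg_descr in description.items():
        if arg_descr is not None:  # None values are not shown
            descr = pprint.pformat(
                arg_descr,
                sort_dicts=False,   # keep the order _describe() returned
                compact=True,
                depth=2,            # deeper nesting collapses to {...}
                width=sys.maxsize,  # never wrap across lines
            )
            parts.append(f"{arg_name}={descr}")
    return f"{name}({', '.join(parts)})"


print(pretty_repr("MyDataset", {"filepath": "data.csv", "version": None}))
# MyDataset(filepath='data.csv')
```

Note the contrast with the removed `_to_str`: keys are no longer sorted alphabetically, and values go through `pformat` (so strings come out quoted) rather than plain `str()`.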
19 changes: 16 additions & 3 deletions kedro/io/data_catalog.py
@@ -9,6 +9,7 @@
import copy
import difflib
import logging
import pprint
import re
from typing import Any, Dict

@@ -106,7 +107,7 @@ def __init__(
"""Return a _FrozenDatasets instance from some datasets collections.
Each collection could either be another _FrozenDatasets or a dictionary.
"""
self._original_names: set[str] = set()
self._original_names: dict[str, str] = {}
for collection in datasets_collections:
if isinstance(collection, _FrozenDatasets):
self.__dict__.update(collection.__dict__)
@@ -116,7 +117,7 @@ def __init__(
# for easy access to transcoded/prefixed datasets.
for dataset_name, dataset in collection.items():
self.__dict__[_sub_nonword_chars(dataset_name)] = dataset
self._original_names.add(dataset_name)
self._original_names[dataset_name] = ""

# Don't allow users to add/change attributes on the fly
def __setattr__(self, key: str, value: Any) -> None:
@@ -131,11 +132,20 @@ def __setattr__(self, key: str, value: Any) -> None:
raise AttributeError(msg)

def _ipython_key_completions_(self) -> list[str]:
return list(self._original_names)
return list(self._original_names.keys())

def __getitem__(self, key: str) -> Any:
return self.__dict__[_sub_nonword_chars(key)]

def __repr__(self) -> str:
datasets_repr = {}
for ds_name in self._original_names.keys():
datasets_repr[ds_name] = self.__dict__[
_sub_nonword_chars(ds_name)
].__repr__()

return pprint.pformat(datasets_repr, sort_dicts=False)


class DataCatalog:
"""``DataCatalog`` stores instances of ``AbstractDataset`` implementations
@@ -207,6 +217,9 @@ def __init__(  # noqa: PLR0913
if feed_dict:
self.add_feed_dict(feed_dict)

def __repr__(self) -> str:
return self.datasets.__repr__()

@property
def _logger(self) -> logging.Logger:
return logging.getLogger(__name__)
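The `_FrozenDatasets.__repr__` above builds a dict mapping original dataset names (before non-word characters like `@` are sanitised for attribute access) to each dataset's repr, then formats it with `sort_dicts=False` so catalog order is preserved. A sketch with illustrative names and reprs (not real catalog output):

```python
import pprint

# Hypothetical catalog contents: original names -> dataset reprs,
# in insertion order, mirroring how the catalog repr is assembled.
datasets_repr = {
    "boats@csv": "kedro_datasets.pandas.csv_dataset.CSVDataset(filepath=PurePosixPath('boats.csv'))",
    "cars": "kedro.io.memory_dataset.MemoryDataset()",
}

# sort_dicts=False keeps 'boats@csv' first even though 'cars' sorts earlier.
catalog_repr = pprint.pformat(datasets_repr, sort_dicts=False)
print(catalog_repr)
```

`DataCatalog.__repr__` then simply delegates to this, so printing a catalog in a notebook or REPL shows every dataset's full repr.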
6 changes: 4 additions & 2 deletions tests/io/test_cached_dataset.py
@@ -127,8 +127,10 @@ def test_pickle(self, cached_ds, caplog):

def test_str(self):
assert (
str(CachedDataset(MemoryDataset(42))) == "CachedDataset(cache={}, "
"dataset={'data': <int>})"
str(CachedDataset(MemoryDataset(42)))
== """kedro.io.cached_dataset.CachedDataset("""
"""dataset="kedro.io.memory_dataset.MemoryDataset(data='<int>')", """
"""cache='kedro.io.memory_dataset.MemoryDataset()')"""
)

def test_release(self, cached_ds):
22 changes: 16 additions & 6 deletions tests/io/test_core.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import pprint
import shutil
from decimal import Decimal
from fractions import Fraction
@@ -206,11 +207,18 @@ def dummy_data():
class TestCoreFunctions:
@pytest.mark.parametrize("var", [1, True] + FALSE_BUILTINS)
def test_str_representation(self, var):
filepath = "."
assert str(MyDataset(var=var)) == f"MyDataset(filepath={filepath}, var={var})"
var_str = pprint.pformat(var)
filepath_str = pprint.pformat(PurePosixPath("."))
assert (
str(MyDataset(var=var))
== f"tests.io.test_core.MyDataset(filepath={filepath_str}, var={var_str})"
)

def test_str_representation_none(self):
assert str(MyDataset()) == "MyDataset(filepath=.)"
filepath_str = pprint.pformat(PurePosixPath("."))
assert (
str(MyDataset()) == f"tests.io.test_core.MyDataset(filepath={filepath_str})"
)

def test_get_filepath_str(self):
path = get_filepath_str(PurePosixPath("example.com/test.csv"), "http")
@@ -334,7 +342,9 @@ def test_resolve_save_version(self, dummy_data):

def test_no_versions(self, my_versioned_dataset):
"""Check the error if no versions are available for load."""
pattern = r"Did not find any versions for MyVersionedDataset\(.+\)"
pattern = (
r"Did not find any versions for tests.io.test_core.MyVersionedDataset\(.+\)"
)
with pytest.raises(DatasetError, match=pattern):
my_versioned_dataset.load()

@@ -369,7 +379,7 @@ def test_prevent_overwrite(self, my_versioned_dataset, dummy_data):
corresponding json file for a given save version already exists."""
my_versioned_dataset.save(dummy_data)
pattern = (
r"Save path \'.+\' for MyVersionedDataset\(.+\) must "
r"Save path \'.+\' for tests.io.test_core.MyVersionedDataset\(.+\) must "
r"not exist if versioning is enabled\."
)
with pytest.raises(DatasetError, match=pattern):
@@ -389,7 +399,7 @@ def test_save_version_warning(
pattern = (
f"Save version '{save_version}' did not match "
f"load version '{load_version}' for "
r"MyVersionedDataset\(.+\)"
r"tests.io.test_core.MyVersionedDataset\(.+\)"
)
with pytest.warns(UserWarning, match=pattern):
my_versioned_dataset.save(dummy_data)
2 changes: 1 addition & 1 deletion tests/io/test_data_catalog.py
@@ -267,7 +267,7 @@ def test_add_all_save_and_load(self, dataset, dummy_dataframe):
def test_load_error(self, data_catalog):
"""Check the error when attempting to load a data set
from nonexistent source"""
pattern = r"Failed while loading data from data set CSVDataset"
pattern = r"Failed while loading data from data set kedro_datasets.pandas.csv_dataset.CSVDataset"
with pytest.raises(DatasetError, match=pattern):
data_catalog.load("test")

27 changes: 15 additions & 12 deletions tests/io/test_lambda_dataset.py
@@ -28,25 +28,28 @@ def _dummy_exists():
def _dummy_release():
pass # pragma: no cover

assert "LambdaDataset(load=<tests.io.test_lambda_dataset._dummy_load>)" in str(
LambdaDataset(_dummy_load, None)
assert (
"kedro.io.lambda_dataset.LambdaDataset(load='<tests.io.test_lambda_dataset._dummy_load>')"
in str(LambdaDataset(_dummy_load, None))
)
assert "LambdaDataset(save=<tests.io.test_lambda_dataset._dummy_save>)" in str(
LambdaDataset(None, _dummy_save)
assert (
"kedro.io.lambda_dataset.LambdaDataset(save='<tests.io.test_lambda_dataset._dummy_save>')"
in str(LambdaDataset(None, _dummy_save))
)
assert "LambdaDataset(exists=<tests.io.test_lambda_dataset._dummy_exists>)" in str(
LambdaDataset(None, None, _dummy_exists)
assert (
"kedro.io.lambda_dataset.LambdaDataset(exists='<tests.io.test_lambda_dataset._dummy_exists>')"
in str(LambdaDataset(None, None, _dummy_exists))
)
assert (
"LambdaDataset(release=<tests.io.test_lambda_dataset._dummy_release>)"
"kedro.io.lambda_dataset.LambdaDataset(release='<tests.io.test_lambda_dataset._dummy_release>')"
in str(LambdaDataset(None, None, None, _dummy_release))
)

# __init__ keys alphabetically sorted, None values not shown
# __init__ keys remain in the provided order; None values are not shown
expected = (
"LambdaDataset(exists=<tests.io.test_lambda_dataset._dummy_exists>, "
"load=<tests.io.test_lambda_dataset._dummy_load>, "
"save=<tests.io.test_lambda_dataset._dummy_save>)"
"kedro.io.lambda_dataset.LambdaDataset(load='<tests.io.test_lambda_dataset._dummy_load>', "
"save='<tests.io.test_lambda_dataset._dummy_save>', "
"exists='<tests.io.test_lambda_dataset._dummy_exists>')"
)
actual = str(LambdaDataset(_dummy_load, _dummy_save, _dummy_exists, None))
assert actual == expected
@@ -103,7 +106,7 @@ def test_save_raises_error(self, mocked_save, mocked_dataset):
mocked_save.side_effect = FileExistsError(error_message)

pattern = (
r"Failed while saving data to data set LambdaDataset\(.+\)\.\n"
r"Failed while saving data to data set kedro.io.lambda_dataset.LambdaDataset\(.+\)\.\n"
+ error_message
)
with pytest.raises(DatasetError, match=pattern):
10 changes: 8 additions & 2 deletions tests/io/test_memory_dataset.py
@@ -141,8 +141,14 @@ def test_saving_none(self):
@pytest.mark.parametrize(
"input_data,expected",
[
("dummy_dataframe", "MemoryDataset(data=<DataFrame>)"),
("dummy_numpy_array", "MemoryDataset(data=<ndarray>)"),
(
"dummy_dataframe",
"kedro.io.memory_dataset.MemoryDataset(data='<DataFrame>')",
),
(
"dummy_numpy_array",
"kedro.io.memory_dataset.MemoryDataset(data='<ndarray>')",
),
],
indirect=["input_data"],
)