From 2738147ebc839dea0be14dbaea7396d826024aee Mon Sep 17 00:00:00 2001 From: Gert Hulselmans Date: Mon, 27 Mar 2023 17:31:47 +0200 Subject: [PATCH] feat(python): add `to_repr` methods to DataFrame and Series (#7802) --- .../source/reference/dataframe/export.rst | 1 + py-polars/docs/source/reference/functions.rst | 1 + .../docs/source/reference/series/export.rst | 1 + py-polars/polars/convert.py | 5 ++ py-polars/polars/dataframe/frame.py | 58 +++++++++++++++++++ py-polars/polars/series/series.py | 35 +++++++++++ py-polars/tests/unit/test_interop.py | 30 ++++++++++ 7 files changed, 131 insertions(+) diff --git a/py-polars/docs/source/reference/dataframe/export.rst b/py-polars/docs/source/reference/dataframe/export.rst index 5bd9ccbbd8b7..762b17cf4222 100644 --- a/py-polars/docs/source/reference/dataframe/export.rst +++ b/py-polars/docs/source/reference/dataframe/export.rst @@ -12,6 +12,7 @@ Export DataFrame data to other formats: DataFrame.to_arrow DataFrame.to_dict DataFrame.to_dicts + DataFrame.to_init_repr DataFrame.to_numpy DataFrame.to_pandas DataFrame.to_struct diff --git a/py-polars/docs/source/reference/functions.rst b/py-polars/docs/source/reference/functions.rst index 27a1abcd64c4..64e2a0994cf5 100644 --- a/py-polars/docs/source/reference/functions.rst +++ b/py-polars/docs/source/reference/functions.rst @@ -15,6 +15,7 @@ Conversion from_numpy from_pandas from_records + from_repr Eager/Lazy functions ~~~~~~~~~~~~~~~~~~~~ diff --git a/py-polars/docs/source/reference/series/export.rst b/py-polars/docs/source/reference/series/export.rst index 8ff6ab028e94..6e19c4efa4f7 100644 --- a/py-polars/docs/source/reference/series/export.rst +++ b/py-polars/docs/source/reference/series/export.rst @@ -13,3 +13,4 @@ Export Series data to other formats: Series.to_list Series.to_numpy Series.to_pandas + Series.to_init_repr diff --git a/py-polars/polars/convert.py b/py-polars/polars/convert.py index cca1a0b97c5c..4611da0830bc 100644 --- a/py-polars/polars/convert.py +++ 
b/py-polars/polars/convert.py @@ -273,6 +273,11 @@ def from_repr(tbl: str) -> DataFrame: Currently compound types such as List and Struct are not supported, (and neither is Time) though support is planned. + See Also + -------- + polars.DataFrame.to_init_repr + polars.Series.to_init_repr + Examples -------- >>> df = pl.from_repr( diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 6e7e922cd7da..5965417df5cd 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -2106,6 +2106,64 @@ def to_series(self, index: int = 0) -> Series: index = len(self.columns) + index return wrap_s(self._df.select_at_idx(index)) + def to_init_repr(self, n: int = 1000) -> str: + """ + Convert DataFrame to instantiatable string representation. + + Parameters + ---------- + n + Only use first n rows. + + See Also + -------- + polars.Series.to_init_repr + polars.from_repr + + Examples + -------- + >>> df = pl.DataFrame( + ... [ + ... pl.Series("foo", [1, 2, 3], dtype=pl.UInt8), + ... pl.Series("bar", [6.0, 7.0, 8.0], dtype=pl.Float32), + ... pl.Series("ham", ["a", "b", "c"], dtype=pl.Categorical), + ... ] + ... 
) + >>> print(df.to_init_repr()) + pl.DataFrame( + [ + pl.Series("foo", [1, 2, 3], dtype=pl.UInt8), + pl.Series("bar", [6.0, 7.0, 8.0], dtype=pl.Float32), + pl.Series("ham", ['a', 'b', 'c'], dtype=pl.Categorical), + ] + ) + + >>> df_from_str_repr = eval(df.to_init_repr()) + >>> df_from_str_repr + shape: (3, 3) + ┌─────┬─────┬─────┐ + │ foo ┆ bar ┆ ham │ + │ --- ┆ --- ┆ --- │ + │ u8 ┆ f32 ┆ cat │ + ╞═════╪═════╪═════╡ + │ 1 ┆ 6.0 ┆ a │ + │ 2 ┆ 7.0 ┆ b │ + │ 3 ┆ 8.0 ┆ c │ + └─────┴─────┴─────┘ + + """ + output = StringIO() + output.write("pl.DataFrame(\n [\n") + + for i in range(self.width): + output.write(" ") + output.write(self.to_series(i).to_init_repr(n)) + output.write(",\n") + + output.write(" ]\n)\n") + + return output.getvalue() + @overload def write_json( self, diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 8c6f3fd47196..1ee411267a7f 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -3257,6 +3257,41 @@ def to_pandas( # noqa: D417 pd_series.name = self.name return pd_series + def to_init_repr(self, n: int = 1000) -> str: + """ + Convert Series to instantiatable string representation. + + Parameters + ---------- + n + Only use first n elements. + + See Also + -------- + polars.DataFrame.to_init_repr + polars.from_repr + + Examples + -------- + >>> s = pl.Series("a", [1, 2, None, 4], dtype=pl.Int16) + >>> print(s.to_init_repr()) + pl.Series("a", [1, 2, None, 4], dtype=pl.Int16) + >>> s_from_str_repr = eval(s.to_init_repr()) + >>> s_from_str_repr + shape: (4,) + Series: 'a' [i16] + [ + 1 + 2 + null + 4 + ] + + """ + return ( + f'pl.Series("{self.name}", {self.head(n).to_list()}, dtype=pl.{self.dtype})' + ) + def set(self, filter: Series, value: int | float | str) -> Series: """ Set masked values. 
diff --git a/py-polars/tests/unit/test_interop.py b/py-polars/tests/unit/test_interop.py index f2f45f6bd578..0c104fd387ca 100644 --- a/py-polars/tests/unit/test_interop.py +++ b/py-polars/tests/unit/test_interop.py @@ -884,3 +884,33 @@ def test_from_repr() -> None: "ident": pl.Utf8, "timestamp": pl.Datetime("us", "Asia/Tokyo"), } + + +def test_to_init_repr() -> None: + # round-trip various types + with pl.StringCache(): + df = ( + pl.LazyFrame( + { + "a": [1, 2, None], + "b": [4.5, 5.5, 6.5], + "c": ["x", "y", "z"], + "d": [True, False, True], + "e": [None, "", None], + "f": [date(2022, 7, 5), date(2023, 2, 5), date(2023, 8, 5)], + "g": [time(0, 0, 0, 1), time(12, 30, 45), time(23, 59, 59, 999000)], + "h": [ + datetime(2022, 7, 5, 10, 30, 45, 4560), + datetime(2023, 10, 12, 20, 3, 8, 11), + None, + ], + }, + ) + .with_columns( + pl.col("c").cast(pl.Categorical), + pl.col("h").cast(pl.Datetime("ns")), + ) + .collect() + ) + + assert_frame_equal(eval(df.to_init_repr().replace("datetime.", "")), df)