Skip to content

Commit

Permalink
feat(python): add to_repr methods to DataFrame and Series (#7802)
Browse files Browse the repository at this point in the history
  • Loading branch information
ghuls authored Mar 27, 2023
1 parent cad87d9 commit c2cb649
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 0 deletions.
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe/export.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Export DataFrame data to other formats:
DataFrame.to_arrow
DataFrame.to_dict
DataFrame.to_dicts
DataFrame.to_init_repr
DataFrame.to_numpy
DataFrame.to_pandas
DataFrame.to_struct
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Conversion
from_numpy
from_pandas
from_records
from_repr

Eager/Lazy functions
~~~~~~~~~~~~~~~~~~~~
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/export.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ Export Series data to other formats:
Series.to_list
Series.to_numpy
Series.to_pandas
Series.to_init_repr
5 changes: 5 additions & 0 deletions py-polars/polars/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,11 @@ def from_repr(tbl: str) -> DataFrame:
Currently compound types such as List and Struct are not supported
(and neither is Time), though support is planned.
See Also
--------
polars.DataFrame.to_init_repr
polars.Series.to_init_repr
Examples
--------
>>> df = pl.from_repr(
Expand Down
58 changes: 58 additions & 0 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2106,6 +2106,64 @@ def to_series(self, index: int = 0) -> Series:
index = len(self.columns) + index
return wrap_s(self._df.select_at_idx(index))

def to_init_repr(self, n: int = 1000) -> str:
    """
    Render the DataFrame as a string of Python code that re-creates it.

    The result is a ``pl.DataFrame([...])`` call containing one
    ``pl.Series(...)`` entry per column, in column order, and it ends with a
    newline.

    Parameters
    ----------
    n
        Only use first n rows.

    See Also
    --------
    polars.Series.to_init_repr
    polars.from_repr

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     [
    ...         pl.Series("foo", [1, 2, 3], dtype=pl.UInt8),
    ...         pl.Series("bar", [6.0, 7.0, 8.0], dtype=pl.Float32),
    ...         pl.Series("ham", ["a", "b", "c"], dtype=pl.Categorical),
    ...     ]
    ... )
    >>> print(df.to_init_repr())
    pl.DataFrame(
     [
     pl.Series("foo", [1, 2, 3], dtype=pl.UInt8),
     pl.Series("bar", [6.0, 7.0, 8.0], dtype=pl.Float32),
     pl.Series("ham", ['a', 'b', 'c'], dtype=pl.Categorical),
     ]
    )
    >>> df_from_str_repr = eval(df.to_init_repr())
    >>> df_from_str_repr
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ ham │
    │ --- ┆ --- ┆ --- │
    │ u8  ┆ f32 ┆ cat │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 6.0 ┆ a   │
    │ 2   ┆ 7.0 ┆ b   │
    │ 3   ┆ 8.0 ┆ c   │
    └─────┴─────┴─────┘

    """
    # One entry per column, each delegating to the Series-level
    # representation so that ``n`` caps the rows taken from every column.
    column_entries = [
        " " + self.to_series(col_idx).to_init_repr(n) + ",\n"
        for col_idx in range(self.width)
    ]
    return "pl.DataFrame(\n [\n" + "".join(column_entries) + " ]\n)\n"

@overload
def write_json(
self,
Expand Down
35 changes: 35 additions & 0 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3257,6 +3257,41 @@ def to_pandas( # noqa: D417
pd_series.name = self.name
return pd_series

def to_init_repr(self, n: int = 1000) -> str:
    """
    Convert Series to instantiatable string representation.

    Parameters
    ----------
    n
        Only use first n elements.

    Returns
    -------
    str
        A ``pl.Series(<name>, <values>, dtype=pl.<dtype>)`` expression. The
        name is written as an escaped, double-quoted string literal, so names
        containing quotes, backslashes or control characters still evaluate
        back to the same name.

    See Also
    --------
    polars.DataFrame.to_init_repr
    polars.from_repr

    Examples
    --------
    >>> s = pl.Series("a", [1, 2, None, 4], dtype=pl.Int16)
    >>> print(s.to_init_repr())
    pl.Series("a", [1, 2, None, 4], dtype=pl.Int16)
    >>> s_from_str_repr = eval(s.to_init_repr())
    >>> s_from_str_repr
    shape: (4,)
    Series: 'a' [i16]
    [
        1
        2
        null
        4
    ]

    """
    # Imported locally so this method does not rely on a module-level import.
    import json

    # Interpolating the raw name between double quotes produces broken (or
    # silently different) code when the name contains '"', '\\' or a newline.
    # json.dumps yields a double-quoted literal whose escapes are also valid
    # Python escapes, so ordinary names render exactly as before.
    # ensure_ascii=False keeps non-ASCII characters verbatim.
    name_literal = json.dumps(str(self.name), ensure_ascii=False)
    values = self.head(n).to_list()
    return f"pl.Series({name_literal}, {values}, dtype=pl.{self.dtype})"

def set(self, filter: Series, value: int | float | str) -> Series:
"""
Set masked values.
Expand Down
30 changes: 30 additions & 0 deletions py-polars/tests/unit/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,3 +884,33 @@ def test_from_repr() -> None:
"ident": pl.Utf8,
"timestamp": pl.Datetime("us", "Asia/Tokyo"),
}


def test_to_init_repr() -> None:
    # round-trip various types
    columns = {
        "a": [1, 2, None],
        "b": [4.5, 5.5, 6.5],
        "c": ["x", "y", "z"],
        "d": [True, False, True],
        "e": [None, "", None],
        "f": [date(2022, 7, 5), date(2023, 2, 5), date(2023, 8, 5)],
        "g": [time(0, 0, 0, 1), time(12, 30, 45), time(23, 59, 59, 999000)],
        "h": [
            datetime(2022, 7, 5, 10, 30, 45, 4560),
            datetime(2023, 10, 12, 20, 3, 8, 11),
            None,
        ],
    }

    # The original frame and the re-evaluated one are both built inside the
    # same StringCache context, and compared there, since "c" is Categorical.
    with pl.StringCache():
        lazy = pl.LazyFrame(columns)
        df = lazy.with_columns(
            pl.col("c").cast(pl.Categorical),
            pl.col("h").cast(pl.Datetime("ns")),
        ).collect()

        # Strip the "datetime." prefix from the generated code before eval;
        # presumably the temporal values are rendered module-qualified while
        # this test module has date/time/datetime in scope as bare names.
        init_code = df.to_init_repr().replace("datetime.", "")
        round_tripped = eval(init_code)

        assert_frame_equal(round_tripped, df)

0 comments on commit c2cb649

Please sign in to comment.