Skip to content

Commit

Permalink
feat(python): Add median stat to Series.describe (#8118)
Browse the repository at this point in the history
  • Loading branch information
zundertj authored Apr 10, 2023
1 parent 5ad0832 commit b2f7eb8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 15 deletions.
33 changes: 18 additions & 15 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,18 +1157,19 @@ def describe(self) -> DataFrame:
--------
>>> series_num = pl.Series([1, 2, 3, 4, 5])
>>> series_num.describe()
shape: (6, 2)
shape: (7, 2)
┌────────────┬──────────┐
│ statistic ┆ value │
│ --- ┆ --- │
│ str ┆ f64 │
╞════════════╪══════════╡
│ min ┆ 1.0 │
│ max ┆ 5.0 │
│ count ┆ 5.0 │
│ null_count ┆ 0.0 │
│ mean ┆ 3.0 │
│ std ┆ 1.581139 │
│ count ┆ 5.0 │
│ min ┆ 1.0 │
│ max ┆ 5.0 │
│ median ┆ 3.0 │
└────────────┴──────────┘
>>> series_str = pl.Series(["a", "a", None, "b", "c"])
Expand All @@ -1179,9 +1180,9 @@ def describe(self) -> DataFrame:
│ --- ┆ --- │
│ str ┆ i64 │
╞════════════╪═══════╡
│ unique ┆ 4 │
│ null_count ┆ 1 │
│ count ┆ 5 │
│ null_count ┆ 1 │
│ unique ┆ 4 │
└────────────┴───────┘
"""
Expand All @@ -1192,33 +1193,35 @@ def describe(self) -> DataFrame:
elif self.is_numeric():
s = self.cast(Float64)
stats = {
"min": s.min(),
"max": s.max(),
"count": s.len(),
"null_count": s.null_count(),
"mean": s.mean(),
"std": s.std(),
"count": s.len(),
"min": s.min(),
"max": s.max(),
"median": s.median(),
}
elif self.is_boolean():
stats = {
"sum": self.sum(),
"null_count": self.null_count(),
"count": self.len(),
"null_count": self.null_count(),
"sum": self.sum(),
}
elif self.is_utf8():
stats = {
"unique": len(self.unique()),
"null_count": self.null_count(),
"count": self.len(),
"null_count": self.null_count(),
"unique": len(self.unique()),
}
elif self.is_temporal():
# We coerce every statistic to a string because a polars column has a single
# dtype: the date/datetime stats and the integer counts cannot share one column.
stats = {
"count": str(self.len()),
"null_count": str(self.null_count()),
"min": str(self.dt.min()),
"max": str(self.dt.max()),
"null_count": str(self.null_count()),
"count": str(self.len()),
"median": str(self.dt.median()),
}
else:
raise TypeError("This type is not supported")
Expand Down
3 changes: 3 additions & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,7 @@ def test_describe() -> None:
"min": 1.0,
"null_count": 0.0,
"std": 1.0,
"median": 2.0,
}
assert dict(float_s.describe().rows()) == { # type: ignore[arg-type]
"count": 3.0,
Expand All @@ -1138,6 +1139,7 @@ def test_describe() -> None:
"min": 1.3,
"null_count": 0.0,
"std": 3.8109491381194442,
"median": 4.6,
}
assert dict(str_s.describe().rows()) == { # type: ignore[arg-type]
"count": 3,
Expand All @@ -1153,6 +1155,7 @@ def test_describe() -> None:
"count": "3",
"max": "2021-01-03",
"min": "2021-01-01",
"median": "2021-01-02",
"null_count": "0",
}

Expand Down

0 comments on commit b2f7eb8

Please sign in to comment.