diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 5947bf6156fd..2ce65ecbb6c8 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -1899,11 +1899,21 @@ def describe( ----- The median is included by default as the 50% percentile. + The mean for boolean series is the ratio of true values + to the total non-null values. + + Returns ------- DataFrame Mapping with summary statistics of a Series. + Warnings + -------- + We will never guarantee the output of describe to be stable. + It will show statistics that we deem informative and may + be updated in the future. + Examples -------- >>> s = pl.Series([1, 2, 3, 4, 5]) @@ -1925,6 +1935,20 @@ def describe( │ max ┆ 5.0 │ └────────────┴──────────┘ + >>> s = pl.Series([True, False, True, None, True]) + >>> s.describe() + shape: (4, 2) + ┌────────────┬───────┐ + │ statistic ┆ value │ + │ --- ┆ --- │ + │ str ┆ f64 │ + ╞════════════╪═══════╡ + │ count ┆ 4.0 │ + │ null_count ┆ 1.0 │ + │ sum ┆ 3.0 │ + │ mean ┆ 0.75 │ + └────────────┴───────┘ + Non-numeric data types may not have all statistics available. >>> s = pl.Series(["a", "a", None, "b", "c"]) @@ -1957,11 +1981,12 @@ def describe( stats["max"] = self.max() elif self.dtype == Boolean: - stats_dtype = Int64 + stats_dtype = Float64 stats = { "count": self.count(), "null_count": self.null_count(), "sum": self.sum(), + "mean": self.mean(), } elif self.dtype == String: stats_dtype = Int64 diff --git a/py-polars/tests/unit/series/test_describe.py b/py-polars/tests/unit/series/test_describe.py index 1cd20ffe6825..c8a49525a7b6 100644 --- a/py-polars/tests/unit/series/test_describe.py +++ b/py-polars/tests/unit/series/test_describe.py @@ -61,11 +61,7 @@ def test_series_describe_boolean() -> None: s = pl.Series([True, False, None, True, True]) result = s.describe() - stats = { - "count": 4, - "null_count": 1, - "sum": 3, - } + stats = {"count": 4, "null_count": 1, "sum": 3, "mean": 0.75} expected = pl.DataFrame({"statistic": stats.keys(), "value": stats.values()}) assert_frame_equal(expected, result)