Skip to content

Commit

Permalink
fix: Improve binning in Series.hist with bin_count when all values are the same (#20034)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcrumiller authored Nov 28, 2024
1 parent 0b218d8 commit 83b5042
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
10 changes: 8 additions & 2 deletions crates/polars-ops/src/chunked_array/hist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,14 @@ where
// Determine outer bin edges from the data itself
let min_value = ca.min().unwrap().to_f64().unwrap();
let max_value = ca.max().unwrap().to_f64().unwrap();
pad_lower = true;
(min_value, (max_value - min_value) / bin_count as f64)

// All data points are identical--use unit interval.
if min_value == max_value {
(min_value - 0.5, 1.0 / bin_count as f64)
} else {
pad_lower = true;
(min_value, (max_value - min_value) / bin_count as f64)
}
};
let out = (0..bin_count + 1)
.map(|x| (x as f64 * width) + offset)
Expand Down
14 changes: 14 additions & 0 deletions py-polars/tests/unit/operations/test_hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,8 @@ def test_hist_all_null() -> None:
def test_hist_rand(n_values: int, n_null: int) -> None:
s_rand = pl.Series([None] * n_null, dtype=pl.Int64)
s_values = pl.Series(np.random.randint(0, 100, n_values), dtype=pl.Int64)
if s_values.n_unique() == 1:
pytest.skip("Identical values not tested.")
s = pl.concat((s_rand, s_values))
out = s.hist(bin_count=10)

Expand Down Expand Up @@ -424,3 +426,15 @@ def test_hist_max_boundary_19998() -> None:
)
result = s.hist(bin_count=50)
assert result["count"].sum() == 4


def test_hist_same_values_20030() -> None:
    """Check `Series.hist(bin_count=...)` on a series whose values are all equal.

    With the input `[1, 1]` and `bin_count=2`, the expected histogram has two
    bins, `(0.5, 1.0]` and `(1.0, 1.5]`, and both observations are counted in
    the first one.
    """
    # Compute the histogram first, then assemble the reference frame column by column.
    result = pl.Series([1, 1]).hist(bin_count=2)

    breakpoints = pl.Series([1.0, 1.5], dtype=pl.Float64)
    categories = pl.Series(["(0.5, 1.0]", "(1.0, 1.5]"], dtype=pl.Categorical)
    # The count column is built with the index dtype reported by polars.
    counts = pl.Series([2, 0], dtype=pl.get_index_type())

    expected = pl.DataFrame(
        {
            "breakpoint": breakpoints,
            "category": categories,
            "count": counts,
        }
    )
    assert_frame_equal(result, expected)

0 comments on commit 83b5042

Please sign in to comment.