Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(types): fix histogram bin allocation #9711

Merged
7 commits merged on Jul 30, 2024
11 changes: 10 additions & 1 deletion ibis/backends/tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1370,7 +1370,7 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func):
backend.assert_series_equal(result, expected, check_names=False)


@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError)
@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError)
@pytest.mark.notyet(
["druid"],
raises=PyDruidProgrammingError,
Expand All @@ -1382,6 +1382,15 @@ def test_histogram(con, alltypes):
vc = hist.value_counts().sort_index()
vc_np, _bin_edges = np.histogram(alltypes.int_col.execute(), bins=n)
assert vc.tolist() == vc_np.tolist()
assert (
con.execute(
ibis.memtable({"value": range(100)})
.select(bin=_.value.histogram(10))
.value_counts()
.bin_count.nunique()
)
== 1
)


@pytest.mark.parametrize("const", ["pi", "e"])
Expand Down
13 changes: 8 additions & 5 deletions ibis/expr/types/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,16 +996,19 @@
f"Cannot pass both `nbins` (got {nbins}) and `binwidth` (got {binwidth})"
)

if binwidth is None or base is None:
if base is None:
base = self.min() - eps

if binwidth is None:
if nbins is None:
raise ValueError("`nbins` is required if `binwidth` is not provided")

if base is None:
base = self.min() - eps

binwidth = (self.max() - base) / nbins

return ((self - base) / binwidth).floor()
if nbins is None:
nbins = ((self.max() - base) / binwidth).ceil()

Check warning on line 1009 in ibis/expr/types/numeric.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/types/numeric.py#L1009

Added line #L1009 was not covered by tests

return ((self - base) / binwidth).floor().clip(-1, nbins - 1)


@public
Expand Down