diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index df62ea3b7a0e..12b8a8a58096 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -1370,7 +1370,7 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func): backend.assert_series_equal(result, expected, check_names=False) -@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( ["druid"], raises=PyDruidProgrammingError, @@ -1382,6 +1382,15 @@ def test_histogram(con, alltypes): vc = hist.value_counts().sort_index() vc_np, _bin_edges = np.histogram(alltypes.int_col.execute(), bins=n) assert vc.tolist() == vc_np.tolist() + assert ( + con.execute( + ibis.memtable({"value": range(100)}) + .select(bin=_.value.histogram(10)) + .value_counts() + .bin_count.nunique() + ) + == 1 + ) @pytest.mark.parametrize("const", ["pi", "e"]) diff --git a/ibis/expr/types/numeric.py b/ibis/expr/types/numeric.py index c99c357c7470..23b1db0e2cdd 100644 --- a/ibis/expr/types/numeric.py +++ b/ibis/expr/types/numeric.py @@ -996,16 +996,19 @@ def histogram( f"Cannot pass both `nbins` (got {nbins}) and `binwidth` (got {binwidth})" ) - if binwidth is None or base is None: + if base is None: + base = self.min() - eps + + if binwidth is None: if nbins is None: raise ValueError("`nbins` is required if `binwidth` is not provided") - if base is None: - base = self.min() - eps - binwidth = (self.max() - base) / nbins - return ((self - base) / binwidth).floor() + if nbins is None: + nbins = ((self.max() - base) / binwidth).ceil() + + return ((self - base) / binwidth).floor().clip(-1, nbins - 1) @public