Skip to content

Commit

Permalink
fix: remove incorrect fix for pandas regression
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Mar 30, 2022
1 parent f2459eb commit 339f544
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 11 deletions.
4 changes: 1 addition & 3 deletions ibis/backends/pandas/aggcontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,9 +596,7 @@ def agg(
indexed_by_ordering = frame[columns].copy()
# placeholder column to compute window_sizes below
indexed_by_ordering['_placeholder'] = 0
indexed_by_ordering = indexed_by_ordering.set_index(
order_by
).sort_index(kind="stable")
indexed_by_ordering = indexed_by_ordering.set_index(order_by)

# regroup if needed
if group_by:
Expand Down
6 changes: 6 additions & 0 deletions ibis/backends/pandas/tests/execution/test_timecontext.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import pandas.testing as tm
import pytest
from packaging.version import parse as vparse

import ibis
import ibis.common.exceptions as com
Expand Down Expand Up @@ -242,6 +243,11 @@ def test_context_adjustment_multi_window(time_table, time_df3):
tm.assert_series_equal(result["v2"], expected_win_2)


@pytest.mark.xfail(
condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"),
raises=ValueError,
reason="https://github.com/pandas-dev/pandas/pull/44068",
)
def test_context_adjustment_window_groupby_id(time_table, time_df3):
"""This test case is meant to test trim_window_result method
in pandas/execution/window.py to see if it could trim Series
Expand Down
71 changes: 71 additions & 0 deletions ibis/backends/pandas/tests/execution/test_window.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import io
from datetime import date
from operator import methodcaller

import numpy as np
import pandas as pd
import pytest
from packaging.version import parse as vparse
from pandas import testing as tm

import ibis
Expand Down Expand Up @@ -546,6 +549,11 @@ def test_window_with_preceding_expr(index):
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(
condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"),
raises=ValueError,
reason="https://github.com/pandas-dev/pandas/pull/44068",
)
def test_window_with_mlb():
index = pd.date_range('20170501', '20170507')
data = np.random.randn(len(index), 3)
Expand Down Expand Up @@ -769,3 +777,66 @@ def count_complex(v):
tm.assert_series_equal(result_nan, expected, check_names=False)
tm.assert_series_equal(result_non_numeric, expected, check_names=False)
tm.assert_series_equal(result_nan_non_numeric, expected, check_names=False)


@pytest.fixture
def events():
    """Build a 12-row frame of events with a mostly-missing measurement.

    Columns:
    - ``event_id``: 1 for the first four rows, 2 for the next six and
      3 for the last two.
    - ``measured_on``: timestamps in 2021 whose month follows the same
      4/6/2 split (June, May, July) and whose day runs from 1 to 12.
    - ``measurement``: NaN everywhere except rows 1, 4, 5 and 7.
    """
    event_ids = [1] * 4 + [2] * 6 + [3] * 2
    months = [6] * 4 + [5] * 6 + [7] * 2
    measured_on = [
        pd.Timestamp(date(2021, month, day))
        for month, day in zip(months, range(1, 13))
    ]

    frame = pd.DataFrame(
        {
            "event_id": event_ids,
            "measured_on": measured_on,
            "measurement": np.nan,
        }
    )

    # only these row labels carry an observed value; the rest stay NaN
    observed = {1: 5.0, 4: 42.0, 5: 42.0, 7: 11.0}
    for row_label, value in observed.items():
        frame.at[row_label, "measurement"] = value
    return frame


def test_bfill(events):
    """Check a backward-fill built from a windowed count and a grouped max.

    The count of ``measurement`` over a window grouped by ``event_id``,
    ordered by descending ``measured_on`` and declared with
    ``following=0`` is stored as ``grouper``. The maximum ``measurement``
    within each (``event_id``, ``grouper``) group is stored as ``bfill``.
    The result, sorted by ``measured_on``, is compared with the expected
    table below.
    """
    table = ibis.pandas.connect({"t": events}).table("t")

    descending_window = ibis.window(
        group_by=table.event_id,
        order_by=ibis.desc(table.measured_on),
        following=0,
    )
    grouper_column = table.measurement.count().over(descending_window)
    with_grouper = table.mutate(grouper=grouper_column)

    by_event_and_grouper = with_grouper.group_by(
        [with_grouper.event_id, with_grouper.grouper]
    )
    filled = by_event_and_grouper.mutate(bfill=with_grouper.measurement.max())
    ordered = filled.sort_by("measured_on")
    actual = ordered.execute().reset_index(drop=True)

    expected_text = """\
event_id measured_on measurement grouper bfill
2 2021-05-05 42.0 3 42.0
2 2021-05-06 42.0 2 42.0
2 2021-05-07 NaN 1 11.0
2 2021-05-08 11.0 1 11.0
2 2021-05-09 NaN 0 NaN
2 2021-05-10 NaN 0 NaN
1 2021-06-01 NaN 1 5.0
1 2021-06-02 5.0 1 5.0
1 2021-06-03 NaN 0 NaN
1 2021-06-04 NaN 0 NaN
3 2021-07-11 NaN 0 NaN
3 2021-07-12 NaN 0 NaN"""
    # whitespace-delimited table: first line is the header, dates are parsed
    expected = pd.read_csv(
        io.StringIO(expected_text),
        sep=r"\s+",
        header=0,
        parse_dates=["measured_on"],
    )
    tm.assert_frame_equal(actual, expected)
6 changes: 6 additions & 0 deletions ibis/backends/pandas/tests/test_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import pandas._testing as tm
import pytest
from packaging.version import parse as vparse

import ibis
import ibis.expr.datatypes as dt
Expand Down Expand Up @@ -256,6 +257,11 @@ def test_udaf_window(t2, df2):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(
condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"),
raises=ValueError,
reason="https://github.com/pandas-dev/pandas/pull/44068",
)
def test_udaf_window_interval():
df = pd.DataFrame(
collections.OrderedDict(
Expand Down
23 changes: 18 additions & 5 deletions ibis/backends/tests/test_timecontext.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import pandas.testing as tm
import pytest
from pytest import param

import ibis
from ibis.config import option_context
Expand Down Expand Up @@ -42,11 +43,23 @@ def filter_by_time_context(df, context):
@pytest.mark.parametrize(
'window',
[
ibis.trailing_window(ibis.interval(days=3), order_by=ORDERBY_COL),
ibis.trailing_window(
ibis.interval(days=3),
order_by=ORDERBY_COL,
group_by=GROUPBY_COL,
param(
ibis.trailing_window(ibis.interval(days=3), order_by=ORDERBY_COL),
id="order_by",
),
param(
ibis.trailing_window(
ibis.interval(days=3),
order_by=ORDERBY_COL,
group_by=GROUPBY_COL,
),
id="order_by_group_by",
marks=[
pytest.mark.broken(
["pandas"],
reason="https://github.com/pandas-dev/pandas/pull/44068",
)
],
),
],
)
Expand Down
22 changes: 20 additions & 2 deletions ibis/tests/benchmarks/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
import pandas as pd
import pytest
from packaging.version import parse as vparse

import ibis
import ibis.expr.datatypes as dt
Expand Down Expand Up @@ -293,6 +294,13 @@ def high_card_grouped_rolling_udf_wm(t):
return my_wm(t.value, t.value).over(low_card_rolling_window(t))


# Expected-failure marker applied when the installed pandas version is
# >= 1.4 and < 1.4.2: marked tests are expected to raise ValueError.
# The reason string links the relevant pandas pull request.
broken_pandas_grouped_rolling = pytest.mark.xfail(
    reason="https://github.com/pandas-dev/pandas/pull/44068",
    raises=ValueError,
    condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"),
)


@pytest.mark.benchmark(group="execution")
@pytest.mark.parametrize(
"expression_fn",
Expand All @@ -309,17 +317,25 @@ def high_card_grouped_rolling_udf_wm(t):
pytest.param(simple_sort_projection, id="simple_sort_projection"),
pytest.param(multikey_sort, id="multikey_sort"),
pytest.param(multikey_sort_projection, id="multikey_sort_projection"),
pytest.param(low_card_grouped_rolling, id="low_card_grouped_rolling"),
pytest.param(
high_card_grouped_rolling, id="high_card_grouped_rolling"
low_card_grouped_rolling,
id="low_card_grouped_rolling",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
high_card_grouped_rolling,
id="high_card_grouped_rolling",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
low_card_grouped_rolling_udf_mean,
id="low_card_grouped_rolling_udf_mean",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
high_card_grouped_rolling_udf_mean,
id="high_card_grouped_rolling_udf_mean",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
low_card_window_analytics_udf, id="low_card_window_analytics_udf"
Expand All @@ -330,10 +346,12 @@ def high_card_grouped_rolling_udf_wm(t):
pytest.param(
low_card_grouped_rolling_udf_wm,
id="low_card_grouped_rolling_udf_wm",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
high_card_grouped_rolling_udf_wm,
id="high_card_grouped_rolling_udf_wm",
marks=[broken_pandas_grouped_rolling],
),
],
)
Expand Down
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ mkdocs-material = ">=8.2.1,<9"
mkdocs-table-reader-plugin = ">=1.0.0,<2"
mkdocstrings = ">=0.17.0,<0.18.0"
mypy = "0.942"
packaging = ">=21.3,<22"
pyarrow = ">=1,<8"
pydocstyle = ">=6.1.1,<7"
pymdown-extensions = ">=9.1,<10"
Expand Down

0 comments on commit 339f544

Please sign in to comment.