From 68573d4f4615963556e05271c45dd400020d7da2 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Thu, 21 Nov 2019 01:29:07 +0000 Subject: [PATCH 1/5] add test for ffill for non unique multilevel --- pandas/tests/test_multilevel.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index f0928820367e9..923eb89109f0c 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1989,6 +1989,35 @@ def test_repeat(self): m_df = Series(data, index=m_idx) assert m_df.repeat(3).shape == (3 * len(data),) + def test_ffill_non_unique_multilevel(self): + # GH 19437 + date = pd.to_datetime( + [ + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-02", + "2018-01-01", + "2018-01-02", + ] + ) + symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "AAPL", "TSLA", "TSLA"] + status = ["shrt", "lng", np.nan, "shrt", np.nan, "shrt", "ntrl", np.nan] + + df = DataFrame({"date": date, "symbol": symbol, "status": status}) + df = df.set_index(["date", "symbol"]) + result = df.groupby("symbol")["status"].ffill() + + index = MultiIndex.from_tuples( + tuples=list(zip(*[date, symbol])), names=["date", "symbol"] + ) + status = ["shrt", "lng", "lng", "shrt", "shrt", "shrt", "ntrl", "ntrl"] + expected = Series(status, index=index, name="status") + + tm.assert_series_equal(result, expected) + def test_subsets_multiindex_dtype(self): # GH 20757 data = [["x", 1]] From bbc853473ec42c8d42664b9247f583bcd0655b17 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Thu, 21 Nov 2019 19:22:38 +0000 Subject: [PATCH 2/5] move test to tests/groupby/test_transform.py --- pandas/tests/groupby/test_transform.py | 30 ++++++++++++++++++++++++++ pandas/tests/test_multilevel.py | 29 ------------------------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 3d9a349d94e10..6cc460e4c5636 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -911,6 +911,36 @@ def test_pct_change(test_series, freq, periods, fill_method, limit): tm.assert_frame_equal(result, expected.to_frame("vals")) +def test_ffill_non_unique_multilevel(self): + # GH 19437 + date = pd.to_datetime( + [ + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-02", + "2018-01-01", + "2018-01-02", + ] + ) + symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "AAPL", "TSLA", "TSLA"] + status = ["shrt", "lng", np.nan, "shrt", np.nan, "shrt", "ntrl", np.nan] + + df = DataFrame({"date": date, "symbol": symbol, "status": status}) + df = df.set_index(["date", "symbol"]) + result = df.groupby("symbol")["status"].ffill() + + index = MultiIndex.from_tuples( + tuples=list(zip(*[date, symbol])), names=["date", "symbol"] + ) + status = ["shrt", "lng", "lng", "shrt", "shrt", "shrt", "ntrl", "ntrl"] + expected = Series(status, index=index, name="status") + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", [np.any, np.all]) def test_any_all_np_func(func): # GH 20653 diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 923eb89109f0c..f0928820367e9 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1989,35 +1989,6 @@ def test_repeat(self): m_df = Series(data, index=m_idx) assert m_df.repeat(3).shape == (3 * len(data),) - def test_ffill_non_unique_multilevel(self): - # GH 19437 - date = pd.to_datetime( - [ - "2018-01-01", - "2018-01-01", - "2018-01-01", - "2018-01-01", - "2018-01-01", - "2018-01-02", - "2018-01-01", - "2018-01-02", - ] - ) - symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "AAPL", "TSLA", "TSLA"] - status = ["shrt", "lng", np.nan, "shrt", np.nan, "shrt", "ntrl", np.nan] - - df = DataFrame({"date": date, "symbol": symbol, "status": status}) - df = df.set_index(["date", "symbol"]) - result = df.groupby("symbol")["status"].ffill() - - index = MultiIndex.from_tuples( - tuples=list(zip(*[date, symbol])), names=["date", "symbol"] - ) - status = ["shrt", "lng", "lng", "shrt", "shrt", "shrt", "ntrl", "ntrl"] - expected = Series(status, index=index, name="status") - - tm.assert_series_equal(result, expected) - def test_subsets_multiindex_dtype(self): # GH 20757 data = [["x", 1]] From 9040a76fde17f8fbd6a2607658235f9aa09f96f9 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Thu, 21 Nov 2019 19:24:41 +0000 Subject: [PATCH 3/5] move test to tests/groupby/test_transform.py --- pandas/tests/groupby/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 6cc460e4c5636..cec862169ca2b 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -911,7 +911,7 @@ def test_pct_change(test_series, freq, periods, fill_method, limit): tm.assert_frame_equal(result, expected.to_frame("vals")) -def test_ffill_non_unique_multilevel(self): +def test_ffill_non_unique_multilevel(): # GH 19437 date = pd.to_datetime( [ From 56a3c0babff3b5f657505a3c62b4a7cf14b1b8d4 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Fri, 22 Nov 2019 21:50:30 +0000 Subject: [PATCH 4/5] parametrize over ffill, bfill --- pandas/tests/groupby/test_transform.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index cec862169ca2b..9c2836dd61317 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -911,7 +911,14 @@ def test_pct_change(test_series, freq, periods, fill_method, limit): tm.assert_frame_equal(result, expected.to_frame("vals")) -def test_ffill_non_unique_multilevel(): +@pytest.mark.parametrize( + "func, expected_status", + [ + ("ffill", ["shrt", "shrt", "lng", np.nan, "shrt", "ntrl", "ntrl"]), + ("bfill", ["shrt", "lng", "lng", "shrt", "shrt", "ntrl", np.nan]), + ], +) +def test_ffill_non_unique_multilevel(func, expected_status): # GH 19437 date = pd.to_datetime( [ @@ -919,24 +926,22 @@ def test_ffill_non_unique_multilevel(): "2018-01-01", "2018-01-01", "2018-01-01", - "2018-01-01", "2018-01-02", "2018-01-01", "2018-01-02", ] ) - symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "AAPL", "TSLA", "TSLA"] - status = ["shrt", "lng", np.nan, "shrt", np.nan, "shrt", "ntrl", np.nan] + symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "TSLA", "TSLA"] + status = ["shrt", np.nan, "lng", np.nan, "shrt", "ntrl", np.nan] df = DataFrame({"date": date, "symbol": symbol, "status": status}) df = df.set_index(["date", "symbol"]) - result = df.groupby("symbol")["status"].ffill() + result = getattr(df.groupby("symbol")["status"], func)() index = MultiIndex.from_tuples( tuples=list(zip(*[date, symbol])), names=["date", "symbol"] ) - status = ["shrt", "lng", "lng", "shrt", "shrt", "shrt", "ntrl", "ntrl"] - expected = Series(status, index=index, name="status") + expected = Series(expected_status, index=index, name="status") tm.assert_series_equal(result, expected) From f48ac9f3df1ed437cd3ca854afe970ee93169cb3 Mon Sep 17 00:00:00 2001 From: ganevgv Date: Fri, 22 Nov 2019 21:51:17 +0000 Subject: [PATCH 5/5] change name test --- pandas/tests/groupby/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 9c2836dd61317..c46180c1d11cd 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -918,7 +918,7 @@ def test_pct_change(test_series, freq, periods, fill_method, limit): ("bfill", ["shrt", "lng", "lng", "shrt", "shrt", "ntrl", np.nan]), ], ) -def test_ffill_non_unique_multilevel(func, expected_status): +def test_ffill_bfill_non_unique_multilevel(func, expected_status): # GH 19437 date = pd.to_datetime( [