From 1b23b6fda648ec864ecc7865b385ba525f5d6182 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 2 Nov 2012 20:22:39 -0400 Subject: [PATCH] BUG: make NDFrame.drop work with non-unique indexes. close #2101 --- pandas/core/generic.py | 24 +++++++++++++++++++----- pandas/tests/test_frame.py | 15 --------------- pandas/tests/test_multilevel.py | 21 +++++++++++++++++++++ 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 31c1c2a638376..cf2557d84e8fe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7,6 +7,7 @@ from pandas.tseries.index import DatetimeIndex from pandas.tseries.offsets import DateOffset import pandas.core.common as com +import pandas.lib as lib class PandasError(Exception): @@ -337,13 +338,26 @@ def drop(self, labels, axis=0, level=None): axis_name = self._get_axis_name(axis) axis = self._get_axis(axis) - if level is not None: - assert(isinstance(axis, MultiIndex)) - new_axis = axis.drop(labels, level=level) + if axis.is_unique: + if level is not None: + assert(isinstance(axis, MultiIndex)) + new_axis = axis.drop(labels, level=level) + else: + new_axis = axis.drop(labels) + + return self.reindex(**{axis_name: new_axis}) else: - new_axis = axis.drop(labels) + if level is not None: + assert(isinstance(axis, MultiIndex)) + indexer = -lib.ismember(axis.get_level_values(level), + set(labels)) + else: + indexer = -axis.isin(labels) - return self.reindex(**{axis_name: new_axis}) + slicer = [slice(None)] * self.ndim + slicer[self._get_axis_number(axis_name)] = indexer + + return self.ix[tuple(slicer)] def sort_index(self, axis=0, ascending=True): """ diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 2282ea6ad7b4d..f069a65a1ab12 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4366,21 +4366,6 @@ def test_drop_col_still_multiindex(self): del df[('a','','')] assert(isinstance(df.columns, MultiIndex)) - def test_drop_nonuq_multiindex(self): - df = DataFrame([["x-a", "x", "a", 1.5],["x-a", "x", "a", 1.2], - ["z-c", "z", "c", 3.1], ["x-a", "x", "a", 4.1], - ["x-b", "x", "b", 5.1],["x-b", "x", "b", 4.1], - ["x-b", "x", "b", 2.2], - ["y-a", "y", "a", 1.2],["z-b", "z", "b", 2.1]], - columns=["var1", "var2", "var3", "var4"]) - - grp_size = df.groupby("var1").size() - drop_idx = grp_size.ix[grp_size == 1] - - df = df.set_index(["var1", "var2", "var3"]) - - self.assertRaises(Exception, df.drop, drop_idx.index, level=0) - def test_fillna(self): self.tsframe['A'][:5] = nan self.tsframe['A'][-5:] = nan diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 34872ea572f81..900ba1137cd9e 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1326,6 +1326,27 @@ def test_mixed_depth_drop(self): ('routine2', 'result1', '')], axis=1) assert_frame_equal(expected, result) + def test_drop_nonunique(self): + df = DataFrame([["x-a", "x", "a", 1.5],["x-a", "x", "a", 1.2], + ["z-c", "z", "c", 3.1], ["x-a", "x", "a", 4.1], + ["x-b", "x", "b", 5.1],["x-b", "x", "b", 4.1], + ["x-b", "x", "b", 2.2], + ["y-a", "y", "a", 1.2],["z-b", "z", "b", 2.1]], + columns=["var1", "var2", "var3", "var4"]) + + grp_size = df.groupby("var1").size() + drop_idx = grp_size.ix[grp_size == 1] + + idf = df.set_index(["var1", "var2", "var3"]) + + # it works! #2101 + result = idf.drop(drop_idx.index, level=0).reset_index() + expected = df[-df.var1.isin(drop_idx.index)] + + result.index = expected.index + + assert_frame_equal(result, expected) + def test_mixed_depth_pop(self): arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'], [ '', 'OD', 'OD', 'result1', 'result2', 'result1'],