Skip to content

Commit

Permalink
BUG: make NDFrame.drop work with non-unique indexes. close #2101
Browse files (browse the repository at this point in the history)
  • Loading branch information
wesm committed Nov 3, 2012
1 parent d823bce commit 1b23b6f
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 20 deletions.
24 changes: 19 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pandas.tseries.index import DatetimeIndex
from pandas.tseries.offsets import DateOffset
import pandas.core.common as com
import pandas.lib as lib


class PandasError(Exception):
Expand Down Expand Up @@ -337,13 +338,26 @@ def drop(self, labels, axis=0, level=None):
axis_name = self._get_axis_name(axis)
axis = self._get_axis(axis)

if level is not None:
assert(isinstance(axis, MultiIndex))
new_axis = axis.drop(labels, level=level)
if axis.is_unique:
if level is not None:
assert(isinstance(axis, MultiIndex))
new_axis = axis.drop(labels, level=level)
else:
new_axis = axis.drop(labels)

return self.reindex(**{axis_name: new_axis})
else:
new_axis = axis.drop(labels)
if level is not None:
assert(isinstance(axis, MultiIndex))
indexer = -lib.ismember(axis.get_level_values(level),
set(labels))
else:
indexer = -axis.isin(labels)

return self.reindex(**{axis_name: new_axis})
slicer = [slice(None)] * self.ndim
slicer[self._get_axis_number(axis_name)] = indexer

return self.ix[tuple(slicer)]

def sort_index(self, axis=0, ascending=True):
"""
Expand Down
15 changes: 0 additions & 15 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4366,21 +4366,6 @@ def test_drop_col_still_multiindex(self):
del df[('a','','')]
assert(isinstance(df.columns, MultiIndex))

def test_drop_nonuq_multiindex(self):
    """Check that a level-based drop on a non-unique MultiIndex raises.

    The frame is indexed by (var1, var2, var3), where several index
    tuples repeat.  The labels passed to ``drop`` are the ``var1``
    values that occur exactly once, and the test asserts that
    ``df.drop(labels, level=0)`` raises an exception.
    """
    df = DataFrame([["x-a", "x", "a", 1.5], ["x-a", "x", "a", 1.2],
                    ["z-c", "z", "c", 3.1], ["x-a", "x", "a", 4.1],
                    ["x-b", "x", "b", 5.1], ["x-b", "x", "b", 4.1],
                    ["x-b", "x", "b", 2.2],
                    ["y-a", "y", "a", 1.2], ["z-b", "z", "b", 2.1]],
                   columns=["var1", "var2", "var3", "var4"])

    grp_size = df.groupby("var1").size()
    # Plain boolean-mask indexing selects the same rows as the old
    # ``.ix`` indexer without depending on that removed accessor.
    drop_idx = grp_size[grp_size == 1]

    df = df.set_index(["var1", "var2", "var3"])

    self.assertRaises(Exception, df.drop, drop_idx.index, level=0)

def test_fillna(self):
self.tsframe['A'][:5] = nan
self.tsframe['A'][-5:] = nan
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1326,6 +1326,27 @@ def test_mixed_depth_drop(self):
('routine2', 'result1', '')], axis=1)
assert_frame_equal(expected, result)

def test_drop_nonunique(self):
df = DataFrame([["x-a", "x", "a", 1.5],["x-a", "x", "a", 1.2],
["z-c", "z", "c", 3.1], ["x-a", "x", "a", 4.1],
["x-b", "x", "b", 5.1],["x-b", "x", "b", 4.1],
["x-b", "x", "b", 2.2],
["y-a", "y", "a", 1.2],["z-b", "z", "b", 2.1]],
columns=["var1", "var2", "var3", "var4"])

grp_size = df.groupby("var1").size()
drop_idx = grp_size.ix[grp_size == 1]

idf = df.set_index(["var1", "var2", "var3"])

# it works! #2101
result = idf.drop(drop_idx.index, level=0).reset_index()
expected = df[-df.var1.isin(drop_idx.index)]

result.index = expected.index

assert_frame_equal(result, expected)

def test_mixed_depth_pop(self):
arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
[ '', 'OD', 'OD', 'result1', 'result2', 'result1'],
Expand Down

0 comments on commit 1b23b6f

Please sign in to comment.