diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 117e7c9d11259..09217b9cb81be 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1006,6 +1006,7 @@ Reshaping - Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) - Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) - Bug in :func:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) +- Bug in ``DataFrame.drop_duplicates`` where dropping with non-unique column names raised a ``ValueError`` (:issue:`17836`) Numeric ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97943f153319b..0341c60670aa0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3556,7 +3556,8 @@ def f(vals): isinstance(subset, tuple) and subset in self.columns): subset = subset, - vals = (self[col].values for col in subset) + vals = (col.values for name, col in self.iteritems() + if name in subset) labels, shape = map(list, zip(*map(f, vals))) ids = get_group_index(labels, shape, sort=False, xnull=False) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index c36b5957a4283..1bac4037e99c9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1394,6 +1394,21 @@ def test_drop_duplicates(self): for keep in ['first', 'last', False]: assert df.duplicated(keep=keep).sum() == 0 + def test_drop_duplicates_with_duplicate_column_names(self): + # GH17836 + df = DataFrame([ + [1, 2, 5], + [3, 4, 6], + [3, 4, 7] + ], columns=['a', 'a', 'b']) + + result0 = df.drop_duplicates() + tm.assert_frame_equal(result0, df) + + result1 = df.drop_duplicates('a') + expected1 = df[:2] + tm.assert_frame_equal(result1, expected1) + def test_drop_duplicates_for_take_all(self): df = DataFrame({'AAA': ['foo', 'bar', 'baz', 'bar', 'foo', 'bar', 'qux', 'foo'],