Skip to content

Commit

Permalink
DEPR: some removals
Browse files Browse the repository at this point in the history
DEPR: Removal of cols keyword in favor of subset in
DataFrame.duplicated() and DataFrame.drop_duplicates(), xref #6680

Author: Jeff Reback <jeff@reback.net>

Closes #12165 from jreback/deprecate and squashes the following commits:

5be6dc6 [Jeff Reback] DOC: small fix on Timestamp doc-string
e3579a5 [Jeff Reback] DEPR: Removal of cols keyword in favor of subset in DataFrame.duplicated() and DataFrame.drop_duplicates(), xref #6680
  • Loading branch information
jreback committed Jan 28, 2016
1 parent 9bc8243 commit bb94ebe
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 65 deletions.
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ Removal of prior version deprecations/changes
- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`)
- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`)
- Removal of ``DataMatrix`` module. This was not imported into the pandas namespace in any event (:issue:`12111`)

- Removal of ``cols`` keyword in favor of ``subset`` in ``DataFrame.duplicated()`` and ``DataFrame.drop_duplicates()`` (:issue:`6680`)


.. _whatsnew_0180.performance:
Expand Down Expand Up @@ -544,4 +544,4 @@ Bug Fixes

- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)

- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
4 changes: 0 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3012,7 +3012,6 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,

@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
False: 'first'})
@deprecate_kwarg(old_arg_name='cols', new_arg_name='subset', stacklevel=3)
def drop_duplicates(self, subset=None, keep='first', inplace=False):
"""
Return DataFrame with duplicate rows removed, optionally only
Expand All @@ -3030,7 +3029,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False):
take_last : deprecated
inplace : boolean, default False
Whether to drop duplicates in place or to return a copy
cols : kwargs only argument of subset [deprecated]
Returns
-------
Expand All @@ -3047,7 +3045,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False):

@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
False: 'first'})
@deprecate_kwarg(old_arg_name='cols', new_arg_name='subset', stacklevel=3)
def duplicated(self, subset=None, keep='first'):
"""
Return boolean Series denoting duplicate rows, optionally only
Expand All @@ -3065,7 +3062,6 @@ def duplicated(self, subset=None, keep='first'):
last occurrence.
- False : Mark all duplicates as ``True``.
take_last : deprecated
cols : kwargs only argument of subset [deprecated]
Returns
-------
Expand Down
57 changes: 0 additions & 57 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1670,40 +1670,6 @@ def test_drop_duplicates_for_take_all(self):
expected = df.iloc[[0, 1, 2, 6]]
assert_frame_equal(result, expected)

def test_drop_duplicates_deprecated_warning(self):
    """Exercise the deprecated keywords of ``DataFrame.drop_duplicates``.

    Checks that ``subset`` is silent, that the deprecated ``cols`` and
    ``take_last`` keywords emit a ``FutureWarning`` while still producing
    the expected frame, and that conflicting, unknown or invalid
    arguments are rejected.
    """
    df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar',
                            'foo', 'bar', 'bar', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': [1, 1, 2, 2, 2, 2, 1, 2],
                    'D': lrange(8)})
    expected = df[:2]

    # The supported keyword must not emit any warning
    with tm.assert_produces_warning(False):
        result = df.drop_duplicates(subset='AAA')
    assert_frame_equal(result, expected)

    # The deprecated ``cols`` keyword emits a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        result = df.drop_duplicates(cols='AAA')
    assert_frame_equal(result, expected)

    # Does not allow both subset and cols.
    # The keywords are forwarded as real keyword arguments; passing
    # ``kwargs={...}`` would only test that a keyword literally named
    # ``kwargs`` is rejected.
    self.assertRaises(TypeError, df.drop_duplicates,
                      cols='AAA', subset='B')

    # Does not allow unknown kwargs
    self.assertRaises(TypeError, df.drop_duplicates,
                      subset='AAA', bad_arg=True)

    # The deprecated ``take_last`` keyword emits a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        result = df.drop_duplicates(take_last=False, subset='AAA')
    assert_frame_equal(result, expected)

    # ``keep`` only accepts the documented values
    self.assertRaises(ValueError, df.drop_duplicates, keep='invalid_name')

def test_drop_duplicates_tuple(self):
df = DataFrame({('AA', 'AB'): ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'bar', 'foo'],
Expand Down Expand Up @@ -1960,29 +1926,6 @@ def test_drop_duplicates_inplace(self):
result = df2
assert_frame_equal(result, expected)

def test_duplicated_deprecated_warning(self):
    """Exercise the deprecated ``cols`` keyword of ``DataFrame.duplicated``.

    Checks that ``subset`` is silent, that ``cols`` emits a
    ``FutureWarning``, and that conflicting or unknown keyword arguments
    raise ``TypeError``.
    """
    df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar',
                            'foo', 'bar', 'bar', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': [1, 1, 2, 2, 2, 2, 1, 2],
                    'D': lrange(8)})

    # The supported keyword must not emit any warning
    with tm.assert_produces_warning(False):
        df.duplicated(subset='AAA')

    # The deprecated ``cols`` keyword emits a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        df.duplicated(cols='AAA')

    # Does not allow both subset and cols.
    # The keywords are forwarded as real keyword arguments; passing
    # ``kwargs={...}`` would only test that a keyword literally named
    # ``kwargs`` is rejected.
    self.assertRaises(TypeError, df.duplicated,
                      cols='AAA', subset='B')

    # Does not allow unknown kwargs
    self.assertRaises(TypeError, df.duplicated,
                      subset='AAA', bad_arg=True)

# Rounding

def test_round(self):
Expand Down
2 changes: 0 additions & 2 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,6 @@ class Timestamp(_Timestamp):
Offset which Timestamp will have
tz : string, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time which Timestamp will have.
unit : string
numpy unit used for conversion, if ts_input is int or float
"""

# Do not add ``dayfirst`` and ``yearfirst`` to Timestamp based on the discussion
Expand Down

0 comments on commit bb94ebe

Please sign in to comment.