From ebd40aae0196cec4b6c7f2364655f7fc55024cc0 Mon Sep 17 00:00:00 2001 From: jmholzer Date: Sun, 16 May 2021 02:32:25 +0200 Subject: [PATCH 01/11] ENH: Deprecate non-keyword arguments for drop_duplicates. --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/frame.py | 2 ++ pandas/core/series.py | 2 ++ pandas/tests/frame/methods/test_drop_duplicates.py | 11 +++++++++++ pandas/tests/series/methods/test_drop_duplicates.py | 11 +++++++++++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 622029adf357f..e34589a0ae5c5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -647,7 +647,7 @@ Deprecations - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) - +- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` and :meth:`Series.drop_duplicates` (:issue:`41485`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2941b6ac01904..9f602b643731f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -77,6 +77,7 @@ Appender, Substitution, deprecate_kwarg, + deprecate_nonkeyword_arguments, doc, rewrite_axis_style_signature, ) @@ -5953,6 +5954,7 @@ def dropna( else: return result + @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, diff --git a/pandas/core/series.py b/pandas/core/series.py index c8e9898f9462a..ba6b2ab1e7cf0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -51,6 +51,7 @@ from pandas.util._decorators import ( Appender, Substitution, + deprecate_nonkeyword_arguments, doc, ) from pandas.util._validators import ( @@ -2024,6 +2025,7 @@ def drop_duplicates(self, *, inplace: Literal[True]) -> None: def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None: ... + @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) def drop_duplicates(self, keep="first", inplace=False) -> Series | None: """ Return Series with duplicate values removed. 
diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 10c1f37f4c9ba..7bfdc8bc308e4 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -471,3 +471,14 @@ def test_drop_duplicates_non_boolean_ignore_index(arg): msg = '^For argument "ignore_index" expected type bool, received type .*.$' with pytest.raises(ValueError, match=msg): df.drop_duplicates(ignore_index=arg) + + +def test_drop_duplicates_pos_args_deprecation(): + # test deprecation warning message for positional arguments GH#41485 + df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) + msg = ( + r"Starting with Pandas version 2\.0 all arguments of drop_duplicates except for " + r"the argument 'self' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.drop_duplicates(["b", "c"]) \ No newline at end of file diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index dae1bbcd86e81..f24c2c4b85b86 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -223,3 +223,14 @@ def test_drop_duplicates_categorical_bool(self, ordered): return_value = sc.drop_duplicates(keep=False, inplace=True) assert return_value is None tm.assert_series_equal(sc, tc[~expected]) + + +def test_drop_duplicates_pos_args_deprecation(): + # test deprecation warning message for positional arguments GH#41485 + s = Series(['a', 'b', 'c', 'b']) + msg = ( + r"Starting with Pandas version 2\.0 all arguments of drop_duplicates except for " + r"the argument 'self' will be keyword-only" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + s.drop_duplicates("last") \ No newline at end of file From 8cb7645502693d1d94a9e2571444efe51f1391de Mon Sep 17 00:00:00 2001 From: jmholzer Date: Sun, 16 May 2021 02:43:19 +0200 
Subject: [PATCH 02/11] leave newline --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/tests/frame/methods/test_drop_duplicates.py | 2 +- pandas/tests/series/methods/test_drop_duplicates.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index e34589a0ae5c5..b6849869e9c65 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -648,6 +648,7 @@ Deprecations - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) - Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` and :meth:`Series.drop_duplicates` (:issue:`41485`) + .. --------------------------------------------------------------------------- diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 7bfdc8bc308e4..3969a6849fc71 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -481,4 +481,4 @@ def test_drop_duplicates_pos_args_deprecation(): r"the argument 'self' will be keyword-only" ) with tm.assert_produces_warning(FutureWarning, match=msg): - df.drop_duplicates(["b", "c"]) \ No newline at end of file + df.drop_duplicates(["b", "c"]) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index f24c2c4b85b86..7ffa07329a8be 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -233,4 +233,4 @@ def test_drop_duplicates_pos_args_deprecation(): r"the argument 'self' will be keyword-only" ) with tm.assert_produces_warning(FutureWarning, match=msg): - s.drop_duplicates("last") \ No newline at end 
of file + s.drop_duplicates("last") From fa6574c8426bd082a82f3394e8f0fa2335c6f128 Mon Sep 17 00:00:00 2001 From: jmholzer Date: Sun, 16 May 2021 14:28:25 +0000 Subject: [PATCH 03/11] ENH: Deprecate non-keyword arguments for drop_duplicates. --- pandas/core/frame.py | 2 +- pandas/tests/frame/methods/test_drop_duplicates.py | 6 +++--- pandas/tests/series/methods/test_drop_duplicates.py | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9f602b643731f..eefb3f3d26eee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5954,7 +5954,7 @@ def dropna( else: return result - @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) + @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self", "subset"]) def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 3969a6849fc71..29af4c9d3d662 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -474,11 +474,11 @@ def test_drop_duplicates_non_boolean_ignore_index(arg): def test_drop_duplicates_pos_args_deprecation(): - # test deprecation warning message for positional arguments GH#41485 + # GH#41485 df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) msg = ( - r"Starting with Pandas version 2\.0 all arguments of drop_duplicates except for " - r"the argument 'self' will be keyword-only" + r"Starting with Pandas version 2\.0 all arguments of drop_duplicates " + r"except for the argument 'self' will be keyword-only" ) with tm.assert_produces_warning(FutureWarning, match=msg): df.drop_duplicates(["b", "c"]) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index 7ffa07329a8be..b891dff7a5652 100644 --- 
a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -226,11 +226,11 @@ def test_drop_duplicates_categorical_bool(self, ordered): def test_drop_duplicates_pos_args_deprecation(): - # test deprecation warning message for positional arguments GH#41485 - s = Series(['a', 'b', 'c', 'b']) + # GH#41485 + s = Series(["a", "b", "c", "b"]) msg = ( - r"Starting with Pandas version 2\.0 all arguments of drop_duplicates except for " - r"the argument 'self' will be keyword-only" + r"Starting with Pandas version 2\.0 all arguments of drop_duplicates " + r"except for the argument 'self' will be keyword-only" ) with tm.assert_produces_warning(FutureWarning, match=msg): s.drop_duplicates("last") From d7c341a44f0ede903a0915bbc871b837543358e1 Mon Sep 17 00:00:00 2001 From: jmholzer Date: Sun, 16 May 2021 14:35:03 +0000 Subject: [PATCH 04/11] ENH: Deprecate non-keyword arguments for drop_duplicates. --- pandas/tests/frame/methods/test_drop_duplicates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 29af4c9d3d662..e65240d5acac2 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -478,7 +478,7 @@ def test_drop_duplicates_pos_args_deprecation(): df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) msg = ( r"Starting with Pandas version 2\.0 all arguments of drop_duplicates " - r"except for the argument 'self' will be keyword-only" + r"except for the arguments 'self' and 'subset' will be keyword-only" ) with tm.assert_produces_warning(FutureWarning, match=msg): - df.drop_duplicates(["b", "c"]) + df.drop_duplicates(["b", "c"], "last") From 19aa58926d7ab45207365f2205508488d59c792d Mon Sep 17 00:00:00 2001 From: jmholzer Date: Sun, 16 May 2021 15:17:49 +0000 Subject: [PATCH 05/11] ENH: Deprecate non-keyword arguments 
for drop_duplicates. --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b6849869e9c65..b1d644bc8da50 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -647,7 +647,7 @@ Deprecations - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) -- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` and :meth:`Series.drop_duplicates` (:issue:`41485`) +- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``) and :meth:`Series.drop_duplicates` (:issue:`41485`) .. --------------------------------------------------------------------------- From 0d54ca7f00e244cd17a9add0b1540a72a275d1a2 Mon Sep 17 00:00:00 2001 From: jmholzer Date: Thu, 20 May 2021 18:38:16 +0000 Subject: [PATCH 06/11] ENH: Deprecate non-keyword arguments for drop_duplicates. 
--- pandas/core/frame.py | 2 +- pandas/core/series.py | 2 +- pandas/tests/frame/methods/test_drop_duplicates.py | 13 ++++++++++--- pandas/tests/series/methods/test_drop_duplicates.py | 12 +++++++++--- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9b248fe18a88f..79c2f8e3ee60b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6004,7 +6004,7 @@ def dropna( else: return result - @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self", "subset"]) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"]) def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, diff --git a/pandas/core/series.py b/pandas/core/series.py index cf5db51c9b0a6..2f186057efbb5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2025,7 +2025,7 @@ def drop_duplicates(self, *, inplace: Literal[True]) -> None: def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None: ... - @deprecate_nonkeyword_arguments(version="2.0", allowed_args=["self"]) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def drop_duplicates(self, keep="first", inplace=False) -> Series | None: """ Return Series with duplicate values removed. 
diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index e65240d5acac2..5430584aaeef2 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -476,9 +476,16 @@ def test_drop_duplicates_non_boolean_ignore_index(arg): def test_drop_duplicates_pos_args_deprecation(): # GH#41485 df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) + msg = ( - r"Starting with Pandas version 2\.0 all arguments of drop_duplicates " - r"except for the arguments 'self' and 'subset' will be keyword-only" + "In a future version of pandas all arguments of " + "DataFrame.drop_duplicates except for the argument 'subset' " + "will be keyword-only" ) + with tm.assert_produces_warning(FutureWarning, match=msg): - df.drop_duplicates(["b", "c"], "last") + result = df.drop_duplicates(["b", "c"], "last") + + expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2]) + + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index b891dff7a5652..efae8367e9391 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -228,9 +228,15 @@ def test_drop_duplicates_categorical_bool(self, ordered): def test_drop_duplicates_pos_args_deprecation(): # GH#41485 s = Series(["a", "b", "c", "b"]) + msg = ( - r"Starting with Pandas version 2\.0 all arguments of drop_duplicates " - r"except for the argument 'self' will be keyword-only" + "In a future version of pandas all arguments of " + "Series.drop_duplicates will be keyword-only" ) + with tm.assert_produces_warning(FutureWarning, match=msg): - s.drop_duplicates("last") + result = s.drop_duplicates("last") + + expected = Series(["a", "c", "b"], index=[0, 2, 3]) + + tm.assert_series_equal(expected, result) From 
463c37a576efb298e49082d6530d058dcc764ee6 Mon Sep 17 00:00:00 2001 From: jmholzer Date: Fri, 21 May 2021 21:06:04 +0000 Subject: [PATCH 07/11] ENH: Deprecate non-keyword arguments for drop_duplicates. --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/indexes/base.py | 2 ++ pandas/tests/indexes/multi/test_duplicates.py | 17 +++++++++++++++++ pandas/tests/indexes/test_base.py | 17 +++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index daa125f034627..89e86fb8d8843 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -649,7 +649,7 @@ Deprecations - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) - Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`) -- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``) and :meth:`Series.drop_duplicates` (:issue:`41485`) +- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates` and :meth:`Index.drop_duplicates` (:issue:`41485`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b9fd18dfdce73..4c26ceff5832b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -54,6 +54,7 @@ from pandas.util._decorators import ( Appender, cache_readonly, + deprecate_nonkeyword_arguments, doc, ) @@ -2633,6 +2634,7 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: result = super().unique() return self._shallow_copy(result) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) @final def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: """ diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index ea59d55989f8b..7aa4c30ebc219 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -306,3 +306,20 @@ def test_duplicated_drop_duplicates(): assert duplicated.dtype == bool expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) + + +def test_multi_drop_duplicates_pos_args_deprecation(): + idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]]) + + msg = ( + "In a future version of pandas all arguments of " + "Index.drop_duplicates will be keyword-only" + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.drop_duplicates("last") + result = idx.drop_duplicates("last") + + expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]]) + + tm.assert_index_equal(expected, result) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 47657fff56ceb..c73bba075cdc6 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1738,3 +1738,20 @@ def test_construct_from_memoryview(klass, extra_kwargs): result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs) expected = 
klass(range(2000, 2005), **extra_kwargs) tm.assert_index_equal(result, expected) + + +def test_drop_duplicates_pos_args_deprecation(): + idx = Index([1, 2, 3, 1]) + + msg = ( + "In a future version of pandas all arguments of " + "Index.drop_duplicates will be keyword-only" + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + idx.drop_duplicates("last") + result = idx.drop_duplicates("last") + + expected = Index([2, 3, 1]) + + tm.assert_index_equal(expected, result) From 2cb482ffac31bc0efdf048c894e2ca3901ffe3b5 Mon Sep 17 00:00:00 2001 From: jmholzer Date: Sun, 23 May 2021 00:25:47 +0000 Subject: [PATCH 08/11] ENH: Deprecate non-keyword arguments for drop_duplicates. --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/indexes/multi.py | 7 +++++++ pandas/tests/indexes/multi/test_duplicates.py | 4 +++- pandas/tests/indexes/test_base.py | 2 ++ 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 38a6d710da6fb..5ad6369c12ecf 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -676,7 +676,7 @@ Deprecations - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) - Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`) - Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`) -- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates` and :meth:`Index.drop_duplicates` (:issue:`41485`) +- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates`, :meth:`Index.drop_duplicates` and 
:meth:`MultiIndex.drop_duplicates` (:issue:`41485`) - Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`) - Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`,:issue:`33401`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1a3719233a1da..e60675dcea603 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -41,6 +41,7 @@ from pandas.util._decorators import ( Appender, cache_readonly, + deprecate_nonkeyword_arguments, doc, ) @@ -77,8 +78,10 @@ from pandas.core.indexes.base import ( Index, _index_shared_docs, + _IndexT, ensure_index, get_unanimous_names, + str_t, ) from pandas.core.indexes.frozen import FrozenList from pandas.core.indexes.numeric import Int64Index @@ -3793,6 +3796,10 @@ def isin(self, values, level=None) -> np.ndarray: return np.zeros(len(levs), dtype=np.bool_) return levs.isin(values) + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: + return super(Index, self).drop_duplicates(keep=keep) + # --------------------------------------------------------------- # Arithmetic/Numeric Methods - Disabled diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 7aa4c30ebc219..1a8bbedc7f5c4 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -309,11 +309,13 @@ def test_duplicated_drop_duplicates(): def test_multi_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]]) msg = ( "In a future version of pandas all arguments of " - "Index.drop_duplicates will be keyword-only" + "MultiIndex.drop_duplicates will be 
keyword-only" ) with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c73bba075cdc6..3b2ef5d785912 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1741,6 +1741,8 @@ def test_construct_from_memoryview(klass, extra_kwargs): def test_drop_duplicates_pos_args_deprecation(): + # GH#41485 + idx = Index([1, 2, 3, 1]) msg = ( From 09fe413af8f928f1c5e9f256da4a5d6108e5d7ed Mon Sep 17 00:00:00 2001 From: jmholzer Date: Sun, 23 May 2021 13:14:24 +0000 Subject: [PATCH 09/11] ENH: Deprecate non-keyword arguments for drop_duplicates. --- pandas/core/indexes/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4c26ceff5832b..8aa1bdc99a7a2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2635,7 +2635,6 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: return self._shallow_copy(result) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - @final def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: """ Return Index with duplicate values removed. 
From 03d0330fa69e8ef70f0c910334672b5fe78375f1 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 23 May 2021 18:45:47 +0100 Subject: [PATCH 10/11] remove redundant line --- pandas/tests/indexes/multi/test_duplicates.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 1a8bbedc7f5c4..676a80ff7495c 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -319,7 +319,6 @@ def test_multi_drop_duplicates_pos_args_deprecation(): ) with tm.assert_produces_warning(FutureWarning, match=msg): - idx.drop_duplicates("last") result = idx.drop_duplicates("last") expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]]) From fbf70a23d21c9782df57b7e40fc3a328d4c88a37 Mon Sep 17 00:00:00 2001 From: jmholzer Date: Mon, 24 May 2021 20:19:48 +0000 Subject: [PATCH 11/11] ENH: Deprecate non-keyword arguments for drop_duplicates. --- pandas/core/indexes/multi.py | 6 ++---- pandas/tests/frame/methods/test_drop_duplicates.py | 4 ---- pandas/tests/indexes/multi/test_duplicates.py | 5 ----- pandas/tests/indexes/test_base.py | 5 ----- pandas/tests/series/methods/test_drop_duplicates.py | 4 ---- 5 files changed, 2 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e60675dcea603..2a03b5696fcc2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -78,10 +78,8 @@ from pandas.core.indexes.base import ( Index, _index_shared_docs, - _IndexT, ensure_index, get_unanimous_names, - str_t, ) from pandas.core.indexes.frozen import FrozenList from pandas.core.indexes.numeric import Int64Index @@ -3797,8 +3795,8 @@ def isin(self, values, level=None) -> np.ndarray: return levs.isin(values) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT: - return super(Index, 
self).drop_duplicates(keep=keep) + def drop_duplicates(self, keep: str | bool = "first") -> MultiIndex: + return super().drop_duplicates(keep=keep) # --------------------------------------------------------------- # Arithmetic/Numeric Methods - Disabled diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index 5430584aaeef2..8cbf7bbfe0368 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -476,16 +476,12 @@ def test_drop_duplicates_non_boolean_ignore_index(arg): def test_drop_duplicates_pos_args_deprecation(): # GH#41485 df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]}) - msg = ( "In a future version of pandas all arguments of " "DataFrame.drop_duplicates except for the argument 'subset' " "will be keyword-only" ) - with tm.assert_produces_warning(FutureWarning, match=msg): result = df.drop_duplicates(["b", "c"], "last") - expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2]) - tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 1a8bbedc7f5c4..ee0b847dd681f 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -310,18 +310,13 @@ def test_duplicated_drop_duplicates(): def test_multi_drop_duplicates_pos_args_deprecation(): # GH#41485 - idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]]) - msg = ( "In a future version of pandas all arguments of " "MultiIndex.drop_duplicates will be keyword-only" ) - with tm.assert_produces_warning(FutureWarning, match=msg): idx.drop_duplicates("last") result = idx.drop_duplicates("last") - expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]]) - tm.assert_index_equal(expected, result) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 
3b2ef5d785912..f41c79bd09f67 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1742,18 +1742,13 @@ def test_construct_from_memoryview(klass, extra_kwargs): def test_drop_duplicates_pos_args_deprecation(): # GH#41485 - idx = Index([1, 2, 3, 1]) - msg = ( "In a future version of pandas all arguments of " "Index.drop_duplicates will be keyword-only" ) - with tm.assert_produces_warning(FutureWarning, match=msg): idx.drop_duplicates("last") result = idx.drop_duplicates("last") - expected = Index([2, 3, 1]) - tm.assert_index_equal(expected, result) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index efae8367e9391..7eb51f8037792 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -228,15 +228,11 @@ def test_drop_duplicates_categorical_bool(self, ordered): def test_drop_duplicates_pos_args_deprecation(): # GH#41485 s = Series(["a", "b", "c", "b"]) - msg = ( "In a future version of pandas all arguments of " "Series.drop_duplicates will be keyword-only" ) - with tm.assert_produces_warning(FutureWarning, match=msg): result = s.drop_duplicates("last") - expected = Series(["a", "c", "b"], index=[0, 2, 3]) - tm.assert_series_equal(expected, result)