From 54c50687ddfcd79814aa1f854056b51eacd4e9e1 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 00:29:21 -0700 Subject: [PATCH 01/91] DOC #45443 edited the documentation of where/mask functions --- pandas/core/generic.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba3474a2513fb..b46eff137394c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9614,7 +9614,8 @@ def where( The {name} method is an application of the if-then idiom. For each element in the calling DataFrame, if ``cond`` is ``{cond}`` the element is used; otherwise the corresponding element from the DataFrame - ``other`` is used. + ``other`` is used. If `cond` {klass} is less in size than `other`, the default bool + for the missing value is {cond_rev}. The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to @@ -9641,6 +9642,23 @@ def where( 4 NaN dtype: float64 + >>> s = pd.Series(range(5)) + >>> t = pd.Series([True, False]) + >>> s.where(t,99) + 0 0 + 1 99 + 2 99 + 3 99 + 4 99 + dtype: int64 + >>> s.mask(t, 99) + 0 99 + 1 1 + 2 99 + 3 99 + 4 99 + dtype: int64 + >>> s.where(s > 1, 10) 0 10 1 10 From 2951fb14ef8c589f50b5a28e76878de410968b79 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 00:39:29 -0700 Subject: [PATCH 02/91] DOC #45443 edited the documentation of where/mask functions --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b46eff137394c..489ad1e3bf5c2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9614,8 +9614,8 @@ def where( The {name} method is an application of the if-then idiom. 
For each element in the calling DataFrame, if ``cond`` is ``{cond}`` the element is used; otherwise the corresponding element from the DataFrame - ``other`` is used. If `cond` {klass} is less in size than `other`, the default bool - for the missing value is {cond_rev}. + ``other`` is used. If `cond` {klass} is less in size than `other`, the + default bool for the missing value is {cond_rev}. The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to From 8afd6a1fad45a45326e0fdac46eb5cfd8ffac551 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 08:12:01 -0700 Subject: [PATCH 03/91] Update generic.py --- pandas/core/generic.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 489ad1e3bf5c2..ba3474a2513fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9614,8 +9614,7 @@ def where( The {name} method is an application of the if-then idiom. For each element in the calling DataFrame, if ``cond`` is ``{cond}`` the element is used; otherwise the corresponding element from the DataFrame - ``other`` is used. If `cond` {klass} is less in size than `other`, the - default bool for the missing value is {cond_rev}. + ``other`` is used. The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. 
Roughly ``df1.where(m, df2)`` is equivalent to @@ -9642,23 +9641,6 @@ def where( 4 NaN dtype: float64 - >>> s = pd.Series(range(5)) - >>> t = pd.Series([True, False]) - >>> s.where(t,99) - 0 0 - 1 99 - 2 99 - 3 99 - 4 99 - dtype: int64 - >>> s.mask(t, 99) - 0 99 - 1 1 - 2 99 - 3 99 - 4 99 - dtype: int64 - >>> s.where(s > 1, 10) 0 10 1 10 From a326359ca3743ecdfd4a64b303c9d0e8fa63b6fb Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 10:43:16 -0700 Subject: [PATCH 04/91] ENH: add suffixes argument to DataFrame.compare #44354 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/frame.py | 10 +++++++++ pandas/core/generic.py | 13 +++++++++-- pandas/core/shared_docs.py | 6 +++++- pandas/tests/frame/methods/test_compare.py | 25 ++++++++++++++++++++++ 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c70acc0a0b18c..e9d5bd1ffd1e2 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -277,6 +277,7 @@ Other enhancements - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) +- :meth:`DataFrame.compare` now accepts a ``suffixes`` to allow the user to specify the suffixes of both left and right DataFrame which are being compared. This is by default ``self`` and ``other`` (:issue:`44354`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ead4ea744c647..ffa0b46896f98 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7776,6 +7776,14 @@ def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: 0 a c NaN NaN 2 NaN NaN 3.0 4.0 +Assign suffixes + +>>> df.compare(df2, suffixes=("left", "right")) + col1 col3 + left right left right +0 a c NaN NaN +2 NaN NaN 3.0 4.0 + Stack the differences on rows >>> df.compare(df2, align_axis=0) @@ -7823,12 +7831,14 @@ def compare( align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, + suffixes: Suffixes = ("self", "other"), ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, keep_shape=keep_shape, keep_equal=keep_equal, + suffixes=suffixes, ) def combine( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba3474a2513fb..b62f5aa088500 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -58,6 +58,7 @@ Renamer, SortKind, StorageOptions, + Suffixes, T, TimedeltaConvertibleTypes, TimestampConvertibleTypes, @@ -8965,6 +8966,7 @@ def compare( align_axis: Axis = 1, keep_shape: bool_t = False, keep_equal: bool_t = False, + suffixes: Suffixes = ("self", "other"), ): from pandas.core.reshape.concat import concat @@ -8975,7 +8977,6 @@ def compare( ) mask = ~((self == other) | (self.isna() & other.isna())) - keys = ["self", "other"] if not keep_equal: self = self.where(mask) @@ -8990,13 +8991,21 @@ def compare( else: self = self[mask] other = other[mask] + if not isinstance(suffixes, tuple): + warnings.warn( + f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give " + "unexpected results. Provide 'suffixes' as a tuple instead. 
In the " + "future a 'TypeError' will be raised.", + FutureWarning, + stacklevel=find_stack_level(), + ) if align_axis in (1, "columns"): # This is needed for Series axis = 1 else: axis = self._get_axis_number(align_axis) - diff = concat([self, other], axis=axis, keys=keys) + diff = concat([self, other], axis=axis, keys=suffixes) if axis >= self.ndim: # No need to reorganize data if stacking on new axis diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 4b7a487e9472d..039d37d70dd45 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -53,7 +53,7 @@ ] = """ Compare to another {klass} and show the differences. -.. versionadded:: 1.1.0 +.. versionadded:: 1.5.0 Parameters ---------- @@ -75,6 +75,10 @@ keep_equal : bool, default False If true, the result keeps values that are equal. Otherwise, equal values are shown as NaNs. + +suffixes : tuple, default ('self', 'other') + Set the dataframes names in the comparison. + """ _shared_docs[ diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 468811eba0d39..10e9a4c847e30 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -180,3 +180,28 @@ def test_compare_unaligned_objects(): df1 = pd.DataFrame(np.ones((3, 3))) df2 = pd.DataFrame(np.zeros((2, 1))) df1.compare(df2) + + +def test_compare_suffixes(): + # GH + df1 = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + columns=["col1", "col2", "col3"], + ) + df2 = df1.copy() + df2.loc[0, "col1"] = "c" + df2.loc[2, "col3"] = 4.0 + + suffixes = ["left", "right"] + comp = df1.compare(df2, suffixes=suffixes) + expected = pd.DataFrame( + { + ("col1", "left"): {0: "a", 2: np.nan}, + ("col1", "right"): {0: "c", 2: np.nan}, + ("col3", "left"): {0: np.nan, 2: 3.0}, + ("col3", "right"): {0: np.nan, 2: np.nan}, + } + ) + tm.assert_frame_equal(comp, expected) + result_suffixes = 
comp.columns.get_level_values(1).unique() + assert result_suffixes.isin(suffixes).all(), "suffixes not equal" From d9c4ca98071cec8094e18f2fafddcc35b2c21a12 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 11:12:08 -0700 Subject: [PATCH 05/91] Edited the tests --- pandas/tests/frame/methods/test_compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 10e9a4c847e30..f51fbf11c1b30 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -190,9 +190,9 @@ def test_compare_suffixes(): ) df2 = df1.copy() df2.loc[0, "col1"] = "c" - df2.loc[2, "col3"] = 4.0 + df2.loc[2, "col3"] = np.nan - suffixes = ["left", "right"] + suffixes = ("left", "right") comp = df1.compare(df2, suffixes=suffixes) expected = pd.DataFrame( { From 1c54472c4e0c27aef3dc9f306a5063250a03997f Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 11:17:13 -0700 Subject: [PATCH 06/91] space fixing --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b62f5aa088500..a8a0913d386ee 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8993,9 +8993,9 @@ def compare( other = other[mask] if not isinstance(suffixes, tuple): warnings.warn( - f"Passing 'suffixes' as a {type(suffixes)}, is not supported and may give " - "unexpected results. Provide 'suffixes' as a tuple instead. In the " - "future a 'TypeError' will be raised.", + f"Passing 'suffixes' as a {type(suffixes)}, is not supported " + "and may give unexpected results. Provide 'suffixes' as a tuple " + "instead. 
In the future a 'TypeError' will be raised.", FutureWarning, stacklevel=find_stack_level(), ) From 4d3482134ce57fa6e070d7f2860a39359d43aa77 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 12:54:39 -0700 Subject: [PATCH 07/91] Update shared_docs.py --- pandas/core/shared_docs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 039d37d70dd45..9b6c25ae80b83 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -78,7 +78,6 @@ suffixes : tuple, default ('self', 'other') Set the dataframes names in the comparison. - """ _shared_docs[ From 8fb6aa22a72fa82d9e35493c3bb699a74f9d7217 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 12:58:23 -0700 Subject: [PATCH 08/91] Update series.py --- pandas/core/series.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index ef4ea0172c505..a899facc918f5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -164,6 +164,7 @@ from pandas._typing import ( NumpySorter, NumpyValueArrayLike, + Suffixes, ) from pandas.core.frame import DataFrame @@ -3236,12 +3237,14 @@ def compare( align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, + suffixes: Suffixes = ("self", "other"), ) -> DataFrame | Series: return super().compare( other=other, align_axis=align_axis, keep_shape=keep_shape, keep_equal=keep_equal, + suffixes = suffixes, ) def combine(self, other, func, fill_value=None) -> Series: From 1e33dea21d2967373e4940017c8e1661ec267c30 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 13:02:43 -0700 Subject: [PATCH 09/91] Update series.py --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 
a899facc918f5..8116706963bc1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3244,7 +3244,7 @@ def compare( align_axis=align_axis, keep_shape=keep_shape, keep_equal=keep_equal, - suffixes = suffixes, + suffixes=suffixes, ) def combine(self, other, func, fill_value=None) -> Series: From ae6c75ad72c67d20308d4ab10461e66e6574e72a Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 13:46:34 -0700 Subject: [PATCH 10/91] invalid argument tests --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/generic.py | 9 +++------ pandas/core/shared_docs.py | 4 +++- pandas/tests/frame/methods/test_compare.py | 19 +++++++++---------- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e9d5bd1ffd1e2..1cb1156da379d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -277,7 +277,7 @@ Other enhancements - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) -- :meth:`DataFrame.compare` now accepts a ``suffixes`` to allow the user to specify the suffixes of both left and right DataFrame which are being compared. This is by default ``self`` and ``other`` (:issue:`44354`) +- :meth:`DataFrame.compare` now accepts a ``suffixes`` to allow the user to specify the suffixes of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a8a0913d386ee..327e0912ca291 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8992,12 +8992,9 @@ def compare( self = self[mask] other = other[mask] if not isinstance(suffixes, tuple): - warnings.warn( - f"Passing 'suffixes' as a {type(suffixes)}, is not supported " - "and may give unexpected results. Provide 'suffixes' as a tuple " - "instead. In the future a 'TypeError' will be raised.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + f"Passing 'suffixes' as a {type(suffixes)}, is not " + "supported Provide 'suffixes' as a tuple instead." ) if align_axis in (1, "columns"): # This is needed for Series diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 9b6c25ae80b83..f5b3bff521f2e 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -53,7 +53,7 @@ ] = """ Compare to another {klass} and show the differences. -.. versionadded:: 1.5.0 +.. versionadded:: 1.1.0 Parameters ---------- @@ -78,6 +78,8 @@ suffixes : tuple, default ('self', 'other') Set the dataframes names in the comparison. + + .. 
versionadded:: 1.5.0 """ _shared_docs[ diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index f51fbf11c1b30..4dbd5328a71b7 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -186,14 +186,15 @@ def test_compare_suffixes(): # GH df1 = pd.DataFrame( {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, - columns=["col1", "col2", "col3"], ) - df2 = df1.copy() - df2.loc[0, "col1"] = "c" - df2.loc[2, "col3"] = np.nan - - suffixes = ("left", "right") - comp = df1.compare(df2, suffixes=suffixes) + df2 = pd.DataFrame( + { + "col1": ["c", "b", "c"], + "col2": [1.0, 2.0, np.nan], + "col3": [1.0, 2.0, np.nan], + }, + ) + result = df1.compare(df2, suffixes=("left", "right")) expected = pd.DataFrame( { ("col1", "left"): {0: "a", 2: np.nan}, @@ -202,6 +203,4 @@ def test_compare_suffixes(): ("col3", "right"): {0: np.nan, 2: np.nan}, } ) - tm.assert_frame_equal(comp, expected) - result_suffixes = comp.columns.get_level_values(1).unique() - assert result_suffixes.isin(suffixes).all(), "suffixes not equal" + tm.assert_frame_equal(result, expected) From ee10dd32e69fedc453a5cad3d5b1c23dce042c86 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 13:48:44 -0700 Subject: [PATCH 11/91] issue reference --- pandas/tests/frame/methods/test_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 4dbd5328a71b7..9ad58972621d8 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -183,7 +183,7 @@ def test_compare_unaligned_objects(): def test_compare_suffixes(): - # GH + #44354 df1 = pd.DataFrame( {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, ) From 077d274eefbc1eba793ef23bbff3d349ad984434 Mon Sep 17 
00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 13:50:29 -0700 Subject: [PATCH 12/91] syntax editing --- pandas/tests/frame/methods/test_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 9ad58972621d8..18106fa3c2496 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -183,7 +183,7 @@ def test_compare_unaligned_objects(): def test_compare_suffixes(): - #44354 + # 44354 df1 = pd.DataFrame( {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, ) From d0289e58d240787ac83da2bdab7ef0f780127a59 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 10 Jul 2022 08:31:40 -0700 Subject: [PATCH 13/91] grammar fixing --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/generic.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 4377a867b3fce..9910c2a5e2291 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -278,7 +278,7 @@ Other enhancements - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) - :class:`Series` reducers (e.g. 
``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) -- :meth:`DataFrame.compare` now accepts a ``suffixes`` to allow the user to specify the suffixes of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :meth:`DataFrame.compare` now accepts an argument ``suffixes`` to allow the user to specify the suffixes of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) - .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dcd80c0a18f97..af9979c1d2531 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8993,8 +8993,8 @@ def compare( other = other[mask] if not isinstance(suffixes, tuple): raise TypeError( - f"Passing 'suffixes' as a {type(suffixes)}, is not " - "supported Provide 'suffixes' as a tuple instead." + f"Passing 'suffixes' as a {type(suffixes)} is not " + "supported. Provide 'suffixes' as a tuple instead." 
) if align_axis in (1, "columns"): # This is needed for Series From bd45e06b3a8a458d8c502e8fbef8622a067d5a20 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 10 Jul 2022 09:20:39 -0700 Subject: [PATCH 14/91] edit doc --- doc/source/whatsnew/v1.5.0.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9910c2a5e2291..b9bd36aa6bcac 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,7 +279,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``suffixes`` to allow the user to specify the suffixes of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) -- + .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: @@ -800,6 +800,7 @@ Performance improvements - Performance improvement in :class:`BusinessHour` ``str`` and ``repr`` (:issue:`44764`) - Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used. (:issue:`44764`) - Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays. (:issue:`44764`) +- Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47404`, :issue:`47405`) - .. 
--------------------------------------------------------------------------- @@ -858,10 +859,12 @@ Conversion - Bug in :meth:`DataFrame.to_records` returning inconsistent numpy types if the index was a :class:`MultiIndex` (:issue:`47263`) - Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`) - Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`) +- Bug when inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` consisting of all NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`) Strings ^^^^^^^ - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. Now raises ``TypeError`` (:issue:`3485`) +- Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after as ``str.zfill`` from standard library (:issue:`20868`) - Interval @@ -1048,4 +1051,4 @@ Other .. _whatsnew_150.contributors: Contributors -~~~~~~~~~~~~ +~~~~~~~~~~~~ \ No newline at end of file From a13b319860b61b2a3d068bcbb14db91703d493b4 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 10 Jul 2022 09:30:23 -0700 Subject: [PATCH 15/91] editting doc --- doc/source/getting_started/intro_tutorials/02_read_write.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst index d69a48def0287..864732ea0b7ec 100644 --- a/doc/source/getting_started/intro_tutorials/02_read_write.rst +++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst @@ -205,4 +205,4 @@ For a complete overview of the input and output possibilities from and to pandas .. 
raw:: html - + \ No newline at end of file From f32d7cf7e53b6eca4720cb17efed64b166be8ce8 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 10 Jul 2022 09:58:34 -0700 Subject: [PATCH 16/91] Update 02_read_write.rst --- doc/source/getting_started/intro_tutorials/02_read_write.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst index 864732ea0b7ec..92b6e85c9c3a3 100644 --- a/doc/source/getting_started/intro_tutorials/02_read_write.rst +++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst @@ -205,4 +205,5 @@ For a complete overview of the input and output possibilities from and to pandas .. raw:: html - \ No newline at end of file + + From 63965838454fd51eeb7a9ca871892d49791e4933 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 10 Jul 2022 09:58:44 -0700 Subject: [PATCH 17/91] Update 02_read_write.rst --- doc/source/getting_started/intro_tutorials/02_read_write.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst index 92b6e85c9c3a3..d69a48def0287 100644 --- a/doc/source/getting_started/intro_tutorials/02_read_write.rst +++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst @@ -205,5 +205,4 @@ For a complete overview of the input and output possibilities from and to pandas .. 
raw:: html - From e754e1510d3cfc077184555efc1cc1c92291f883 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 10 Jul 2022 09:59:04 -0700 Subject: [PATCH 18/91] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b9bd36aa6bcac..d1a7f3c5b2e0d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1,5 +1,6 @@ .. _whatsnew_150: + What's new in 1.5.0 (??) ------------------------ @@ -1051,4 +1052,4 @@ Other .. _whatsnew_150.contributors: Contributors -~~~~~~~~~~~~ \ No newline at end of file +~~~~~~~~~~~~ From 8f67c9f7cedd90a08a7cb990de9ea4fd71a0d288 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 10 Jul 2022 09:59:15 -0700 Subject: [PATCH 19/91] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d1a7f3c5b2e0d..7eaed2a2e3566 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1,6 +1,5 @@ .. _whatsnew_150: - What's new in 1.5.0 (??) 
------------------------ From 580773d40fc343b9b47bf8e8d816cdab3c8ff115 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 12 Jul 2022 18:43:46 -0700 Subject: [PATCH 20/91] np --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/frame.py | 8 ++++---- pandas/core/generic.py | 10 +++++----- pandas/core/series.py | 4 ++-- pandas/core/shared_docs.py | 2 +- pandas/tests/frame/methods/test_compare.py | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7eaed2a2e3566..db1a6a5eead6d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -278,7 +278,7 @@ Other enhancements - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) -- :meth:`DataFrame.compare` now accepts an argument ``suffixes`` to allow the user to specify the suffixes of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) .. --------------------------------------------------------------------------- .. 
_whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ffa0b46896f98..3e6fe9719c5c7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7776,9 +7776,9 @@ def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: 0 a c NaN NaN 2 NaN NaN 3.0 4.0 -Assign suffixes +Assign result_names ->>> df.compare(df2, suffixes=("left", "right")) +>>> df.compare(df2, result_names=("left", "right")) col1 col3 left right left right 0 a c NaN NaN @@ -7831,14 +7831,14 @@ def compare( align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, - suffixes: Suffixes = ("self", "other"), + result_names: Suffixes = ("self", "other"), ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, keep_shape=keep_shape, keep_equal=keep_equal, - suffixes=suffixes, + result_names=result_names, ) def combine( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bc3f18414b793..9472aaf8e935c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8971,7 +8971,7 @@ def compare( align_axis: Axis = 1, keep_shape: bool_t = False, keep_equal: bool_t = False, - suffixes: Suffixes = ("self", "other"), + result_names: Suffixes = ("self", "other"), ): from pandas.core.reshape.concat import concat @@ -8996,10 +8996,10 @@ def compare( else: self = self[mask] other = other[mask] - if not isinstance(suffixes, tuple): + if not isinstance(result_names, tuple): raise TypeError( - f"Passing 'suffixes' as a {type(suffixes)} is not " - "supported. Provide 'suffixes' as a tuple instead." + f"Passing 'result_names' as a {type(result_names)} is not " + "supported. Provide 'result_names' as a tuple instead." 
) if align_axis in (1, "columns"): # This is needed for Series @@ -9007,7 +9007,7 @@ def compare( else: axis = self._get_axis_number(align_axis) - diff = concat([self, other], axis=axis, keys=suffixes) + diff = concat([self, other], axis=axis, keys=result_names) if axis >= self.ndim: # No need to reorganize data if stacking on new axis diff --git a/pandas/core/series.py b/pandas/core/series.py index 8116706963bc1..05fc90503dbbd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3237,14 +3237,14 @@ def compare( align_axis: Axis = 1, keep_shape: bool = False, keep_equal: bool = False, - suffixes: Suffixes = ("self", "other"), + result_names: Suffixes = ("self", "other"), ) -> DataFrame | Series: return super().compare( other=other, align_axis=align_axis, keep_shape=keep_shape, keep_equal=keep_equal, - suffixes=suffixes, + result_names=result_names, ) def combine(self, other, func, fill_value=None) -> Series: diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index f5b3bff521f2e..b7b75d6464da3 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -76,7 +76,7 @@ If true, the result keeps values that are equal. Otherwise, equal values are shown as NaNs. -suffixes : tuple, default ('self', 'other') +result_names : tuple, default ('self', 'other') Set the dataframes names in the comparison. .. 
versionadded:: 1.5.0 diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 18106fa3c2496..d9d24b0ebb7dd 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -182,7 +182,7 @@ def test_compare_unaligned_objects(): df1.compare(df2) -def test_compare_suffixes(): +def test_compare_result_names(): # 44354 df1 = pd.DataFrame( {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, @@ -194,7 +194,7 @@ def test_compare_suffixes(): "col3": [1.0, 2.0, np.nan], }, ) - result = df1.compare(df2, suffixes=("left", "right")) + result = df1.compare(df2, result_names=("left", "right")) expected = pd.DataFrame( { ("col1", "left"): {0: "a", 2: np.nan}, From a4fca5637f0ff16b26671a27c17ec6ab469296d0 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 13 Jul 2022 09:08:40 -0700 Subject: [PATCH 21/91] 1.5.0 rst --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bdebf27f59b04..7bd787ff1acfb 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -278,7 +278,7 @@ Other enhancements - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) - :class:`Series` reducers (e.g. 
``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) -- :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From bc209bb17be1e6f5a82a81016cd4244a95c2713e Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 20:22:34 -0700 Subject: [PATCH 22/91] created tests for invalid input --- pandas/tests/frame/methods/test_compare.py | 28 +++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index d9d24b0ebb7dd..fdb1f8909041e 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -183,7 +183,7 @@ def test_compare_unaligned_objects(): def test_compare_result_names(): - # 44354 + #GH 44354 df1 = pd.DataFrame( {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, ) @@ -204,3 +204,29 @@ def test_compare_result_names(): } ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "result_names", + [ + [1,2], + "HK", + {"2":2,"3":3}, + 3, + 3.0 + ] +) +def 
test_invalid_input_result_names(result_names): + #GH 44354 + df1 = pd.DataFrame( + {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, + ) + df2 = pd.DataFrame( + { + "col1": ["c", "b", "c"], + "col2": [1.0, 2.0, np.nan], + "col3": [1.0, 2.0, np.nan], + }, + ) + with pytest.raises(TypeError): + df1.compare(df2, result_names=result_names) From ff014e3706e5e1eaede890d30e81840437dbfc5b Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 20:23:49 -0700 Subject: [PATCH 23/91] space --- pandas/tests/frame/methods/test_compare.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index fdb1f8909041e..d1c06ec4f7e61 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -230,3 +230,4 @@ def test_invalid_input_result_names(result_names): ) with pytest.raises(TypeError): df1.compare(df2, result_names=result_names) + From 32d1c5e2bda00918f870e9ae85b0c56761cf7906 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 20:24:43 -0700 Subject: [PATCH 24/91] space --- pandas/tests/frame/methods/test_compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index d1c06ec4f7e61..52a99f23980e0 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -213,8 +213,8 @@ def test_compare_result_names(): "HK", {"2":2,"3":3}, 3, - 3.0 - ] + 3.0, + ], ) def test_invalid_input_result_names(result_names): #GH 44354 From 0daa3e831b140c7b66289efe4764b12b2a6c2c37 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 20:39:26 -0700 Subject: [PATCH 25/91] space --- 
pandas/tests/frame/methods/test_compare.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 52a99f23980e0..3982e7191ab9f 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -183,7 +183,7 @@ def test_compare_unaligned_objects(): def test_compare_result_names(): - #GH 44354 + # GH 44354 df1 = pd.DataFrame( {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, ) @@ -209,15 +209,15 @@ def test_compare_result_names(): @pytest.mark.parametrize( "result_names", [ - [1,2], + [1, 2], "HK", - {"2":2,"3":3}, + {"2": 2, "3": 3}, 3, 3.0, ], ) def test_invalid_input_result_names(result_names): - #GH 44354 + # GH 44354 df1 = pd.DataFrame( {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}, ) From 9cb23b8365d65a3351e9081ac6512e7adf1ca788 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 21:16:25 -0700 Subject: [PATCH 26/91] editing test --- pandas/tests/frame/methods/test_compare.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 3982e7191ab9f..609242db453ba 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -228,6 +228,11 @@ def test_invalid_input_result_names(result_names): "col3": [1.0, 2.0, np.nan], }, ) - with pytest.raises(TypeError): + with pytest.raises( + TypeError, + match=( + f"Passing 'result_names' as a {type(result_names)} is not " + "supported. Provide 'result_names' as a tuple instead." 
+ ), + ): df1.compare(df2, result_names=result_names) - From 8ec706775dd6d97177ed3528d74fc749273d0af7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 20:28:39 -0700 Subject: [PATCH 27/91] deprecated --- pandas/core/frame.py | 13 +++++++++++++ pandas/tests/groupby/test_groupby.py | 27 ++++++++++++++++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c90dffbf4df6..e3ee892318ac2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8359,6 +8359,19 @@ def groupby( ) -> DataFrameGroupBy: from pandas.core.groupby.generic import DataFrameGroupBy + if isinstance(by, list): + if len(by) == 1 and isinstance(by[0], str): + warnings.warn( + ( + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." + ), + FutureWarning, + stacklevel=find_stack_level(), + ) + if squeeze is not no_default: warnings.warn( ( diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 920b869ef799b..7882787545749 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -806,7 +806,7 @@ def test_groupby_as_index_cython(df): msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = grouped.mean() - expected = data.groupby(["A"]).mean() + expected = data.groupby("A").mean() expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) @@ -1259,7 +1259,7 @@ def test_consistency_name(): } ) - expected = df.groupby(["A"]).B.count() + expected = df.groupby("A").B.count() result = df.B.groupby(df.A).count() tm.assert_series_equal(result, expected) @@ -1495,7 +1495,7 @@ def test_groupby_2d_malformed(): d["label"] = ["l1", "l2"] msg = "The default 
value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): - tmp = d.groupby(["group"]).mean() + tmp = d.groupby("group").mean() res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -2643,7 +2643,7 @@ def test_groupby_aggregation_non_numeric_dtype(): index=Index(["M", "W"], dtype="object", name="MW"), ) - gb = df.groupby(by=["MW"]) + gb = df.groupby(by="MW") result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2666,7 +2666,7 @@ def test_groupby_aggregation_multi_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by=["x"]) + gb = df.groupby(by="x") result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2686,7 +2686,7 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by=["x"]) + gb = df.groupby(by="x") msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = gb.sum() @@ -2766,7 +2766,7 @@ def test_by_column_values_with_same_starting_value(): ) aggregate_details = {"Mood": Series.mode, "Credit": "sum"} - result = df.groupby(["Name"]).agg(aggregate_details) + result = df.groupby("Name").agg(aggregate_details) expected_result = DataFrame( { "Mood": [["happy", "sad"], "happy"], @@ -2795,3 +2795,16 @@ def test_groupby_none_column_name(): result = df.groupby(by=[None]).sum() expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None)) tm.assert_frame_equal(result, expected) + + +def test_single_element_list_grouping(): + df = DataFrame(columns=["a", "b", "c"], index=["x", "y"]) + df.loc["y"] = Series({"a": 1, "b": 5, "c": 2}) + msg = ( + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." 
+ ) + with tm.assert_produces_warning(FutureWarning, match = msg): + df.groupby(["a"]) From 7be113adb947a53762ab4ac54fa0636087f3ef62 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 21:40:20 -0700 Subject: [PATCH 28/91] syntax --- pandas/tests/groupby/test_groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7882787545749..55b18fe583be3 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2804,7 +2804,7 @@ def test_single_element_list_grouping(): "In a future version of pandas, a length 1 " "tuple will be returned when grouping by a " "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." + "a single grouper to avoid this warning." ) - with tm.assert_produces_warning(FutureWarning, match = msg): + with tm.assert_produces_warning(FutureWarning, match=msg): df.groupby(["a"]) From b78819f452f0a4827a4e2351a151314c4c661b7d Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 22:38:27 -0700 Subject: [PATCH 29/91] editting existed examples --- asv_bench/benchmarks/groupby.py | 2 +- doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/groupby.rst | 10 +++++----- doc/source/whatsnew/v1.1.0.rst | 4 ++-- pandas/core/frame.py | 4 ++-- pandas/tests/groupby/aggregate/test_aggregate.py | 4 ++-- pandas/tests/groupby/aggregate/test_cython.py | 4 ++-- pandas/tests/groupby/test_allowlist.py | 6 +++--- pandas/tests/groupby/test_apply.py | 4 ++-- pandas/tests/groupby/test_apply_mutate.py | 6 +++--- pandas/tests/groupby/test_categorical.py | 4 ++-- pandas/tests/groupby/test_counting.py | 6 +++--- pandas/tests/groupby/test_function.py | 4 ++-- pandas/tests/groupby/test_groupby.py | 4 ++-- pandas/tests/groupby/test_groupby_dropna.py | 2 +- 
pandas/tests/groupby/test_groupby_shift_diff.py | 4 ++-- pandas/tests/groupby/test_grouping.py | 2 +- pandas/tests/groupby/test_missing.py | 4 ++-- pandas/tests/groupby/test_nunique.py | 2 +- pandas/tests/groupby/test_rank.py | 2 +- pandas/tests/groupby/test_value_counts.py | 4 ++-- pandas/tests/groupby/transform/test_transform.py | 6 +++--- pandas/tests/resample/test_resampler_grouper.py | 2 +- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/reshape/test_crosstab.py | 4 ++-- 25 files changed, 49 insertions(+), 49 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 2de1f25fceace..21da5fd10778a 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -618,7 +618,7 @@ def setup(self): self.df = DataFrame({"a": arr, "b": arr}) def time_sum(self): - self.df.groupby(["a"])["b"].sum() + self.df.groupby("a")["b"].sum() class String: diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index daf5a0e481b8e..d53928d72d8e6 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -562,7 +562,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to {"Color": "Red Red Red Blue".split(), "Value": [100, 150, 50, 50]} ) df - df["Counts"] = df.groupby(["Color"]).transform(len) + df["Counts"] = df.groupby("Color").transform(len) df `Shift groups of the values in a column based on the index diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 5d8ef7ce02097..c4e6ec3d9949d 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -186,8 +186,8 @@ By default the group keys are sorted during the ``groupby`` operation. You may h .. 
ipython:: python df2 = pd.DataFrame({"X": ["B", "B", "A", "A"], "Y": [1, 2, 3, 4]}) - df2.groupby(["X"]).sum() - df2.groupby(["X"], sort=False).sum() + df2.groupby("X").sum() + df2.groupby("X", sort=False).sum() Note that ``groupby`` will preserve the order in which *observations* are sorted *within* each group. @@ -221,10 +221,10 @@ in case you want to include ``NA`` values in group keys, you could pass ``dropna .. ipython:: python # Default ``dropna`` is set to True, which will exclude NaNs in keys - df_dropna.groupby(by=["b"], dropna=True).sum() + df_dropna.groupby(by="b", dropna=True).sum() # In order to allow NaN in keys, set ``dropna`` to False - df_dropna.groupby(by=["b"], dropna=False).sum() + df_dropna.groupby(by="b", dropna=False).sum() The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. @@ -403,7 +403,7 @@ getting a column from a DataFrame, you can do: df - grouped = df.groupby(["A"]) + grouped = df.groupby("A") grouped_C = grouped["C"] grouped_D = grouped["D"] diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e1f54c439ae9b..ff6187499d9e4 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -117,10 +117,10 @@ compatibility (:issue:`3729`) .. ipython:: python # Default ``dropna`` is set to True, which will exclude NaNs in keys - df_dropna.groupby(by=["b"], dropna=True).sum() + df_dropna.groupby(by="b", dropna=True).sum() # In order to allow NaN in keys, set ``dropna`` to False - df_dropna.groupby(by=["b"], dropna=False).sum() + df_dropna.groupby(by="b", dropna=False).sum() The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1b58db04e4f92..68bb248e1d3e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8293,13 +8293,13 @@ def update( >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]] >>> df = pd.DataFrame(l, columns=["a", "b", "c"]) ->>> df.groupby(by=["b"]).sum() +>>> df.groupby(by="b").sum() a c b 1.0 2 3 2.0 2 5 ->>> df.groupby(by=["b"], dropna=False).sum() +>>> df.groupby(by="b", dropna=False).sum() a c b 1.0 2 3 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 54ee32502bbc9..044b8237f890a 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -504,7 +504,7 @@ def test_bool_agg_dtype(op): @pytest.mark.parametrize( "keys, agg_index", [ - (["a"], Index([1], name="a")), + ("a", Index([1], name="a")), (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), ], ) @@ -535,7 +535,7 @@ def test_callable_result_dtype_frame( @pytest.mark.parametrize( "keys, agg_index", [ - (["a"], Index([1], name="a")), + ("a", Index([1], name="a")), (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), ], ) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 869ed31b6a2d9..98312eded1838 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -291,8 +291,8 @@ def test_read_only_buffer_source_agg(agg): ) df._mgr.arrays[0].flags.writeable = False - result = df.groupby(["species"]).agg({"sepal_length": agg}) - expected = df.copy().groupby(["species"]).agg({"sepal_length": agg}) + result = df.groupby("species").agg({"sepal_length": agg}) + expected = df.copy().groupby("species").agg({"sepal_length": agg}) tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 
e541abb368a02..5bd618898e567 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -362,7 +362,7 @@ def test_groupby_selection_with_methods(df, method): rng = date_range("2014", periods=len(df)) df.index = rng - g = df.groupby(["A"])[["C"]] + g = df.groupby("A")[["C"]] g_exp = df[["C"]].groupby(df["A"]) # TODO check groupby with > 1 col ? @@ -378,7 +378,7 @@ def test_groupby_selection_tshift_raises(df): rng = date_range("2014", periods=len(df)) df.index = rng - g = df.groupby(["A"])[["C"]] + g = df.groupby("A")[["C"]] # check that the index cache is cleared with pytest.raises(ValueError, match="Freq was not set in the index"): @@ -392,7 +392,7 @@ def test_groupby_selection_other_methods(df): df.columns.name = "foo" df.index = rng - g = df.groupby(["A"])[["C"]] + g = df.groupby("A")[["C"]] g_exp = df[["C"]].groupby(df["A"]) # methods which aren't just .foo() diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 4cfc3ea41543b..a5b817df56151 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -595,9 +595,9 @@ def test_apply_numeric_coercion_when_datetime(): df = DataFrame( {"Number": [1, 2], "Date": ["2017-03-02"] * 2, "Str": ["foo", "inf"]} ) - expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + expected = df.groupby("Number").apply(lambda x: x.iloc[0]) df.Date = pd.to_datetime(df.Date) - result = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + result = df.groupby("Number").apply(lambda x: x.iloc[0]) tm.assert_series_equal(result["Str"], expected["Str"]) # GH 15421 diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index d1f25aabe31a2..057d7d4ef7468 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -13,8 +13,8 @@ def test_group_by_copy(): } ).set_index("name") - grp_by_same_value = df.groupby(["age"], 
group_keys=False).apply(lambda group: group) - grp_by_copy = df.groupby(["age"], group_keys=False).apply( + grp_by_same_value = df.groupby("age", group_keys=False).apply(lambda group: group) + grp_by_copy = df.groupby("age", group_keys=False).apply( lambda group: group.copy() ) tm.assert_frame_equal(grp_by_same_value, grp_by_copy) @@ -75,7 +75,7 @@ def fn(x): x.loc[x.index[-1], "col2"] = 0 return x.col2 - result = df.groupby(["col1"], as_index=False).apply(fn) + result = df.groupby("col1", as_index=False).apply(fn) expected = pd.Series( [1, 2, 0, 4, 5, 0], index=pd.MultiIndex.from_tuples( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 004e55f4d161f..bf0bc944865fb 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -115,7 +115,7 @@ def test_basic(): # TODO: split this test ) x["person_name"] = Categorical(x.person_name) - g = x.groupby(["person_id"], observed=False) + g = x.groupby("person_id", observed=False) result = g.transform(lambda x: x) tm.assert_frame_equal(result, x[["person_name"]]) @@ -917,7 +917,7 @@ def test_sort(): df.value, range(0, 10500, 500), right=False, labels=cat_labels ) - res = df.groupby(["value_group"], observed=False)["value_group"].count() + res = df.groupby("value_group", observed=False)["value_group"].count() exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] exp.index = CategoricalIndex(exp.index, name=exp.index.name) tm.assert_series_equal(res, exp) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index f0a3219d0b419..f9527f13dd7c5 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -151,7 +151,7 @@ def test_ngroup_groupby_not_col(self): def test_ngroup_descending(self): df = DataFrame(["a", "a", "b", "a", "b"], columns=["A"]) - g = df.groupby(["A"]) + g = df.groupby("A") ascending = Series([0, 0, 1, 0, 1]) descending = Series([1, 
1, 0, 1, 0]) @@ -179,7 +179,7 @@ def test_ngroup_cumcount_pair(self): # brute force comparison for all small series for p in product(range(3), repeat=4): df = DataFrame({"a": p}) - g = df.groupby(["a"]) + g = df.groupby("a") order = sorted(set(p)) ngroupd = [order.index(val) for val in p] @@ -235,7 +235,7 @@ def test_count_with_only_nans_in_first_group(self): def test_count_groupby_column_with_nan_in_groupby_column(self): # https://github.com/pandas-dev/pandas/issues/32841 df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]}) - res = df.groupby(["B"]).count() + res = df.groupby("B").count() expected = DataFrame( index=Index([0.0, 3.0, 4.0, 5.0], name="B"), data={"A": [1, 1, 1, 1]} ) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index dda583e3a1962..b66b7e68d04c5 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1025,7 +1025,7 @@ def test_is_monotonic_increasing(in_vals, out_vals): tm.assert_series_equal(result, expected) # Also check result equal to manually taking x.is_monotonic_increasing. 
- expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing) + expected = df.groupby("B").C.apply(lambda x: x.is_monotonic_increasing) tm.assert_series_equal(result, expected) @@ -1357,7 +1357,7 @@ def test_groupby_sum_timedelta_with_nat(): ], ) @pytest.mark.parametrize("numeric_only", [True, False, lib.no_default]) -@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) +@pytest.mark.parametrize("keys", ["a1", ["a1", "a2"]]) def test_deprecate_numeric_only( kernel, numeric_only_default, drops_nuisance, has_arg, numeric_only, keys ): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 55b18fe583be3..3919b5e8c64f8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2240,7 +2240,7 @@ def test_groupby_groups_in_BaseGrouper(): assert result.groups == expected.groups -@pytest.mark.parametrize("group_name", ["x", ["x"]]) +@pytest.mark.parametrize("group_name", ["x"]) def test_groupby_axis_1(group_name): # GH 27614 df = DataFrame( @@ -2807,4 +2807,4 @@ def test_single_element_list_grouping(): "a single grouper to avoid this warning." 
) with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(["a"]) + df.groupby("a") diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 515c96780e731..74f050a5737d7 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -333,7 +333,7 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, @pytest.mark.parametrize("input_index", [None, ["a"], ["a", "b"]]) -@pytest.mark.parametrize("keys", [["a"], ["a", "b"]]) +@pytest.mark.parametrize("keys", ["a", ["a", "b"]]) @pytest.mark.parametrize("series", [True, False]) def test_groupby_dropna_with_multiindex_input(input_index, keys, series): # GH#46783 diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index 7ffee412e3cdf..2ad5b56fb5c4e 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -141,8 +141,8 @@ def test_group_diff_object_raises(object_dtype): def test_empty_shift_with_fill(): # GH 41264, single-index check df = DataFrame(columns=["a", "b", "c"]) - shifted = df.groupby(["a"]).shift(1) - shifted_with_fill = df.groupby(["a"]).shift(1, fill_value=0) + shifted = df.groupby("a").shift(1) + shifted_with_fill = df.groupby("a").shift(1, fill_value=0) tm.assert_frame_equal(shifted, shifted_with_fill) tm.assert_index_equal(shifted.index, shifted_with_fill.index) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 6da07dafcda74..ec94749d6952e 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -873,7 +873,7 @@ def test_gb_key_len_equal_axis_len(self): class TestIteration: def test_groups(self, df): - grouped = df.groupby(["A"]) + grouped = df.groupby("A") groups = grouped.groups assert groups is grouped.groups # caching works diff --git 
a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py index 76da8dfe0607b..596b98d673042 100644 --- a/pandas/tests/groupby/test_missing.py +++ b/pandas/tests/groupby/test_missing.py @@ -17,7 +17,7 @@ def test_groupby_column_index_name_lost_fill_funcs(func): [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], columns=Index(["type", "a", "b"], name="idx"), ) - df_grouped = df.groupby(["type"])[["a", "b"]] + df_grouped = df.groupby("type")[["a", "b"]] result = getattr(df_grouped, func)().columns expected = Index(["a", "b"], name="idx") tm.assert_index_equal(result, expected) @@ -28,7 +28,7 @@ def test_groupby_fill_duplicate_column_names(func): # GH: 25610 ValueError with duplicate column names df1 = DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]}) df2 = DataFrame({"field1": [1, np.nan, 4]}) - df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) + df_grouped = pd.concat([df1, df2], axis=1).groupby(by="field2") expected = DataFrame( [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] ) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index 6656fd565f79d..f1eb811a788b4 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -164,7 +164,7 @@ def test_nunique_with_timegrouper(): def test_nunique_with_NaT(key, data, dropna, expected): # GH 27951 df = DataFrame({"key": key, "data": data}) - result = df.groupby(["key"])["data"].nunique(dropna=dropna) + result = df.groupby("key")["data"].nunique(dropna=dropna) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 8bbe38d3379ac..833ee9c095860 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -580,7 +580,7 @@ def test_rank_pct_equal_values_on_group_transition(use_nan): ], columns=["group", "val"], ) - result = df.groupby(["group"])["val"].rank( + result = 
df.groupby("group")["val"].rank( method="dense", pct=True, ) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 577a72d3f5090..a9f3109199a1f 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -142,7 +142,7 @@ def test_series_groupby_value_counts_with_grouper(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +@pytest.mark.parametrize("columns", [["A", "B", "C"]]) def test_series_groupby_value_counts_empty(columns): # GH39172 df = DataFrame(columns=columns) @@ -155,7 +155,7 @@ def test_series_groupby_value_counts_empty(columns): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +@pytest.mark.parametrize("columns", [["A", "B", "C"]]) def test_series_groupby_value_counts_one_row(columns): # GH42618 df = DataFrame(data=[range(len(columns))], columns=columns) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 5c64ba3d9e266..0ea8bec832155 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1194,7 +1194,7 @@ def test_transform_lambda_with_datetimetz(): "timezone": ["Etc/GMT+4", "US/Eastern"], } ) - result = df.groupby(["timezone"])["time"].transform( + result = df.groupby("timezone")["time"].transform( lambda x: x.dt.tz_localize(x.name) ) expected = Series( @@ -1328,7 +1328,7 @@ def test_transform_cumcount(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("keys", [["A1"], ["A1", "A2"]]) +@pytest.mark.parametrize("keys", ["A1", ["A1", "A2"]]) def test_null_group_lambda_self(request, sort, dropna, keys): # GH 17093 if not sort and not dropna: @@ -1543,7 +1543,7 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) (lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, 
np.nan]), ], ) -@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) +@pytest.mark.parametrize("keys", ["a1", ["a1", "a2"]]) @pytest.mark.parametrize("keys_in_index", [True, False]) def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index): # GH#45648 - transform should align with the input's index diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 8aff217cca5c1..e521b300bfb39 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -453,7 +453,7 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate): if consolidate: df = df._consolidate() - result = df.groupby(["key"]).resample("W", on="date").min() + result = df.groupby("key").resample("W", on="date").min() idx = pd.MultiIndex.from_arrays( [ ["A"] * 3 + ["B"] * 3, diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 905c2af2d22a5..9fb69a58e35e8 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -407,7 +407,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): def test_join_hierarchical_mixed(self): # GH 2024 df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"]) - new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]}) + new_df = df.groupby("a").agg({"b": [np.mean, np.sum]}) other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"]) other_df.set_index("a", inplace=True) # GH 9455, 12219 diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 76448d5942a5a..6d3bc04239381 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -139,7 +139,7 @@ def test_crosstab_margins(self): assert result.columns.names == ["b", "c"] all_cols = result["All", ""] - exp_cols = df.groupby(["a"]).size().astype("i8") + exp_cols = 
df.groupby("a").size().astype("i8") # to keep index.name exp_margin = Series([len(df)], index=Index(["All"], name="a")) exp_cols = pd.concat([exp_cols, exp_margin]) @@ -177,7 +177,7 @@ def test_crosstab_margins_set_margin_name(self): assert result.columns.names == ["b", "c"] all_cols = result["TOTAL", ""] - exp_cols = df.groupby(["a"]).size().astype("i8") + exp_cols = df.groupby("a").size().astype("i8") # to keep index.name exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a")) exp_cols = pd.concat([exp_cols, exp_margin]) From 1090ef88e68876bb015e0fa8899648f6a15b428c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 22:49:23 -0700 Subject: [PATCH 30/91] syntax --- pandas/tests/groupby/test_apply_mutate.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index 057d7d4ef7468..15dc95da7930d 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -14,9 +14,7 @@ def test_group_by_copy(): ).set_index("name") grp_by_same_value = df.groupby("age", group_keys=False).apply(lambda group: group) - grp_by_copy = df.groupby("age", group_keys=False).apply( - lambda group: group.copy() - ) + grp_by_copy = df.groupby("age", group_keys=False).apply(lambda group: group.copy()) tm.assert_frame_equal(grp_by_same_value, grp_by_copy) From c54e4ead9b9260ad36b63e75a818d67d108e377c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 07:57:58 -0700 Subject: [PATCH 31/91] edit past tests --- .../getting_started/comparison/comparison_with_sql.rst | 8 ++++---- doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/groupby.rst | 4 ++-- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/groupby/test_groupby_dropna.py | 2 +- pandas/tests/groupby/transform/test_transform.py | 2 +- 6 files changed, 10 
insertions(+), 10 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 0a891a4c6d2d7..245a8ad5a25d2 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -433,8 +433,8 @@ Top n rows per group ( tips.assign( - rn=tips.sort_values(["total_bill"], ascending=False) - .groupby(["day"]) + rn=tips.sort_values("total_bill", ascending=False) + .groupby("day") .cumcount() + 1 ) @@ -448,7 +448,7 @@ the same using ``rank(method='first')`` function ( tips.assign( - rnk=tips.groupby(["day"])["total_bill"].rank( + rnk=tips.groupby("day")["total_bill"].rank( method="first", ascending=False ) ) @@ -478,7 +478,7 @@ Notice that when using ``rank(method='min')`` function ( tips[tips["tip"] < 2] - .assign(rnk_min=tips.groupby(["sex"])["tip"].rank(method="min")) + .assign(rnk_min=tips.groupby("sex")["tip"].rank(method="min")) .query("rnk_min < 3") .sort_values(["sex", "rnk_min"]) ) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index d53928d72d8e6..3e93ab9e03eeb 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -466,7 +466,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to .. ipython:: python - gb = df.groupby(["animal"]) + gb = df.groupby("animal") gb.get_group("cat") `Apply to different items in a group diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index c4e6ec3d9949d..8e842fa8c982c 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -196,9 +196,9 @@ For example, the groups created by ``groupby()`` below are in the order they app .. 
ipython:: python df3 = pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}) - df3.groupby(["X"]).get_group("A") + df3.groupby("X").get_group("A") - df3.groupby(["X"]).get_group("B") + df3.groupby("X").get_group("B") .. _groupby.dropna: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 3919b5e8c64f8..2f9d529aedb3a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2807,4 +2807,4 @@ def test_single_element_list_grouping(): "a single grouper to avoid this warning." ) with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby("a") + df.groupby(["a"]) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 74f050a5737d7..0ce7cfe5e0f70 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -348,7 +348,7 @@ def test_groupby_dropna_with_multiindex_input(input_index, keys, series): expected = obj.set_index(keys) if series: expected = expected["c"] - elif input_index == ["a", "b"] and keys == ["a"]: + elif input_index == ["a", "b"] and keys == "a": # Column b should not be aggregated expected = expected[["c"]] diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 0ea8bec832155..3aa2ee75b09a6 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1339,7 +1339,7 @@ def test_null_group_lambda_self(request, sort, dropna, keys): nulls1 = np.random.choice([False, True], size) nulls2 = np.random.choice([False, True], size) # Whether a group contains a null value or not - nulls_grouper = nulls1 if len(keys) == 1 else nulls1 | nulls2 + nulls_grouper = nulls1 if not isinstance(keys, list) else nulls1 | nulls2 a1 = np.random.randint(0, 5, size=size).astype(float) a1[nulls1] = np.nan From 6b254627de5830b3289577b616f50dd1a881e197 Mon Sep 17 
00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 09:06:28 -0700 Subject: [PATCH 32/91] editting pivot --- pandas/core/reshape/pivot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 03aad0ef64dec..7c8120afb6aeb 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -161,6 +161,9 @@ def __internal_pivot_table( pass values = list(values) + if isinstance(keys, list): + if len(keys) == 1: + keys = keys[0] grouped = data.groupby(keys, observed=observed, sort=sort) agged = grouped.agg(aggfunc) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): From b73e343cc49161e55b64d11ff7c3ef57ef4b33da Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 09:45:19 -0700 Subject: [PATCH 33/91] ex --- .../getting_started/intro_tutorials/07_reshape_table_layout.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index 27d6f95923ed0..cbb9042cc9724 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -152,7 +152,7 @@ called ``no2_subset``. .. ipython:: python # use 2 measurements (head) for each location (groupby) - no2_subset = no2.sort_index().groupby(["location"]).head(2) + no2_subset = no2.sort_index().groupby("location").head(2) no2_subset .. 
image:: ../../_static/schemas/07_pivot.svg From 3e1aeba4b648ca4f44b9e43cbb6692786420e09c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 10:28:22 -0700 Subject: [PATCH 34/91] editing internal use --- pandas/tests/groupby/test_counting.py | 2 +- pandas/tests/resample/test_resampler_grouper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index f9527f13dd7c5..5db36d1569edf 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -192,7 +192,7 @@ def test_ngroup_respects_groupby_order(self): np.random.seed(0) df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) for sort_flag in (False, True): - g = df.groupby(["a"], sort=sort_flag) + g = df.groupby("a", sort=sort_flag) df["group_id"] = -1 df["group_index"] = -1 diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index e521b300bfb39..84f1d7de9609c 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -430,7 +430,7 @@ def test_resample_groupby_agg_listlike(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("keys", [["a"], ["a", "b"]]) +@pytest.mark.parametrize("keys", ["a", ["a", "b"]]) def test_empty(keys): # GH 26411 df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([])) From 990d3b0d87ec5e2a787844c327d6c4be09deee9c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 10:49:35 -0700 Subject: [PATCH 35/91] pivot --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 7c8120afb6aeb..52ce4b250a204 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -162,7 +162,7 @@ def 
__internal_pivot_table( values = list(values) if isinstance(keys, list): - if len(keys) == 1: + if len(keys) == 1 and isinstance(keys[0], str): keys = keys[0] grouped = data.groupby(keys, observed=observed, sort=sort) agged = grouped.agg(aggfunc) From 5dd83155d96862b84f4b17e7a77721ec20cdf868 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 07:58:57 -0700 Subject: [PATCH 36/91] warning expected --- pandas/tests/plotting/frame/test_hist_box_by.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index fe39c3d441396..5f9c36c7872c7 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -165,9 +165,10 @@ def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 # _check_plot_works adds an ax so catch warning. see GH #13188 with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works( - hist_df.plot.hist, column=column, by=by, layout=layout - ) + with tm.assert_produces_warning(FutureWarning): + axes = _check_plot_works( + hist_df.plot.hist, column=column, by=by, layout=layout + ) self._check_axes_shape(axes, axes_num=axes_num, layout=layout) @pytest.mark.parametrize( From cbfd6567881744982ca5df7ba9c4273b6d4298d5 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 09:28:08 -0700 Subject: [PATCH 37/91] warning --- pandas/tests/plotting/frame/test_hist_box_by.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 5f9c36c7872c7..350236348fb38 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -1,3 +1,4 @@ +from nis import match import re 
import numpy as np @@ -164,8 +165,14 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df): def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 # _check_plot_works adds an ax so catch warning. see GH #13188 + msg = ( + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." + ) with tm.assert_produces_warning(UserWarning): - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, match=msg): axes = _check_plot_works( hist_df.plot.hist, column=column, by=by, layout=layout ) From a68c413c93d7738d4005e372655862835f4f0e01 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 10:22:11 -0700 Subject: [PATCH 38/91] ignore doc warning --- .../intro_tutorials/07_reshape_table_layout.rst | 3 ++- doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/merging.rst | 4 ++-- doc/source/user_guide/reshaping.rst | 2 ++ doc/source/user_guide/scale.rst | 2 +- pandas/tests/plotting/frame/test_hist_box_by.py | 8 ++++---- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index cbb9042cc9724..7bf29422c031d 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -239,7 +239,7 @@ interested in the row/column margins (subtotals) for each variable, set the ``margins`` parameter to ``True``: .. ipython:: python - + :okwarning: air_quality.pivot_table( values="value", index="location", @@ -260,6 +260,7 @@ For more information about :meth:`~DataFrame.pivot_table`, see the user guide se .. 
note:: + :okwarning: In case you are wondering, :meth:`~DataFrame.pivot_table` is indeed directly linked to :meth:`~DataFrame.groupby`. The same result can be derived by grouping on both ``parameter`` and ``location``: diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 3e93ab9e03eeb..364ca9fff0d74 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -694,7 +694,7 @@ The :ref:`Pivot ` docs. `__ .. ipython:: python - + :okwarning: grades = [48, 99, 75, 80, 42, 80, 72, 68, 36, 78] df = pd.DataFrame( { diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index bbca5773afdfe..6d82fe7a0a595 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -1355,7 +1355,7 @@ ordered data. In particular it has an optional ``fill_method`` keyword to fill/interpolate missing data: .. ipython:: python - + :okwarning: left = pd.DataFrame( {"k": ["K0", "K1", "K1", "K2"], "lv": [1, 2, 3, 4], "s": ["a", "b", "c", "d"]} ) @@ -1381,7 +1381,7 @@ For example; we might have ``trades`` and ``quotes`` and we want to ``asof`` merge them. .. ipython:: python - + :okwarning: trades = pd.DataFrame( { "time": pd.to_datetime( diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index adca9de6c130a..11e8e689c9c7d 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -461,6 +461,7 @@ Additionally, you can call :meth:`DataFrame.stack` to display a pivoted DataFram as having a multi-level index: .. ipython:: python + :okwarning: table.stack() @@ -578,6 +579,7 @@ array and is often used to transform continuous variables to discrete or categorical variables: .. 
ipython:: python + :okwarning: ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 129f43dd36930..4c74c66042076 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -335,7 +335,7 @@ Dask implements the most used parts of the pandas API. For example, we can do a familiar groupby aggregation. .. ipython:: python - + :okwarning: %time ddf.groupby("name")[["x", "y"]].mean().compute().head() The grouping and aggregation is done out-of-core and in parallel. diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 350236348fb38..1a25f8c6530e0 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -166,10 +166,10 @@ def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 # _check_plot_works adds an ax so catch warning. see GH #13188 msg = ( - "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." 
) with tm.assert_produces_warning(UserWarning): with tm.assert_produces_warning(FutureWarning, match=msg): From cc731be094dccd81f2a57db61753cc6b13dbc7c9 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 11:34:54 -0700 Subject: [PATCH 39/91] doc --- .../intro_tutorials/07_reshape_table_layout.rst | 3 +-- doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/merging.rst | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index 7bf29422c031d..cbb9042cc9724 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -239,7 +239,7 @@ interested in the row/column margins (subtotals) for each variable, set the ``margins`` parameter to ``True``: .. ipython:: python - :okwarning: + air_quality.pivot_table( values="value", index="location", @@ -260,7 +260,6 @@ For more information about :meth:`~DataFrame.pivot_table`, see the user guide se .. note:: - :okwarning: In case you are wondering, :meth:`~DataFrame.pivot_table` is indeed directly linked to :meth:`~DataFrame.groupby`. The same result can be derived by grouping on both ``parameter`` and ``location``: diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 364ca9fff0d74..3e93ab9e03eeb 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -694,7 +694,7 @@ The :ref:`Pivot ` docs. `__ .. 
ipython:: python - :okwarning: + grades = [48, 99, 75, 80, 42, 80, 72, 68, 36, 78] df = pd.DataFrame( { diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 6d82fe7a0a595..bbca5773afdfe 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -1355,7 +1355,7 @@ ordered data. In particular it has an optional ``fill_method`` keyword to fill/interpolate missing data: .. ipython:: python - :okwarning: + left = pd.DataFrame( {"k": ["K0", "K1", "K1", "K2"], "lv": [1, 2, 3, 4], "s": ["a", "b", "c", "d"]} ) @@ -1381,7 +1381,7 @@ For example; we might have ``trades`` and ``quotes`` and we want to ``asof`` merge them. .. ipython:: python - :okwarning: + trades = pd.DataFrame( { "time": pd.to_datetime( From 4e4f3971e2235bf814493ca96d2f8067e0481cb8 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 12:41:34 -0700 Subject: [PATCH 40/91] tests --- pandas/tests/plotting/frame/test_hist_box_by.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 1a25f8c6530e0..43e2f4d06b597 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -1,4 +1,3 @@ -from nis import match import re import numpy as np @@ -172,7 +171,11 @@ def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): "a single grouper to avoid this warning." 
) with tm.assert_produces_warning(UserWarning): - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning( + FutureWarning, + match=msg, + check_stacklevel=False + ): axes = _check_plot_works( hist_df.plot.hist, column=column, by=by, layout=layout ) From 7aa9948f397c2e7f9f10ef752183b1de7da6a35f Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 13:37:50 -0700 Subject: [PATCH 41/91] ignore warning --- .../tests/plotting/frame/test_hist_box_by.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 43e2f4d06b597..dcd36cb91018a 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -1,5 +1,6 @@ import re +import warnings import numpy as np import pytest @@ -164,21 +165,11 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df): def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 # _check_plot_works adds an ax so catch warning. see GH #13188 - msg = ( - "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." 
- ) with tm.assert_produces_warning(UserWarning): - with tm.assert_produces_warning( - FutureWarning, - match=msg, - check_stacklevel=False - ): - axes = _check_plot_works( - hist_df.plot.hist, column=column, by=by, layout=layout - ) + warnings.filterwarnings("ignore", category=FutureWarning) + axes = _check_plot_works( + hist_df.plot.hist, column=column, by=by, layout=layout + ) self._check_axes_shape(axes, axes_num=axes_num, layout=layout) @pytest.mark.parametrize( From ead96d5ec528be9319841fa53dc8436a63d64e63 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 15:12:46 -0700 Subject: [PATCH 42/91] test --- doc/source/user_guide/reshaping.rst | 2 -- doc/source/user_guide/scale.rst | 1 - pandas/tests/plotting/frame/test_hist_box_by.py | 16 ++++++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index 11e8e689c9c7d..adca9de6c130a 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -461,7 +461,6 @@ Additionally, you can call :meth:`DataFrame.stack` to display a pivoted DataFram as having a multi-level index: .. ipython:: python - :okwarning: table.stack() @@ -579,7 +578,6 @@ array and is often used to transform continuous variables to discrete or categorical variables: .. ipython:: python - :okwarning: ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 4c74c66042076..a40d3c03cfce5 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -335,7 +335,6 @@ Dask implements the most used parts of the pandas API. For example, we can do a familiar groupby aggregation. .. ipython:: python - :okwarning: %time ddf.groupby("name")[["x", "y"]].mean().compute().head() The grouping and aggregation is done out-of-core and in parallel. 
diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index dcd36cb91018a..d6cb75a6e9ff4 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -165,11 +165,19 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df): def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 # _check_plot_works adds an ax so catch warning. see GH #13188 + msg = ( + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." + ) with tm.assert_produces_warning(UserWarning): - warnings.filterwarnings("ignore", category=FutureWarning) - axes = _check_plot_works( - hist_df.plot.hist, column=column, by=by, layout=layout - ) + with warnings.catch_warnings(): + # We've already warned above + warnings.filterwarnings("ignore", message=msg) + axes = _check_plot_works( + hist_df.plot.hist, column=column, by=by, layout=layout + ) self._check_axes_shape(axes, axes_num=axes_num, layout=layout) @pytest.mark.parametrize( From 8d060223beaceddae87a2f47d5592ba83c42b7f7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 16:45:56 -0700 Subject: [PATCH 43/91] plotting --- pandas/plotting/_matplotlib/core.py | 11 ++++++++++- pandas/plotting/_matplotlib/groupby.py | 10 +++++++++- pandas/plotting/_matplotlib/hist.py | 12 +++++++++++- .../tests/plotting/frame/test_hist_box_by.py | 19 +++++++++---------- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 3641cd7213fec..f183973e7f25e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -175,7 +175,8 @@ def __init__( # For `hist` plot, need to get grouped original data before 
`self.data` is # updated later if self.by is not None and self._kind == "hist": - self._grouped = data.groupby(self.by) + bymodi = fix_groupby_singlelist_input(by) + self._grouped = data.groupby(bymodi) self.kind = kind @@ -679,6 +680,7 @@ def _adorn_subplots(self): ) for ax in self.axes: + ax = getattr(ax, "right_ax", ax) if self.yticks is not None: ax.set_yticks(self.yticks) @@ -1828,3 +1830,10 @@ def blank_labeler(label, value): leglabels = labels if labels is not None else idx for p, l in zip(patches, leglabels): self._append_legend_handles_labels(p, l) + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py index 4f1cd3f38343a..0c87db697b342 100644 --- a/pandas/plotting/_matplotlib/groupby.py +++ b/pandas/plotting/_matplotlib/groupby.py @@ -108,7 +108,8 @@ def reconstruct_data_with_by( 1 3.0 4.0 NaN NaN 2 NaN NaN 5.0 6.0 """ - grouped = data.groupby(by) + bymodi = fix_groupby_singlelist_input(by) + grouped = data.groupby(bymodi) data_list = [] for key, group in grouped: @@ -134,3 +135,10 @@ def reformat_hist_y_given_by( if by is not None and len(y.shape) > 1: return np.array([remove_na_arraylike(col) for col in y.T]).T return remove_na_arraylike(y) + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 77496cf049f3d..e76441067bf65 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -61,7 +61,8 @@ def _args_adjust(self): # where subplots are created based on by argument if is_integer(self.bins): if self.by is not None: - grouped = self.data.groupby(self.by)[self.columns] + bymodi = fix_groupby_singlelist_input(self.by) + grouped = 
self.data.groupby(bymodi)[self.columns] self.bins = [self._calculate_bins(group) for key, group in grouped] else: self.bins = self._calculate_bins(self.data) @@ -265,6 +266,8 @@ def _grouped_plot( grouped = data.groupby(by) if column is not None: grouped = grouped[column] + if isinstance(by, list) and len(by) == 1: + by = [by] naxes = len(grouped) fig, axes = create_subplots( @@ -522,3 +525,10 @@ def hist_frame( maybe_adjust_figure(fig, wspace=0.3, hspace=0.3) return axes + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index d6cb75a6e9ff4..dc1911da04e87 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -166,18 +166,17 @@ def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 # _check_plot_works adds an ax so catch warning. see GH #13188 msg = ( - "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." 
) with tm.assert_produces_warning(UserWarning): - with warnings.catch_warnings(): - # We've already warned above - warnings.filterwarnings("ignore", message=msg) - axes = _check_plot_works( - hist_df.plot.hist, column=column, by=by, layout=layout - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=msg) + axes = _check_plot_works( + hist_df.plot.hist, column=column, by=by, layout=layout + ) self._check_axes_shape(axes, axes_num=axes_num, layout=layout) @pytest.mark.parametrize( From 8a2e15bd27d84359b6d6bb68f26b6a7c78ddcfe7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 16:46:19 -0700 Subject: [PATCH 44/91] test --- pandas/tests/plotting/frame/test_hist_box_by.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index dc1911da04e87..7e8a6242dca82 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -1,6 +1,5 @@ import re -import warnings import numpy as np import pytest @@ -165,18 +164,10 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df): def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 # _check_plot_works adds an ax so catch warning. see GH #13188 - msg = ( - "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." 
- ) with tm.assert_produces_warning(UserWarning): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message=msg) - axes = _check_plot_works( - hist_df.plot.hist, column=column, by=by, layout=layout - ) + axes = _check_plot_works( + hist_df.plot.hist, column=column, by=by, layout=layout + ) self._check_axes_shape(axes, axes_num=axes_num, layout=layout) @pytest.mark.parametrize( @@ -278,7 +269,7 @@ class TestBoxWithBy(TestPlotBase): ] * 2, ), - (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2), + ("C", None, ["A", "B"], [["a", "b", "c"]] * 2), ], ) def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df): From dbbdbf178192e8248a5b4a768619e079a6f85e86 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 17:47:47 -0700 Subject: [PATCH 45/91] doc --- .../intro_tutorials/07_reshape_table_layout.rst | 2 +- doc/source/user_guide/merging.rst | 1 + doc/source/user_guide/reshaping.rst | 4 +++- doc/source/user_guide/scale.rst | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index cbb9042cc9724..5eb28b24c8e5f 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -239,7 +239,7 @@ interested in the row/column margins (subtotals) for each variable, set the ``margins`` parameter to ``True``: .. ipython:: python - + :okwarning: air_quality.pivot_table( values="value", index="location", diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index bbca5773afdfe..92d2e70e8f37c 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -1355,6 +1355,7 @@ ordered data. In particular it has an optional ``fill_method`` keyword to fill/interpolate missing data: .. 
ipython:: python + :okwarning: left = pd.DataFrame( {"k": ["K0", "K1", "K1", "K2"], "lv": [1, 2, 3, 4], "s": ["a", "b", "c", "d"]} diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index adca9de6c130a..c9abc370810eb 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -447,7 +447,8 @@ rows will be added with partial group aggregates across the categories on the rows and columns: .. ipython:: python - + :okwarning: + table = df.pivot_table( index=["A", "B"], columns="C", @@ -562,6 +563,7 @@ Adding margins Finally, one can also add margins or normalize this output. .. ipython:: python + :okwarning: pd.crosstab( df["A"], df["B"], values=df["C"], aggfunc=np.sum, normalize=True, margins=True diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index a40d3c03cfce5..4c74c66042076 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -335,6 +335,7 @@ Dask implements the most used parts of the pandas API. For example, we can do a familiar groupby aggregation. .. ipython:: python + :okwarning: %time ddf.groupby("name")[["x", "y"]].mean().compute().head() The grouping and aggregation is done out-of-core and in parallel. 
From 4afe5fa62b5027a33e15a7ec294d4e1f792047f5 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 18:26:36 -0700 Subject: [PATCH 46/91] doc --- .../getting_started/intro_tutorials/07_reshape_table_layout.rst | 1 + doc/source/user_guide/reshaping.rst | 2 +- doc/source/user_guide/scale.rst | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index 5eb28b24c8e5f..01a1b1246af34 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -240,6 +240,7 @@ the ``margins`` parameter to ``True``: .. ipython:: python :okwarning: + air_quality.pivot_table( values="value", index="location", diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index c9abc370810eb..07e1596ae2b42 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -448,7 +448,7 @@ rows and columns: .. ipython:: python :okwarning: - + table = df.pivot_table( index=["A", "B"], columns="C", diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 4c74c66042076..ab54dedb1f632 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -336,6 +336,7 @@ a familiar groupby aggregation. .. ipython:: python :okwarning: + %time ddf.groupby("name")[["x", "y"]].mean().compute().head() The grouping and aggregation is done out-of-core and in parallel. 
From 123f42e4bfd8f561e044b4df0fd849b32c677401 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 18:36:11 -0700 Subject: [PATCH 47/91] white space --- .../getting_started/intro_tutorials/07_reshape_table_layout.rst | 2 +- doc/source/user_guide/scale.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index 01a1b1246af34..76bbe32717a01 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -240,7 +240,7 @@ the ``margins`` parameter to ``True``: .. ipython:: python :okwarning: - + air_quality.pivot_table( values="value", index="location", diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index ab54dedb1f632..ffb9ee6bed8b4 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -336,7 +336,7 @@ a familiar groupby aggregation. .. ipython:: python :okwarning: - + %time ddf.groupby("name")[["x", "y"]].mean().compute().head() The grouping and aggregation is done out-of-core and in parallel. From dfa9505cbb840b0f34a102f4cfe452daa2897919 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 19:17:38 -0700 Subject: [PATCH 48/91] doc --- doc/source/user_guide/cookbook.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 3e93ab9e03eeb..b76cfb3f0ad68 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -664,6 +664,7 @@ The :ref:`Pivot ` docs. `__ .. ipython:: python + :okwarning: df = pd.DataFrame( data={ @@ -694,6 +695,7 @@ The :ref:`Pivot ` docs. `__ .. 
ipython:: python + :okwarning: grades = [48, 99, 75, 80, 42, 80, 72, 68, 36, 78] df = pd.DataFrame( From 7ccba90f5be65ce782713a0d8bc91eabd5c36ffe Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 20:03:13 -0700 Subject: [PATCH 49/91] doc --- doc/source/user_guide/cookbook.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index b76cfb3f0ad68..48aee4c164ccc 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -770,6 +770,7 @@ The :ref:`Pivot ` docs. To create year and month cross tabulation: .. ipython:: python + :okwarning: df = pd.DataFrame( {"value": np.random.randn(36)}, From f612f3a3f0f8f255e1c83857ad8577d51fbb3455 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 07:48:42 -0700 Subject: [PATCH 50/91] doc --- doc/source/user_guide/cookbook.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 48aee4c164ccc..97b9dc3ce37eb 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -681,6 +681,10 @@ The :ref:`Pivot ` docs. "Sales": [13, 6, 16, 8, 4, 3, 1], } ) + +.. ipython:: python + :okwarning: + table = pd.pivot_table( df, values=["Sales"], @@ -755,6 +759,9 @@ The :ref:`Pivot ` docs. } ) +.. ipython:: python + :okwarning: + df.groupby("ExamYear").agg( { "Participated": lambda x: x.value_counts()["yes"], @@ -777,6 +784,9 @@ To create year and month cross tabulation: index=pd.date_range("2011-01-01", freq="M", periods=36), ) +.. 
ipython:: python + :okwarning: + pd.pivot_table( df, index=df.index.month, columns=df.index.year, values="value", aggfunc="sum" ) From 758a88e47ec9bff2e1015e105eb256e6898d0ecd Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 09:25:27 -0700 Subject: [PATCH 51/91] doc --- doc/source/user_guide/cookbook.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 97b9dc3ce37eb..6c5d57598e245 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -702,6 +702,10 @@ The :ref:`Pivot ` docs. :okwarning: grades = [48, 99, 75, 80, 42, 80, 72, 68, 36, 78] + +.. ipython:: python + :okwarning: + df = pd.DataFrame( { "ID": ["x%d" % r for r in range(10)], From 2fa069458ae1f9f91e7317805f705c523a127968 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 10:41:26 -0700 Subject: [PATCH 52/91] stacklevel --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 66776c03752e6..5cb6de3c8c6f7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8369,7 +8369,6 @@ def groupby( "a single grouper to avoid this warning." 
), FutureWarning, - stacklevel=find_stack_level(), ) if squeeze is not no_default: From e8b23172eb6d309dc903a304cfd1ca50d39bb313 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 11:34:05 -0700 Subject: [PATCH 53/91] pivot --- doc/source/user_guide/cookbook.rst | 20 -------------------- pandas/core/frame.py | 1 + 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 6c5d57598e245..582adff0b47b7 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -682,19 +682,6 @@ The :ref:`Pivot ` docs. } ) -.. ipython:: python - :okwarning: - - table = pd.pivot_table( - df, - values=["Sales"], - index=["Province"], - columns=["City"], - aggfunc=np.sum, - margins=True, - ) - table.stack("City") - `Frequency table like plyr in R `__ @@ -788,13 +775,6 @@ To create year and month cross tabulation: index=pd.date_range("2011-01-01", freq="M", periods=36), ) -.. ipython:: python - :okwarning: - - pd.pivot_table( - df, index=df.index.month, columns=df.index.year, values="value", aggfunc="sum" - ) - Apply ***** diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5cb6de3c8c6f7..66776c03752e6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8369,6 +8369,7 @@ def groupby( "a single grouper to avoid this warning." 
), FutureWarning, + stacklevel=find_stack_level(), ) if squeeze is not no_default: From fd68cf87d6f28addb6236ae935df363938df1239 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 11:34:24 -0700 Subject: [PATCH 54/91] pivot --- doc/source/user_guide/cookbook.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 582adff0b47b7..9456c09f7bc5b 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -775,6 +775,13 @@ To create year and month cross tabulation: index=pd.date_range("2011-01-01", freq="M", periods=36), ) +.. ipython:: python + :okwarning: + + pd.pivot_table( + df, index=df.index.month, columns=df.index.year, values="value", aggfunc="sum" + ) + Apply ***** From 1dd52c89fdab743952f8ca90a5c1e676fb58f7ce Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 12:13:20 -0700 Subject: [PATCH 55/91] cookbook --- doc/source/user_guide/cookbook.rst | 22 +++++++++------------- pandas/core/reshape/pivot.py | 13 +++++++++++-- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 9456c09f7bc5b..3e93ab9e03eeb 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -664,7 +664,6 @@ The :ref:`Pivot ` docs. `__ .. ipython:: python - :okwarning: df = pd.DataFrame( data={ @@ -681,18 +680,22 @@ The :ref:`Pivot ` docs. "Sales": [13, 6, 16, 8, 4, 3, 1], } ) + table = pd.pivot_table( + df, + values=["Sales"], + index=["Province"], + columns=["City"], + aggfunc=np.sum, + margins=True, + ) + table.stack("City") `Frequency table like plyr in R `__ .. ipython:: python - :okwarning: grades = [48, 99, 75, 80, 42, 80, 72, 68, 36, 78] - -.. 
ipython:: python - :okwarning: - df = pd.DataFrame( { "ID": ["x%d" % r for r in range(10)], @@ -750,9 +753,6 @@ The :ref:`Pivot ` docs. } ) -.. ipython:: python - :okwarning: - df.groupby("ExamYear").agg( { "Participated": lambda x: x.value_counts()["yes"], @@ -768,16 +768,12 @@ The :ref:`Pivot ` docs. To create year and month cross tabulation: .. ipython:: python - :okwarning: df = pd.DataFrame( {"value": np.random.randn(36)}, index=pd.date_range("2011-01-01", freq="M", periods=36), ) -.. ipython:: python - :okwarning: - pd.pivot_table( df, index=df.index.month, columns=df.index.year, values="value", aggfunc="sum" ) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 52ce4b250a204..fa21377633750 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -367,7 +367,8 @@ def _all_key(key): return (key, margins_name) + ("",) * (len(cols) - 1) if len(rows) > 0: - margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc) + modifiedrow = fix_groupby_singlelist_input(rows) + margin = data[rows + values].groupby(modifiedrow, observed=observed).agg(aggfunc) cat_axis = 1 for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): @@ -407,7 +408,8 @@ def _all_key(key): margin_keys = table.columns if len(cols) > 0: - row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc) + modifiedcol = fix_groupby_singlelist_input(cols) + row_margin = data[cols + values].groupby(modifiedcol, observed=observed).agg(aggfunc) row_margin = row_margin.stack() # slight hack @@ -847,3 +849,10 @@ def get_duplicates(names): ] return rownames_mapper, unique_rownames, colnames_mapper, unique_colnames + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys From 917a6625070aff2e975ed0836366273616d81984 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 
Jul 2022 12:29:20 -0700 Subject: [PATCH 56/91] flake8 --- pandas/core/reshape/pivot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index fa21377633750..0e488fc3efb79 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -409,7 +409,9 @@ def _all_key(key): if len(cols) > 0: modifiedcol = fix_groupby_singlelist_input(cols) - row_margin = data[cols + values].groupby(modifiedcol, observed=observed).agg(aggfunc) + row_margin = ( + data[cols + values].groupby(modifiedcol, observed=observed).agg(aggfunc) + ) row_margin = row_margin.stack() # slight hack From f02eb6fdd6dc0a4abb3528e0dbdecf11aa77b6e8 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 12:37:54 -0700 Subject: [PATCH 57/91] flake8 --- pandas/core/reshape/pivot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0e488fc3efb79..df277589117ea 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -368,7 +368,9 @@ def _all_key(key): if len(rows) > 0: modifiedrow = fix_groupby_singlelist_input(rows) - margin = data[rows + values].groupby(modifiedrow, observed=observed).agg(aggfunc) + margin = ( + data[rows + values].groupby(modifiedrow, observed=observed).agg(aggfunc) + ) cat_axis = 1 for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): From 811e63a9fb925a65791beeaca9a382d9d85d4ea8 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 13:42:22 -0700 Subject: [PATCH 58/91] what's new --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 090fea57872c5..a7ebe6f15cc4c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -773,6 
+773,7 @@ Other Deprecations - Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`) +- Deprecated producing a single element when using :meth:`Pandas.groupby` with a single list element. A tuple of length one will be returned instead. .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: From 565d80f072636f5748fa5af4d6714a5e95193964 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 20 Jul 2022 14:15:02 -0700 Subject: [PATCH 59/91] syntax --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a7ebe6f15cc4c..b0c76d16ff41d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -773,7 +773,7 @@ Other Deprecations - Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. 
``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`) -- Deprecated producing a single element when using :meth:`Pandas.groupby` with a single list element. A tuple of length one will be returned instead. +- Deprecated producing a single element when using :meth:`Pandas.groupby` with a single list element. A tuple of length one will be returned instead. .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: From 057e6421d9fe2cca276075f07408fbbc3edc8900 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 25 Jul 2022 17:47:01 -0700 Subject: [PATCH 60/91] itr --- pandas/core/frame.py | 13 ------------- pandas/core/groupby/grouper.py | 12 +++++++++++- pandas/core/groupby/ops.py | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 66776c03752e6..77f6869bcb2d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8359,19 +8359,6 @@ def groupby( ) -> DataFrameGroupBy: from pandas.core.groupby.generic import DataFrameGroupBy - if isinstance(by, list): - if len(by) == 1 and isinstance(by[0], str): - warnings.warn( - ( - "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." 
- ), - FutureWarning, - stacklevel=find_stack_level(), - ) - if squeeze is not no_default: warnings.warn( ( diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index b9f4166b475ca..f0a571a178c3e 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -733,6 +733,11 @@ def get_grouper( """ group_axis = obj._get_axis(axis) + tuple_unified = False + if isinstance(key, list): + if len(key) == 1 and isinstance(key[0], str): + tuple_unified = True + # validate that the passed single level is compatible with the passed # axis of the object if level is not None: @@ -918,7 +923,12 @@ def is_in_obj(gpr) -> bool: # create the internals grouper grouper = ops.BaseGrouper( - group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna + group_axis, + groupings, + tuple_unified=tuple_unified, + sort=sort, + mutated=mutated, + dropna=dropna ) return grouper, frozenset(exclusions), obj diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6dc4ccfa8e1ee..b2b025d049457 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -17,6 +17,8 @@ Sequence, final, ) +import warnings +from pandas.util._exceptions import find_stack_level import numpy as np @@ -711,6 +713,7 @@ def __init__( self, axis: Index, groupings: Sequence[grouper.Grouping], + tuple_unified: bool = False, sort: bool = True, group_keys: bool = True, mutated: bool = False, @@ -721,6 +724,7 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) + self.tuple_unified = tuple_unified self._sort = sort self.group_keys = group_keys self.mutated = mutated @@ -755,6 +759,17 @@ def get_iterator( """ splitter = self._get_splitter(data, axis=axis) keys = self.group_keys_seq + if self.tuple_unified: + warnings.warn( + ( + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." 
+ ), + FutureWarning, + stacklevel=find_stack_level(), + ) yield from zip(keys, splitter) @final @@ -1123,11 +1138,13 @@ def __init__( binlabels, mutated: bool = False, indexer=None, + tuple_unified: bool = False, ) -> None: self.bins = ensure_int64(bins) self.binlabels = ensure_index(binlabels) self.mutated = mutated self.indexer = indexer + self.tuple_unified = False # These lengths must match, otherwise we could call agg_series # with empty self.bins, which would raise in libreduction. From dcd14d14ad359e9385e7f27e123a07925228f6ad Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 25 Jul 2022 18:04:27 -0700 Subject: [PATCH 61/91] car names --- pandas/core/groupby/grouper.py | 8 ++++---- pandas/core/groupby/ops.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index f0a571a178c3e..74a6330f6fe86 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -733,10 +733,10 @@ def get_grouper( """ group_axis = obj._get_axis(axis) - tuple_unified = False + raise_warning_single_grouper = False if isinstance(key, list): if len(key) == 1 and isinstance(key[0], str): - tuple_unified = True + raise_warning_single_grouper = True # validate that the passed single level is compatible with the passed # axis of the object @@ -925,10 +925,10 @@ def is_in_obj(gpr) -> bool: grouper = ops.BaseGrouper( group_axis, groupings, - tuple_unified=tuple_unified, + raise_warning_single_grouper=raise_warning_single_grouper, sort=sort, mutated=mutated, - dropna=dropna + dropna=dropna, ) return grouper, frozenset(exclusions), obj diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b2b025d049457..f41ddbdc89319 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -18,7 +18,6 @@ final, ) import warnings -from pandas.util._exceptions import find_stack_level import numpy as np @@ -37,6 
+36,7 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( maybe_cast_pointwise_result, @@ -713,7 +713,7 @@ def __init__( self, axis: Index, groupings: Sequence[grouper.Grouping], - tuple_unified: bool = False, + raise_warning_single_grouper: bool = False, sort: bool = True, group_keys: bool = True, mutated: bool = False, @@ -724,7 +724,7 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) - self.tuple_unified = tuple_unified + self.raise_warning_single_grouper = raise_warning_single_grouper self._sort = sort self.group_keys = group_keys self.mutated = mutated @@ -759,7 +759,7 @@ def get_iterator( """ splitter = self._get_splitter(data, axis=axis) keys = self.group_keys_seq - if self.tuple_unified: + if self.raise_warning_single_grouper: warnings.warn( ( "In a future version of pandas, a length 1 " @@ -1138,13 +1138,13 @@ def __init__( binlabels, mutated: bool = False, indexer=None, - tuple_unified: bool = False, + raise_warning_single_grouper: bool = False, ) -> None: self.bins = ensure_int64(bins) self.binlabels = ensure_index(binlabels) self.mutated = mutated self.indexer = indexer - self.tuple_unified = False + self.raise_warning_single_grouper = False # These lengths must match, otherwise we could call agg_series # with empty self.bins, which would raise in libreduction. 
From 6f9cb71aa0efba7e8dd654160317fbf88529d82c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 25 Jul 2022 19:32:33 -0700 Subject: [PATCH 62/91] test edit --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2f9d529aedb3a..8d4f6e9bbb57a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2807,4 +2807,4 @@ def test_single_element_list_grouping(): "a single grouper to avoid this warning." ) with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(["a"]) + values, _ = next(iter(df.groupby(['a']))) From acdab83b5fd668b55790cf5dca1b2285d6bb180f Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 25 Jul 2022 20:28:27 -0700 Subject: [PATCH 63/91] fixing tests --- asv_bench/benchmarks/groupby.py | 2 +- .../comparison/comparison_with_sql.rst | 8 ++++---- .../intro_tutorials/07_reshape_table_layout.rst | 2 +- doc/source/user_guide/cookbook.rst | 4 ++-- pandas/core/frame.py | 4 ++-- pandas/tests/groupby/aggregate/test_aggregate.py | 4 ++-- pandas/tests/groupby/aggregate/test_cython.py | 4 ++-- pandas/tests/groupby/test_allowlist.py | 4 ++-- pandas/tests/groupby/test_apply.py | 4 ++-- pandas/tests/groupby/test_apply_mutate.py | 6 ++++-- pandas/tests/groupby/test_categorical.py | 4 ++-- pandas/tests/groupby/test_counting.py | 8 ++++---- pandas/tests/groupby/test_function.py | 4 ++-- pandas/tests/groupby/test_groupby.py | 16 ++++++++-------- pandas/tests/groupby/test_groupby_dropna.py | 4 ++-- pandas/tests/groupby/test_groupby_shift_diff.py | 4 ++-- pandas/tests/groupby/test_grouping.py | 2 +- pandas/tests/groupby/test_missing.py | 4 ++-- pandas/tests/groupby/test_nunique.py | 2 +- pandas/tests/groupby/test_rank.py | 2 +- pandas/tests/groupby/test_value_counts.py | 4 ++-- 
pandas/tests/groupby/transform/test_transform.py | 8 ++++---- pandas/tests/plotting/frame/test_hist_box_by.py | 2 +- pandas/tests/resample/test_resampler_grouper.py | 4 ++-- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/reshape/test_crosstab.py | 4 ++-- 26 files changed, 59 insertions(+), 57 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 21da5fd10778a..2de1f25fceace 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -618,7 +618,7 @@ def setup(self): self.df = DataFrame({"a": arr, "b": arr}) def time_sum(self): - self.df.groupby("a")["b"].sum() + self.df.groupby(["a"])["b"].sum() class String: diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 245a8ad5a25d2..0a891a4c6d2d7 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -433,8 +433,8 @@ Top n rows per group ( tips.assign( - rn=tips.sort_values("total_bill", ascending=False) - .groupby("day") + rn=tips.sort_values(["total_bill"], ascending=False) + .groupby(["day"]) .cumcount() + 1 ) @@ -448,7 +448,7 @@ the same using ``rank(method='first')`` function ( tips.assign( - rnk=tips.groupby("day")["total_bill"].rank( + rnk=tips.groupby(["day"])["total_bill"].rank( method="first", ascending=False ) ) @@ -478,7 +478,7 @@ Notice that when using ``rank(method='min')`` function ( tips[tips["tip"] < 2] - .assign(rnk_min=tips.groupby("sex")["tip"].rank(method="min")) + .assign(rnk_min=tips.groupby(["sex"])["tip"].rank(method="min")) .query("rnk_min < 3") .sort_values(["sex", "rnk_min"]) ) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index 76bbe32717a01..8d61c9e612d41 100644 --- 
a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -152,7 +152,7 @@ called ``no2_subset``. .. ipython:: python # use 2 measurements (head) for each location (groupby) - no2_subset = no2.sort_index().groupby("location").head(2) + no2_subset = no2.sort_index().groupby(["location"]).head(2) no2_subset .. image:: ../../_static/schemas/07_pivot.svg diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 3e93ab9e03eeb..daf5a0e481b8e 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -466,7 +466,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to .. ipython:: python - gb = df.groupby("animal") + gb = df.groupby(["animal"]) gb.get_group("cat") `Apply to different items in a group @@ -562,7 +562,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to {"Color": "Red Red Red Blue".split(), "Value": [100, 150, 50, 50]} ) df - df["Counts"] = df.groupby("Color").transform(len) + df["Counts"] = df.groupby(["Color"]).transform(len) df `Shift groups of the values in a column based on the index diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6559dd6df87de..47203fbf315e5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8293,13 +8293,13 @@ def update( >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]] >>> df = pd.DataFrame(l, columns=["a", "b", "c"]) ->>> df.groupby(by="b").sum() +>>> df.groupby(by=["b"]).sum() a c b 1.0 2 3 2.0 2 5 ->>> df.groupby(by="b", dropna=False).sum() +>>> df.groupby(by=["b"], dropna=False).sum() a c b 1.0 2 3 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 044b8237f890a..54ee32502bbc9 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -504,7 +504,7 
@@ def test_bool_agg_dtype(op): @pytest.mark.parametrize( "keys, agg_index", [ - ("a", Index([1], name="a")), + (["a"], Index([1], name="a")), (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), ], ) @@ -535,7 +535,7 @@ def test_callable_result_dtype_frame( @pytest.mark.parametrize( "keys, agg_index", [ - ("a", Index([1], name="a")), + (["a"], Index([1], name="a")), (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), ], ) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 98312eded1838..869ed31b6a2d9 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -291,8 +291,8 @@ def test_read_only_buffer_source_agg(agg): ) df._mgr.arrays[0].flags.writeable = False - result = df.groupby("species").agg({"sepal_length": agg}) - expected = df.copy().groupby("species").agg({"sepal_length": agg}) + result = df.groupby(["species"]).agg({"sepal_length": agg}) + expected = df.copy().groupby(["species"]).agg({"sepal_length": agg}) tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 5bd618898e567..46c47cf2b0f4e 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -362,7 +362,7 @@ def test_groupby_selection_with_methods(df, method): rng = date_range("2014", periods=len(df)) df.index = rng - g = df.groupby("A")[["C"]] + g = df.groupby(["A"])[["C"]] g_exp = df[["C"]].groupby(df["A"]) # TODO check groupby with > 1 col ? 
@@ -392,7 +392,7 @@ def test_groupby_selection_other_methods(df): df.columns.name = "foo" df.index = rng - g = df.groupby("A")[["C"]] + g = df.groupby(["A"])[["C"]] g_exp = df[["C"]].groupby(df["A"]) # methods which aren't just .foo() diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index a5b817df56151..4cfc3ea41543b 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -595,9 +595,9 @@ def test_apply_numeric_coercion_when_datetime(): df = DataFrame( {"Number": [1, 2], "Date": ["2017-03-02"] * 2, "Str": ["foo", "inf"]} ) - expected = df.groupby("Number").apply(lambda x: x.iloc[0]) + expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) df.Date = pd.to_datetime(df.Date) - result = df.groupby("Number").apply(lambda x: x.iloc[0]) + result = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) tm.assert_series_equal(result["Str"], expected["Str"]) # GH 15421 diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index 15dc95da7930d..ed0804946b9d9 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -14,7 +14,9 @@ def test_group_by_copy(): ).set_index("name") grp_by_same_value = df.groupby("age", group_keys=False).apply(lambda group: group) - grp_by_copy = df.groupby("age", group_keys=False).apply(lambda group: group.copy()) + grp_by_copy = df.groupby(["age"], group_keys=False).apply( + lambda group: group.copy() + ) tm.assert_frame_equal(grp_by_same_value, grp_by_copy) @@ -73,7 +75,7 @@ def fn(x): x.loc[x.index[-1], "col2"] = 0 return x.col2 - result = df.groupby("col1", as_index=False).apply(fn) + result = df.groupby(["col1"], as_index=False).apply(fn) expected = pd.Series( [1, 2, 0, 4, 5, 0], index=pd.MultiIndex.from_tuples( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index bf0bc944865fb..004e55f4d161f 100644 --- 
a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -115,7 +115,7 @@ def test_basic(): # TODO: split this test ) x["person_name"] = Categorical(x.person_name) - g = x.groupby("person_id", observed=False) + g = x.groupby(["person_id"], observed=False) result = g.transform(lambda x: x) tm.assert_frame_equal(result, x[["person_name"]]) @@ -917,7 +917,7 @@ def test_sort(): df.value, range(0, 10500, 500), right=False, labels=cat_labels ) - res = df.groupby("value_group", observed=False)["value_group"].count() + res = df.groupby(["value_group"], observed=False)["value_group"].count() exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] exp.index = CategoricalIndex(exp.index, name=exp.index.name) tm.assert_series_equal(res, exp) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 5db36d1569edf..f0a3219d0b419 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -151,7 +151,7 @@ def test_ngroup_groupby_not_col(self): def test_ngroup_descending(self): df = DataFrame(["a", "a", "b", "a", "b"], columns=["A"]) - g = df.groupby("A") + g = df.groupby(["A"]) ascending = Series([0, 0, 1, 0, 1]) descending = Series([1, 1, 0, 1, 0]) @@ -179,7 +179,7 @@ def test_ngroup_cumcount_pair(self): # brute force comparison for all small series for p in product(range(3), repeat=4): df = DataFrame({"a": p}) - g = df.groupby("a") + g = df.groupby(["a"]) order = sorted(set(p)) ngroupd = [order.index(val) for val in p] @@ -192,7 +192,7 @@ def test_ngroup_respects_groupby_order(self): np.random.seed(0) df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) for sort_flag in (False, True): - g = df.groupby("a", sort=sort_flag) + g = df.groupby(["a"], sort=sort_flag) df["group_id"] = -1 df["group_index"] = -1 @@ -235,7 +235,7 @@ def test_count_with_only_nans_in_first_group(self): def test_count_groupby_column_with_nan_in_groupby_column(self): # 
https://github.com/pandas-dev/pandas/issues/32841 df = DataFrame({"A": [1, 1, 1, 1, 1], "B": [5, 4, np.NaN, 3, 0]}) - res = df.groupby("B").count() + res = df.groupby(["B"]).count() expected = DataFrame( index=Index([0.0, 3.0, 4.0, 5.0], name="B"), data={"A": [1, 1, 1, 1]} ) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index b66b7e68d04c5..dda583e3a1962 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1025,7 +1025,7 @@ def test_is_monotonic_increasing(in_vals, out_vals): tm.assert_series_equal(result, expected) # Also check result equal to manually taking x.is_monotonic_increasing. - expected = df.groupby("B").C.apply(lambda x: x.is_monotonic_increasing) + expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing) tm.assert_series_equal(result, expected) @@ -1357,7 +1357,7 @@ def test_groupby_sum_timedelta_with_nat(): ], ) @pytest.mark.parametrize("numeric_only", [True, False, lib.no_default]) -@pytest.mark.parametrize("keys", ["a1", ["a1", "a2"]]) +@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) def test_deprecate_numeric_only( kernel, numeric_only_default, drops_nuisance, has_arg, numeric_only, keys ): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8d4f6e9bbb57a..0432e6e4dd5bc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -806,7 +806,7 @@ def test_groupby_as_index_cython(df): msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = grouped.mean() - expected = data.groupby("A").mean() + expected = data.groupby(["A"]).mean() expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) @@ -1259,7 +1259,7 @@ def test_consistency_name(): } ) - expected = df.groupby("A").B.count() + expected = df.groupby(["A"]).B.count() result = 
df.B.groupby(df.A).count() tm.assert_series_equal(result, expected) @@ -1495,7 +1495,7 @@ def test_groupby_2d_malformed(): d["label"] = ["l1", "l2"] msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): - tmp = d.groupby("group").mean() + tmp = d.groupby(["group"]).mean() res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -2240,7 +2240,7 @@ def test_groupby_groups_in_BaseGrouper(): assert result.groups == expected.groups -@pytest.mark.parametrize("group_name", ["x"]) +@pytest.mark.parametrize("group_name", ["x", ["x"]]) def test_groupby_axis_1(group_name): # GH 27614 df = DataFrame( @@ -2643,7 +2643,7 @@ def test_groupby_aggregation_non_numeric_dtype(): index=Index(["M", "W"], dtype="object", name="MW"), ) - gb = df.groupby(by="MW") + gb = df.groupby(by=["MW"]) result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2666,7 +2666,7 @@ def test_groupby_aggregation_multi_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by="x") + gb = df.groupby(by=["x"]) result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2686,7 +2686,7 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by="x") + gb = df.groupby(by=["x"]) msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = gb.sum() @@ -2766,7 +2766,7 @@ def test_by_column_values_with_same_starting_value(): ) aggregate_details = {"Mood": Series.mode, "Credit": "sum"} - result = df.groupby("Name").agg(aggregate_details) + result = df.groupby(["Name"]).agg(aggregate_details) expected_result = DataFrame( { "Mood": [["happy", "sad"], "happy"], diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 0ce7cfe5e0f70..515c96780e731 100644 --- 
a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -333,7 +333,7 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, @pytest.mark.parametrize("input_index", [None, ["a"], ["a", "b"]]) -@pytest.mark.parametrize("keys", ["a", ["a", "b"]]) +@pytest.mark.parametrize("keys", [["a"], ["a", "b"]]) @pytest.mark.parametrize("series", [True, False]) def test_groupby_dropna_with_multiindex_input(input_index, keys, series): # GH#46783 @@ -348,7 +348,7 @@ def test_groupby_dropna_with_multiindex_input(input_index, keys, series): expected = obj.set_index(keys) if series: expected = expected["c"] - elif input_index == ["a", "b"] and keys == "a": + elif input_index == ["a", "b"] and keys == ["a"]: # Column b should not be aggregated expected = expected[["c"]] diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index 2ad5b56fb5c4e..7ffee412e3cdf 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -141,8 +141,8 @@ def test_group_diff_object_raises(object_dtype): def test_empty_shift_with_fill(): # GH 41264, single-index check df = DataFrame(columns=["a", "b", "c"]) - shifted = df.groupby("a").shift(1) - shifted_with_fill = df.groupby("a").shift(1, fill_value=0) + shifted = df.groupby(["a"]).shift(1) + shifted_with_fill = df.groupby(["a"]).shift(1, fill_value=0) tm.assert_frame_equal(shifted, shifted_with_fill) tm.assert_index_equal(shifted.index, shifted_with_fill.index) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index ec94749d6952e..6da07dafcda74 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -873,7 +873,7 @@ def test_gb_key_len_equal_axis_len(self): class TestIteration: def test_groups(self, df): - grouped = df.groupby("A") + grouped = df.groupby(["A"]) groups = grouped.groups assert 
groups is grouped.groups # caching works diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py index 596b98d673042..76da8dfe0607b 100644 --- a/pandas/tests/groupby/test_missing.py +++ b/pandas/tests/groupby/test_missing.py @@ -17,7 +17,7 @@ def test_groupby_column_index_name_lost_fill_funcs(func): [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], columns=Index(["type", "a", "b"], name="idx"), ) - df_grouped = df.groupby("type")[["a", "b"]] + df_grouped = df.groupby(["type"])[["a", "b"]] result = getattr(df_grouped, func)().columns expected = Index(["a", "b"], name="idx") tm.assert_index_equal(result, expected) @@ -28,7 +28,7 @@ def test_groupby_fill_duplicate_column_names(func): # GH: 25610 ValueError with duplicate column names df1 = DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]}) df2 = DataFrame({"field1": [1, np.nan, 4]}) - df_grouped = pd.concat([df1, df2], axis=1).groupby(by="field2") + df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) expected = DataFrame( [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] ) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index f1eb811a788b4..6656fd565f79d 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -164,7 +164,7 @@ def test_nunique_with_timegrouper(): def test_nunique_with_NaT(key, data, dropna, expected): # GH 27951 df = DataFrame({"key": key, "data": data}) - result = df.groupby("key")["data"].nunique(dropna=dropna) + result = df.groupby(["key"])["data"].nunique(dropna=dropna) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 833ee9c095860..8bbe38d3379ac 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -580,7 +580,7 @@ def test_rank_pct_equal_values_on_group_transition(use_nan): ], columns=["group", "val"], ) - result = 
df.groupby("group")["val"].rank( + result = df.groupby(["group"])["val"].rank( method="dense", pct=True, ) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index a9f3109199a1f..577a72d3f5090 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -142,7 +142,7 @@ def test_series_groupby_value_counts_with_grouper(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("columns", [["A", "B", "C"]]) +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) def test_series_groupby_value_counts_empty(columns): # GH39172 df = DataFrame(columns=columns) @@ -155,7 +155,7 @@ def test_series_groupby_value_counts_empty(columns): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("columns", [["A", "B", "C"]]) +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) def test_series_groupby_value_counts_one_row(columns): # GH42618 df = DataFrame(data=[range(len(columns))], columns=columns) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 3aa2ee75b09a6..5c64ba3d9e266 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1194,7 +1194,7 @@ def test_transform_lambda_with_datetimetz(): "timezone": ["Etc/GMT+4", "US/Eastern"], } ) - result = df.groupby("timezone")["time"].transform( + result = df.groupby(["timezone"])["time"].transform( lambda x: x.dt.tz_localize(x.name) ) expected = Series( @@ -1328,7 +1328,7 @@ def test_transform_cumcount(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("keys", ["A1", ["A1", "A2"]]) +@pytest.mark.parametrize("keys", [["A1"], ["A1", "A2"]]) def test_null_group_lambda_self(request, sort, dropna, keys): # GH 17093 if not sort and not dropna: @@ -1339,7 +1339,7 @@ def test_null_group_lambda_self(request, sort, dropna, keys): nulls1 = 
np.random.choice([False, True], size) nulls2 = np.random.choice([False, True], size) # Whether a group contains a null value or not - nulls_grouper = nulls1 if not isinstance(keys, list) else nulls1 | nulls2 + nulls_grouper = nulls1 if len(keys) == 1 else nulls1 | nulls2 a1 = np.random.randint(0, 5, size=size).astype(float) a1[nulls1] = np.nan @@ -1543,7 +1543,7 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) (lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]), ], ) -@pytest.mark.parametrize("keys", ["a1", ["a1", "a2"]]) +@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) @pytest.mark.parametrize("keys_in_index", [True, False]) def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index): # GH#45648 - transform should align with the input's index diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 7e8a6242dca82..fe39c3d441396 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -269,7 +269,7 @@ class TestBoxWithBy(TestPlotBase): ] * 2, ), - ("C", None, ["A", "B"], [["a", "b", "c"]] * 2), + (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2), ], ) def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 84f1d7de9609c..8aff217cca5c1 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -430,7 +430,7 @@ def test_resample_groupby_agg_listlike(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("keys", ["a", ["a", "b"]]) +@pytest.mark.parametrize("keys", [["a"], ["a", "b"]]) def test_empty(keys): # GH 26411 df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([])) @@ -453,7 +453,7 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate): if 
consolidate: df = df._consolidate() - result = df.groupby("key").resample("W", on="date").min() + result = df.groupby(["key"]).resample("W", on="date").min() idx = pd.MultiIndex.from_arrays( [ ["A"] * 3 + ["B"] * 3, diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 9fb69a58e35e8..905c2af2d22a5 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -407,7 +407,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): def test_join_hierarchical_mixed(self): # GH 2024 df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"]) - new_df = df.groupby("a").agg({"b": [np.mean, np.sum]}) + new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]}) other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"]) other_df.set_index("a", inplace=True) # GH 9455, 12219 diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 6d3bc04239381..76448d5942a5a 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -139,7 +139,7 @@ def test_crosstab_margins(self): assert result.columns.names == ["b", "c"] all_cols = result["All", ""] - exp_cols = df.groupby("a").size().astype("i8") + exp_cols = df.groupby(["a"]).size().astype("i8") # to keep index.name exp_margin = Series([len(df)], index=Index(["All"], name="a")) exp_cols = pd.concat([exp_cols, exp_margin]) @@ -177,7 +177,7 @@ def test_crosstab_margins_set_margin_name(self): assert result.columns.names == ["b", "c"] all_cols = result["TOTAL", ""] - exp_cols = df.groupby("a").size().astype("i8") + exp_cols = df.groupby(["a"]).size().astype("i8") # to keep index.name exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a")) exp_cols = pd.concat([exp_cols, exp_margin]) From 1d96b70f6913dc730c5317a114b232093d64df49 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 25 Jul 
2022 20:32:20 -0700 Subject: [PATCH 64/91] fixing tests --- doc/source/user_guide/groupby.rst | 14 +++++++------- doc/source/whatsnew/v1.1.0.rst | 4 ++-- pandas/tests/groupby/test_allowlist.py | 2 +- pandas/tests/groupby/test_apply_mutate.py | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index e7163f7893be7..34244a8edcbfa 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -186,8 +186,8 @@ By default the group keys are sorted during the ``groupby`` operation. You may h .. ipython:: python df2 = pd.DataFrame({"X": ["B", "B", "A", "A"], "Y": [1, 2, 3, 4]}) - df2.groupby("X").sum() - df2.groupby("X", sort=False).sum() + df2.groupby(["X"]).sum() + df2.groupby(["X"], sort=False).sum() Note that ``groupby`` will preserve the order in which *observations* are sorted *within* each group. @@ -196,9 +196,9 @@ For example, the groups created by ``groupby()`` below are in the order they app .. ipython:: python df3 = pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}) - df3.groupby("X").get_group("A") + df3.groupby(["X"]).get_group("A") - df3.groupby("X").get_group("B") + df3.groupby(["X"]).get_group("B") .. _groupby.dropna: @@ -221,10 +221,10 @@ in case you want to include ``NA`` values in group keys, you could pass ``dropna .. ipython:: python # Default ``dropna`` is set to True, which will exclude NaNs in keys - df_dropna.groupby(by="b", dropna=True).sum() + df_dropna.groupby(by=["b"], dropna=True).sum() # In order to allow NaN in keys, set ``dropna`` to False - df_dropna.groupby(by="b", dropna=False).sum() + df_dropna.groupby(by=["b"], dropna=False).sum() The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. 
@@ -414,7 +414,7 @@ getting a column from a DataFrame, you can do: df - grouped = df.groupby("A") + grouped = df.groupby(["A"]) grouped_C = grouped["C"] grouped_D = grouped["D"] diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index ff6187499d9e4..e1f54c439ae9b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -117,10 +117,10 @@ compatibility (:issue:`3729`) .. ipython:: python # Default ``dropna`` is set to True, which will exclude NaNs in keys - df_dropna.groupby(by="b", dropna=True).sum() + df_dropna.groupby(by=["b"], dropna=True).sum() # In order to allow NaN in keys, set ``dropna`` to False - df_dropna.groupby(by="b", dropna=False).sum() + df_dropna.groupby(by=["b"], dropna=False).sum() The default setting of ``dropna`` argument is ``True`` which means ``NA`` are not included in group keys. diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 46c47cf2b0f4e..e541abb368a02 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -378,7 +378,7 @@ def test_groupby_selection_tshift_raises(df): rng = date_range("2014", periods=len(df)) df.index = rng - g = df.groupby("A")[["C"]] + g = df.groupby(["A"])[["C"]] # check that the index cache is cleared with pytest.raises(ValueError, match="Freq was not set in the index"): diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index ed0804946b9d9..d1f25aabe31a2 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -13,7 +13,7 @@ def test_group_by_copy(): } ).set_index("name") - grp_by_same_value = df.groupby("age", group_keys=False).apply(lambda group: group) + grp_by_same_value = df.groupby(["age"], group_keys=False).apply(lambda group: group) grp_by_copy = df.groupby(["age"], group_keys=False).apply( lambda group: group.copy() ) From 1ff640cd65c1709563515f873e7360d26b108bfd 
Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 25 Jul 2022 20:42:47 -0700 Subject: [PATCH 65/91] flake8 --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0432e6e4dd5bc..269b35799769b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2807,4 +2807,4 @@ def test_single_element_list_grouping(): "a single grouper to avoid this warning." ) with tm.assert_produces_warning(FutureWarning, match=msg): - values, _ = next(iter(df.groupby(['a']))) + values, _ = next(iter(df.groupby(["a"]))) From da101e426b471499986af76258742349b9a3db68 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 25 Jul 2022 20:45:10 -0700 Subject: [PATCH 66/91] rst edit --- .../getting_started/intro_tutorials/07_reshape_table_layout.rst | 1 - doc/source/user_guide/merging.rst | 1 - doc/source/user_guide/reshaping.rst | 2 -- doc/source/user_guide/scale.rst | 1 - 4 files changed, 5 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst index 8d61c9e612d41..27d6f95923ed0 100644 --- a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -239,7 +239,6 @@ interested in the row/column margins (subtotals) for each variable, set the ``margins`` parameter to ``True``: .. ipython:: python - :okwarning: air_quality.pivot_table( values="value", diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 92d2e70e8f37c..bbca5773afdfe 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -1355,7 +1355,6 @@ ordered data. 
In particular it has an optional ``fill_method`` keyword to fill/interpolate missing data: .. ipython:: python - :okwarning: left = pd.DataFrame( {"k": ["K0", "K1", "K1", "K2"], "lv": [1, 2, 3, 4], "s": ["a", "b", "c", "d"]} diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index 07e1596ae2b42..adca9de6c130a 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -447,7 +447,6 @@ rows will be added with partial group aggregates across the categories on the rows and columns: .. ipython:: python - :okwarning: table = df.pivot_table( index=["A", "B"], @@ -563,7 +562,6 @@ Adding margins Finally, one can also add margins or normalize this output. .. ipython:: python - :okwarning: pd.crosstab( df["A"], df["B"], values=df["C"], aggfunc=np.sum, normalize=True, margins=True diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index ffb9ee6bed8b4..129f43dd36930 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -335,7 +335,6 @@ Dask implements the most used parts of the pandas API. For example, we can do a familiar groupby aggregation. .. 
ipython:: python - :okwarning: %time ddf.groupby("name")[["x", "y"]].mean().compute().head() From 165f53ebf13190fab294ed2ef554858f14f1b230 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 26 Jul 2022 14:33:06 -0700 Subject: [PATCH 67/91] __iter__ edit --- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 12 ++++++++++++ pandas/core/groupby/ops.py | 11 ----------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9e26598d85e74..5f656c9a900f6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -465,7 +465,7 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: klass = type(self.obj) results = [] - for name, group in self: + for name, group in self.grouper.get_iterator(self._selected_obj, axis=self.axis): # this setattr is needed for test_transform_lambda_with_datetimetz object.__setattr__(group, "name", name) res = func(group, *args, **kwargs) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9b4991d32692b..b874a5c0745c8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -821,6 +821,18 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ + result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) + if result.gi_frame.f_locals["self"].raise_warning_single_grouper: + warnings.warn( + ( + "In a future version of pandas, a length 1 " + "tuple will be returned when grouping by a " + "list of length 1. Don't supply a list with " + "a single grouper to avoid this warning." 
+ ), + FutureWarning, + stacklevel=1, + ) return self.grouper.get_iterator(self._selected_obj, axis=self.axis) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f41ddbdc89319..5ef74251b5ed0 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -759,17 +759,6 @@ def get_iterator( """ splitter = self._get_splitter(data, axis=axis) keys = self.group_keys_seq - if self.raise_warning_single_grouper: - warnings.warn( - ( - "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." - ), - FutureWarning, - stacklevel=find_stack_level(), - ) yield from zip(keys, splitter) @final From dd9799eb6e97013d234d3e0d39f35b76c5a955bf Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 26 Jul 2022 14:43:19 -0700 Subject: [PATCH 68/91] flake8 --- pandas/core/groupby/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5f656c9a900f6..631f70f390319 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -465,7 +465,9 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: klass = type(self.obj) results = [] - for name, group in self.grouper.get_iterator(self._selected_obj, axis=self.axis): + for name, group in self.grouper.get_iterator( + self._selected_obj, axis=self.axis + ): # this setattr is needed for test_transform_lambda_with_datetimetz object.__setattr__(group, "name", name) res = func(group, *args, **kwargs) From ee1a4251e47c2310b574b03953d9e581b77576c7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 26 Jul 2022 14:53:01 -0700 Subject: [PATCH 69/91] flake8 --- pandas/core/groupby/ops.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5ef74251b5ed0..692c3301c15e1 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -16,8 +16,7 @@ Iterator, Sequence, final, -) -import warnings +) import numpy as np @@ -36,7 +35,6 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( maybe_cast_pointwise_result, From 20d7e6414170e119dfba0508bff52df61ed9a2c7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 26 Jul 2022 15:06:47 -0700 Subject: [PATCH 70/91] space --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 692c3301c15e1..6a783831711b5 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -16,7 +16,7 @@ Iterator, Sequence, final, -) +) import numpy as np From 384d1d3c3e30e287a33d418cd5b5c5d84ef78eb4 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 26 Jul 2022 17:57:57 -0700 Subject: [PATCH 71/91] test --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b874a5c0745c8..18fbb770588da 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -831,7 +831,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: "a single grouper to avoid this warning." 
), FutureWarning, - stacklevel=1, + stacklevel=find_stack_level(), ) return self.grouper.get_iterator(self._selected_obj, axis=self.axis) From e01e4090b9241a34fc794bc814578c2ef93e7a33 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 26 Jul 2022 20:01:46 -0700 Subject: [PATCH 72/91] merge --- pandas/core/reshape/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6ce5ffac9de52..e06a288c1eb38 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -150,7 +150,7 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): if all(item in right.columns for item in by): rby = right.groupby(by, sort=False) - for key, lhs in lby: + for key, lhs in lby.grouper.get_iterator(lby._selected_obj, axis=lby.axis): if rby is None: rhs = right From 9fa72d52f4716b0e18c4f160d10d55dd9a9884da Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 27 Jul 2022 11:16:13 -0700 Subject: [PATCH 73/91] ignore the type --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 18fbb770588da..90c0260b9b3de 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -822,7 +822,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: for each group """ result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) - if result.gi_frame.f_locals["self"].raise_warning_single_grouper: + if result.gi_frame.f_locals["self"].raise_warning_single_grouper: # type: ignore[attr-defined] warnings.warn( ( "In a future version of pandas, a length 1 " From 0478afafca47a67cce0ddee9f960fa120d6487dc Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 27 Jul 2022 11:24:19 -0700 Subject: [PATCH 
74/91] mypy --- pandas/core/groupby/groupby.py | 2 +- pandas/core/groupby/ops.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 90c0260b9b3de..18fbb770588da 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -822,7 +822,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: for each group """ result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) - if result.gi_frame.f_locals["self"].raise_warning_single_grouper: # type: ignore[attr-defined] + if result.gi_frame.f_locals["self"].raise_warning_single_grouper: warnings.warn( ( "In a future version of pandas, a length 1 " diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6a783831711b5..1de8b456f0c7b 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -11,6 +11,7 @@ import functools from typing import ( Callable, + Generator, Generic, Hashable, Iterator, @@ -746,7 +747,7 @@ def nkeys(self) -> int: def get_iterator( self, data: NDFrameT, axis: int = 0 - ) -> Iterator[tuple[Hashable, NDFrameT]]: + ) -> Generator[tuple[Hashable, NDFrameT]]: """ Groupby iterator From 374fad8b123f6f9d7e91d39cca2b3d053c6d8cc5 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Wed, 27 Jul 2022 11:47:39 -0700 Subject: [PATCH 75/91] type --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 1de8b456f0c7b..fb29ac40cb1d3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -747,7 +747,7 @@ def nkeys(self) -> int: def get_iterator( self, data: NDFrameT, axis: int = 0 - ) -> Generator[tuple[Hashable, NDFrameT]]: + ) -> Generator[tuple[Hashable, NDFrameT], None, None]: """ Groupby iterator From 6cc3f498f4db5f90d8ce3a5210504b3d5ca430a7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm 
<81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 28 Jul 2022 09:24:59 -0700 Subject: [PATCH 76/91] self.keys --- pandas/core/groupby/groupby.py | 2 +- pandas/core/groupby/grouper.py | 5 ----- pandas/core/groupby/ops.py | 4 ---- pandas/core/reshape/pivot.py | 12 ++---------- pandas/plotting/_matplotlib/core.py | 10 +--------- pandas/plotting/_matplotlib/groupby.py | 14 +++++--------- pandas/plotting/_matplotlib/hist.py | 14 ++++---------- pandas/plotting/_matplotlib/misc.py | 7 +++++++ pandas/tests/groupby/test_groupby.py | 1 + 9 files changed, 21 insertions(+), 48 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 18fbb770588da..4adc7e0816a93 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -822,7 +822,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: for each group """ result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) - if result.gi_frame.f_locals["self"].raise_warning_single_grouper: + if isinstance(self.keys, list) and len(self.keys) == 1: warnings.warn( ( "In a future version of pandas, a length 1 " diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 74a6330f6fe86..582e9b20e12c8 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -733,10 +733,6 @@ def get_grouper( """ group_axis = obj._get_axis(axis) - raise_warning_single_grouper = False - if isinstance(key, list): - if len(key) == 1 and isinstance(key[0], str): - raise_warning_single_grouper = True # validate that the passed single level is compatible with the passed # axis of the object @@ -925,7 +921,6 @@ def is_in_obj(gpr) -> bool: grouper = ops.BaseGrouper( group_axis, groupings, - raise_warning_single_grouper=raise_warning_single_grouper, sort=sort, mutated=mutated, dropna=dropna, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index fb29ac40cb1d3..6bd22dd2abeff 100644 --- 
a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -712,7 +712,6 @@ def __init__( self, axis: Index, groupings: Sequence[grouper.Grouping], - raise_warning_single_grouper: bool = False, sort: bool = True, group_keys: bool = True, mutated: bool = False, @@ -723,7 +722,6 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) - self.raise_warning_single_grouper = raise_warning_single_grouper self._sort = sort self.group_keys = group_keys self.mutated = mutated @@ -1126,13 +1124,11 @@ def __init__( binlabels, mutated: bool = False, indexer=None, - raise_warning_single_grouper: bool = False, ) -> None: self.bins = ensure_int64(bins) self.binlabels = ensure_index(binlabels) self.mutated = mutated self.indexer = indexer - self.raise_warning_single_grouper = False # These lengths must match, otherwise we could call agg_series # with empty self.bins, which would raise in libreduction. diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index df277589117ea..2ec166a4ec5fb 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -367,9 +367,8 @@ def _all_key(key): return (key, margins_name) + ("",) * (len(cols) - 1) if len(rows) > 0: - modifiedrow = fix_groupby_singlelist_input(rows) margin = ( - data[rows + values].groupby(modifiedrow, observed=observed).agg(aggfunc) + data[rows + values].groupby(rows, observed=observed).agg(aggfunc) ) cat_axis = 1 @@ -410,9 +409,8 @@ def _all_key(key): margin_keys = table.columns if len(cols) > 0: - modifiedcol = fix_groupby_singlelist_input(cols) row_margin = ( - data[cols + values].groupby(modifiedcol, observed=observed).agg(aggfunc) + data[cols + values].groupby(cols, observed=observed).agg(aggfunc) ) row_margin = row_margin.stack() @@ -854,9 +852,3 @@ def get_duplicates(names): return rownames_mapper, unique_rownames, colnames_mapper, unique_colnames - -def fix_groupby_singlelist_input(keys): - if isinstance(keys, list): - if len(keys) == 1 and 
isinstance(keys[0], str): - keys = keys[0] - return keys diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index fe75f552c6633..9da1523f5449d 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -175,8 +175,7 @@ def __init__( # For `hist` plot, need to get grouped original data before `self.data` is # updated later if self.by is not None and self._kind == "hist": - bymodi = fix_groupby_singlelist_input(by) - self._grouped = data.groupby(bymodi) + self._grouped = data.groupby(by) self.kind = kind @@ -1833,10 +1832,3 @@ def blank_labeler(label, value): leglabels = labels if labels is not None else idx for p, l in zip(patches, leglabels): self._append_legend_handles_labels(p, l) - - -def fix_groupby_singlelist_input(keys): - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] - return keys diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py index 0c87db697b342..04c052ef35ae4 100644 --- a/pandas/plotting/_matplotlib/groupby.py +++ b/pandas/plotting/_matplotlib/groupby.py @@ -6,6 +6,9 @@ Dict, IndexLabel, ) +from pandas.plotting._matplotlib.misc import ( + unpack_single_str_list, +) from pandas.core.dtypes.missing import remove_na_arraylike @@ -108,8 +111,8 @@ def reconstruct_data_with_by( 1 3.0 4.0 NaN NaN 2 NaN NaN 5.0 6.0 """ - bymodi = fix_groupby_singlelist_input(by) - grouped = data.groupby(bymodi) + by_modified = unpack_single_str_list(by) + grouped = data.groupby(by_modified) data_list = [] for key, group in grouped: @@ -135,10 +138,3 @@ def reformat_hist_y_given_by( if by is not None and len(y.shape) > 1: return np.array([remove_na_arraylike(col) for col in y.T]).T return remove_na_arraylike(y) - - -def fix_groupby_singlelist_input(keys): - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] - return keys diff --git a/pandas/plotting/_matplotlib/hist.py 
b/pandas/plotting/_matplotlib/hist.py index 61408d7c946c9..ce8233a4c084b 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -21,6 +21,9 @@ isna, remove_na_arraylike, ) +from pandas.plotting._matplotlib.misc import ( + unpack_single_str_list, +) from pandas.core.frame import DataFrame @@ -67,7 +70,7 @@ def _args_adjust(self): # where subplots are created based on by argument if is_integer(self.bins): if self.by is not None: - bymodi = fix_groupby_singlelist_input(self.by) + bymodi = unpack_single_str_list(self.by) grouped = self.data.groupby(bymodi)[self.columns] self.bins = [self._calculate_bins(group) for key, group in grouped] else: @@ -272,8 +275,6 @@ def _grouped_plot( grouped = data.groupby(by) if column is not None: grouped = grouped[column] - if isinstance(by, list) and len(by) == 1: - by = [by] naxes = len(grouped) fig, axes = create_subplots( @@ -531,10 +532,3 @@ def hist_frame( maybe_adjust_figure(fig, wspace=0.3, hspace=0.3) return axes - - -def fix_groupby_singlelist_input(keys): - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] - return keys diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index e2a0d50544f22..b335223ddafc0 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -475,3 +475,10 @@ def r(h): ax.legend() ax.grid() return ax + +def unpack_single_str_list(keys): + # GH 42795 + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 269b35799769b..69ad700d95c03 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2798,6 +2798,7 @@ def test_groupby_none_column_name(): def test_single_element_list_grouping(): + # GH 42795 df = DataFrame(columns=["a", "b", "c"], index=["x", "y"]) df.loc["y"] = 
Series({"a": 1, "b": 5, "c": 2}) msg = ( From 5c18c63bf8c7e8eabe186e4ca499859d699c586c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 28 Jul 2022 09:29:54 -0700 Subject: [PATCH 77/91] tests --- pandas/core/groupby/grouper.py | 7 +------ pandas/core/reshape/pivot.py | 11 ++--------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 582e9b20e12c8..b9f4166b475ca 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -733,7 +733,6 @@ def get_grouper( """ group_axis = obj._get_axis(axis) - # validate that the passed single level is compatible with the passed # axis of the object if level is not None: @@ -919,11 +918,7 @@ def is_in_obj(gpr) -> bool: # create the internals grouper grouper = ops.BaseGrouper( - group_axis, - groupings, - sort=sort, - mutated=mutated, - dropna=dropna, + group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna ) return grouper, frozenset(exclusions), obj diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 2ec166a4ec5fb..692d1d5efd422 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -161,9 +161,6 @@ def __internal_pivot_table( pass values = list(values) - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] grouped = data.groupby(keys, observed=observed, sort=sort) agged = grouped.agg(aggfunc) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): @@ -367,9 +364,7 @@ def _all_key(key): return (key, margins_name) + ("",) * (len(cols) - 1) if len(rows) > 0: - margin = ( - data[rows + values].groupby(rows, observed=observed).agg(aggfunc) - ) + margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc) cat_axis = 1 for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): @@ -409,9 +404,7 @@ def _all_key(key): margin_keys = 
table.columns if len(cols) > 0: - row_margin = ( - data[cols + values].groupby(cols, observed=observed).agg(aggfunc) - ) + row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc) row_margin = row_margin.stack() # slight hack From d591a2ac66b61a6b393575ac72ac7eacb2155c3f Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 28 Jul 2022 09:46:28 -0700 Subject: [PATCH 78/91] . --- pandas/core/groupby/groupby.py | 1 - pandas/plotting/_matplotlib/core.py | 2 +- pandas/plotting/_matplotlib/groupby.py | 4 +--- pandas/plotting/_matplotlib/hist.py | 4 +--- pandas/plotting/_matplotlib/misc.py | 1 + 5 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4adc7e0816a93..795dbad6460c6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -821,7 +821,6 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ - result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) if isinstance(self.keys, list) and len(self.keys) == 1: warnings.warn( ( diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 9da1523f5449d..ee7493813f13a 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -175,7 +175,7 @@ def __init__( # For `hist` plot, need to get grouped original data before `self.data` is # updated later if self.by is not None and self._kind == "hist": - self._grouped = data.groupby(by) + self._grouped = data.groupby(self.by) self.kind = kind diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py index 04c052ef35ae4..af6ed009aa62c 100644 --- a/pandas/plotting/_matplotlib/groupby.py +++ b/pandas/plotting/_matplotlib/groupby.py @@ -6,9 +6,6 @@ Dict, IndexLabel, ) -from pandas.plotting._matplotlib.misc import ( - 
unpack_single_str_list, -) from pandas.core.dtypes.missing import remove_na_arraylike @@ -19,6 +16,7 @@ concat, ) +from pandas.plotting._matplotlib.misc import unpack_single_str_list def create_iter_data_given_by( data: DataFrame, kind: str = "hist" diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index ce8233a4c084b..06118eb01ecc0 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -21,9 +21,6 @@ isna, remove_na_arraylike, ) -from pandas.plotting._matplotlib.misc import ( - unpack_single_str_list, -) from pandas.core.frame import DataFrame @@ -36,6 +33,7 @@ create_iter_data_given_by, reformat_hist_y_given_by, ) +from pandas.plotting._matplotlib.misc import unpack_single_str_list from pandas.plotting._matplotlib.tools import ( create_subplots, flatten_axes, diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index b335223ddafc0..4b74b067053a6 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -476,6 +476,7 @@ def r(h): ax.grid() return ax + def unpack_single_str_list(keys): # GH 42795 if isinstance(keys, list): From 89922c2aacd29e59d5b38d980910d1072c03909c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 28 Jul 2022 09:53:41 -0700 Subject: [PATCH 79/91] . 
--- pandas/core/reshape/pivot.py | 1 - pandas/plotting/_matplotlib/groupby.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 692d1d5efd422..03aad0ef64dec 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -844,4 +844,3 @@ def get_duplicates(names): ] return rownames_mapper, unique_rownames, colnames_mapper, unique_colnames - diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py index af6ed009aa62c..17a214292608b 100644 --- a/pandas/plotting/_matplotlib/groupby.py +++ b/pandas/plotting/_matplotlib/groupby.py @@ -18,6 +18,7 @@ from pandas.plotting._matplotlib.misc import unpack_single_str_list + def create_iter_data_given_by( data: DataFrame, kind: str = "hist" ) -> Dict[str, DataFrame | Series]: From 13d4f353fdda9bd16a061dfa8441a7c83b79534e Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 28 Jul 2022 14:08:33 -0700 Subject: [PATCH 80/91] adding keys --- pandas/core/groupby/groupby.py | 3 ++- pandas/core/groupby/ops.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 795dbad6460c6..879c503d90f86 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -821,7 +821,8 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ - if isinstance(self.keys, list) and len(self.keys) == 1: + keys = self.keys + if isinstance(keys, list) and len(keys) == 1: warnings.warn( ( "In a future version of pandas, a length 1 " diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6bd22dd2abeff..d4429ec26aa69 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -17,6 +17,9 @@ Iterator, Sequence, final, + Union, + List, + Mapping, ) import 
numpy as np @@ -97,6 +100,15 @@ ) +_KeysArgType = Union[ + Hashable, + List[Hashable], + Callable[[Hashable], Hashable], + List[Callable[[Hashable], Hashable]], + Mapping[Hashable, Hashable], +] + + class WrappedCythonOp: """ Dispatch logic for functions defined in _libs.groupby @@ -712,6 +724,7 @@ def __init__( self, axis: Index, groupings: Sequence[grouper.Grouping], + keys: _KeysArgType | None = None, sort: bool = True, group_keys: bool = True, mutated: bool = False, From cacc7264b3a743d8c277c5da0456c6dcbc0563ef Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 28 Jul 2022 14:19:35 -0700 Subject: [PATCH 81/91] order --- pandas/core/groupby/ops.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index d4429ec26aa69..f6252cda6cc26 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -15,11 +15,11 @@ Generic, Hashable, Iterator, - Sequence, - final, - Union, List, Mapping, + Sequence, + Union, + final, ) import numpy as np @@ -99,7 +99,6 @@ get_indexer_dict, ) - _KeysArgType = Union[ Hashable, List[Hashable], From a3bf81ae8fa789cade8be5a3c10026fe4961f7d7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 28 Jul 2022 17:51:31 -0700 Subject: [PATCH 82/91] attribute --- pandas/core/groupby/ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f6252cda6cc26..4270f19766100 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -735,6 +735,7 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) self._sort = sort + self.keys = keys self.group_keys = group_keys self.mutated = mutated self.indexer = indexer From 93b1ee115974d71986ccd673e5de83ae22240ad5 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 
Jul 2022 08:27:48 -0700 Subject: [PATCH 83/91] ignores --- pandas/core/groupby/groupby.py | 2 +- pandas/core/groupby/ops.py | 13 ------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 879c503d90f86..2fa48f25dcb44 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -821,7 +821,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ - keys = self.keys + keys = self.keys # type: ignore[attr-defined] if isinstance(keys, list) and len(keys) == 1: warnings.warn( ( diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4270f19766100..6bd22dd2abeff 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -15,10 +15,7 @@ Generic, Hashable, Iterator, - List, - Mapping, Sequence, - Union, final, ) @@ -99,14 +96,6 @@ get_indexer_dict, ) -_KeysArgType = Union[ - Hashable, - List[Hashable], - Callable[[Hashable], Hashable], - List[Callable[[Hashable], Hashable]], - Mapping[Hashable, Hashable], -] - class WrappedCythonOp: """ @@ -723,7 +712,6 @@ def __init__( self, axis: Index, groupings: Sequence[grouper.Grouping], - keys: _KeysArgType | None = None, sort: bool = True, group_keys: bool = True, mutated: bool = False, @@ -735,7 +723,6 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) self._sort = sort - self.keys = keys self.group_keys = group_keys self.mutated = mutated self.indexer = indexer From 797c198b638c990ab0740cc11f8956e247988bd4 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 Jul 2022 08:31:04 -0700 Subject: [PATCH 84/91] Update hist.py --- pandas/plotting/_matplotlib/hist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 
06118eb01ecc0..62242a4a2ddab 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -68,8 +68,8 @@ def _args_adjust(self): # where subplots are created based on by argument if is_integer(self.bins): if self.by is not None: - bymodi = unpack_single_str_list(self.by) - grouped = self.data.groupby(bymodi)[self.columns] + by_modified = unpack_single_str_list(self.by) + grouped = self.data.groupby(by_modified)[self.columns] self.bins = [self._calculate_bins(group) for key, group in grouped] else: self.bins = self._calculate_bins(self.data) From 45eb0530624e388f0896cf158cabbf44af166024 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 Jul 2022 08:35:49 -0700 Subject: [PATCH 85/91] ignore --- pandas/core/groupby/groupby.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2fa48f25dcb44..e4f4a934ca993 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -645,6 +645,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): axis: int grouper: ops.BaseGrouper + keys: _KeysArgType | None = None, group_keys: bool | lib.NoDefault @final @@ -821,7 +822,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ - keys = self.keys # type: ignore[attr-defined] + keys = self.keys if isinstance(keys, list) and len(keys) == 1: warnings.warn( ( From d2460586eda7defc1cf0e6eee752bbf649c201c0 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 Jul 2022 09:06:09 -0700 Subject: [PATCH 86/91] . 
--- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e4f4a934ca993..686db8ec40bc2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -645,7 +645,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): axis: int grouper: ops.BaseGrouper - keys: _KeysArgType | None = None, + keys: _KeysArgType | None = None group_keys: bool | lib.NoDefault @final From 14e5d6aee256eb16d49f65b5980f1a7559cb3626 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 Jul 2022 13:48:53 -0700 Subject: [PATCH 87/91] . --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/groupby/groupby.py | 7 ++++--- pandas/core/groupby/ops.py | 3 +-- pandas/tests/groupby/test_groupby.py | 5 +++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6e9d3d0390724..8b4a4fb24ee75 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -774,7 +774,7 @@ Other Deprecations - Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`) - Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`) -- Deprecated producing a single element when using :meth:`Pandas.groupby` with a single list element. A tuple of length one will be returned instead. +- Deprecated producing a single element when using :meth:`Pandas.groupby` with a single list element. 
A tuple of length one will be returned instead (:issue:`42795`) .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 686db8ec40bc2..7e109f895e776 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -827,9 +827,10 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: warnings.warn( ( "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." + "tuple will be returned when iterating over a " + "a groupby with a grouper equal to a list of " + "length 1. Don't supply a list with a single grouper " + "to avoid this warning." ), FutureWarning, stacklevel=find_stack_level(), diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6bd22dd2abeff..6dc4ccfa8e1ee 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -11,7 +11,6 @@ import functools from typing import ( Callable, - Generator, Generic, Hashable, Iterator, @@ -745,7 +744,7 @@ def nkeys(self) -> int: def get_iterator( self, data: NDFrameT, axis: int = 0 - ) -> Generator[tuple[Hashable, NDFrameT], None, None]: + ) -> Iterator[tuple[Hashable, NDFrameT]]: """ Groupby iterator diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 69ad700d95c03..a6a3712a6ba2b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2799,8 +2799,9 @@ def test_groupby_none_column_name(): def test_single_element_list_grouping(): # GH 42795 - df = DataFrame(columns=["a", "b", "c"], index=["x", "y"]) - df.loc["y"] = Series({"a": 1, "b": 5, "c": 2}) + df = DataFrame( + {"a": [np.nan, 1], "b": [np.nan, 5], "c": [np.nan, 2]}, index=["x", "y"] + ) msg = ( "In a future version of pandas, a length 1 " "tuple will be returned when 
grouping by a " From 351aa563528a4cee89248055fcdc84e870fb34a9 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 Jul 2022 13:58:24 -0700 Subject: [PATCH 88/91] . --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8b4a4fb24ee75..bef0e1105f43a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -774,7 +774,7 @@ Other Deprecations - Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`) - Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`) -- Deprecated producing a single element when using :meth:`Pandas.groupby` with a single list element. A tuple of length one will be returned instead (:issue:`42795`) +- Deprecated producing a single element when using :class:`Pandas.groupby` or :class:`SeriesGroupBy` with a single list element. A tuple of length one will be returned instead (:issue:`42795`) .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: From e1c12e984401a7dc48b0539fd0331b5e646933f3 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 Jul 2022 14:00:06 -0700 Subject: [PATCH 89/91] . 
--- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bef0e1105f43a..5567f30c47c2c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -774,7 +774,7 @@ Other Deprecations - Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`) - Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`) -- Deprecated producing a single element when using :class:`Pandas.groupby` or :class:`SeriesGroupBy` with a single list element. A tuple of length one will be returned instead (:issue:`42795`) +- Deprecated producing a single element when iterating over a :class:`Pandas.groupby` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`) .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: From 37843a4f4792e194602da79d434499f43da9ff5b Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 29 Jul 2022 14:28:58 -0700 Subject: [PATCH 90/91] . 
--- pandas/tests/groupby/test_groupby.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a6a3712a6ba2b..73aeb17d8c274 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2804,9 +2804,10 @@ def test_single_element_list_grouping(): ) msg = ( "In a future version of pandas, a length 1 " - "tuple will be returned when grouping by a " - "list of length 1. Don't supply a list with " - "a single grouper to avoid this warning." + "tuple will be returned when iterating over a " + "a groupby with a grouper equal to a list of " + "length 1. Don't supply a list with a single grouper " + "to avoid this warning." ) with tm.assert_produces_warning(FutureWarning, match=msg): values, _ = next(iter(df.groupby(["a"]))) From 1b93ae1c86a1933a86f4c0deb44e030ebcb28386 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 1 Aug 2022 12:55:39 -0700 Subject: [PATCH 91/91] Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 5567f30c47c2c..16dda65ecaf98 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -774,7 +774,7 @@ Other Deprecations - Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`) - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`) - Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and 
:class:`ExponentialMovingWindow` ops. (:issue:`47836`) -- Deprecated producing a single element when iterating over a :class:`Pandas.groupby` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`) +- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; a tuple of length one will be returned instead (:issue:`42795`) .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: