From bf86a8d9abea8d2efbf8aae6775103c796d67543 Mon Sep 17 00:00:00 2001 From: Thomas Lautenschlaeger Date: Sat, 26 Nov 2022 02:40:02 +0100 Subject: [PATCH] applied patch --- pandas/core/apply.py | 15 +++++---------- pandas/core/frame.py | 9 ++++++--- pandas/core/shared_docs.py | 4 ++++ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b8056eff3fb2f..9ae5d35b91523 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -64,7 +64,6 @@ from pandas.core.resample import Resampler from pandas.core.window.rolling import BaseWindow - ResType = Dict[int, Any] @@ -285,10 +284,9 @@ def transform_str_or_callable(self, func) -> DataFrame | Series: return func(obj, *args, **kwargs) def _filter_numeric_only(self) -> list[Any]: - if "numeric_only" in self.kwargs and self.kwargs["numeric_only"] is True: + if "numeric_only" in self.kwargs and bool(self.kwargs["numeric_only"]) is True: obj = self.obj._get_numeric_data() - filtered_cols = list(set(self.obj) - set(obj)) - self.obj = obj + filtered_cols = list(obj) return filtered_cols return [] @@ -302,8 +300,9 @@ def agg_list_like(self) -> DataFrame | Series: """ from pandas.core.reshape.concat import concat - self._filter_numeric_only() - obj = self.obj + filtered_cols = self._filter_numeric_only() + n = len(filtered_cols) + obj = self.obj if n == 0 else self.obj[filtered_cols].astype("O") arg = cast(List[AggFuncTypeBase], self.f) if getattr(obj, "axis", 0) == 1: @@ -377,11 +376,8 @@ def agg_dict_like(self) -> DataFrame | Series: from pandas import Index from pandas.core.reshape.concat import concat - filtered_col = self._filter_numeric_only() - obj = self.obj arg = cast(AggFuncTypeDict, self.f) - arg = {k: arg[k] for k in arg.keys() if k not in filtered_col} if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") @@ -1181,7 +1177,6 @@ def reconstruct_func( if not relabeling: if isinstance(func, list) and len(func) > 
len(set(func)): - # GH 28426 will raise error if duplicated function names are used and # there is no reassigned name raise SpecificationError( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e3722eb95c03a..9df0febe24a67 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9221,10 +9221,10 @@ def _gotitem( ... ['c', 3, 6]], ... columns=['A', 'B', 'C']) - Works equivalently as above. Add argument `numeric_only=True` to avoid - exceptions or warnings. + Works equivalently as above. Add argument `numeric_only=True` to + aggregate only numeric columns. - >>> df.agg({'A': 'mean', 'B': [pd.DataFrame.mean, 'std'], 'C': ['sum', 'mean']}, + >>> df.agg({'B': ['mean', 'std'], 'C': ['sum', 'mean']}, ... numeric_only=True) B C mean 2.0 5.0 @@ -9394,6 +9394,9 @@ def apply( Functions that mutate the passed object can produce unexpected behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. + Use the keyword argument `numeric_only=True` to apply functions + only to numeric columns and skip non-numeric columns + (e.g. columns that contain strings). Examples -------- diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 07dc203e556e8..fc8043ed99c3f 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -46,6 +46,10 @@ for more details. A passed user-defined-function will be passed a Series for evaluation. + +Use the keyword argument `numeric_only=True` to apply functions +only to numeric columns and skip non-numeric columns +(e.g. columns that contain strings). {examples}""" _shared_docs[