diff --git a/modin/core/dataframe/algebra/default2pandas/default.py b/modin/core/dataframe/algebra/default2pandas/default.py index 23078bd9c7b..4d01a256221 100644 --- a/modin/core/dataframe/algebra/default2pandas/default.py +++ b/modin/core/dataframe/algebra/default2pandas/default.py @@ -128,6 +128,10 @@ def applyier(df, *args, **kwargs): and func not in ("rdivmod", pandas.Series.rdivmod) and func not in ("to_list", pandas.Series.to_list) and func not in ("to_dict", pandas.Series.to_dict) + and func not in ("mean", pandas.DataFrame.mean) + and func not in ("median", pandas.DataFrame.median) + and func not in ("skew", pandas.DataFrame.skew) + and func not in ("kurt", pandas.DataFrame.kurt) ): # When applying a DatetimeProperties or TimedeltaProperties function, # if we don't specify the dtype for the DataFrame, the frame might diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index ab72a882703..7f522a436f2 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -900,7 +900,7 @@ def reduce_func(df, **kwargs): return TreeReduce.register(map_func, reduce_func)(self, axis=axis, **kwargs) def mean(self, axis, **kwargs): - if kwargs.get("level") is not None: + if kwargs.get("level") is not None or axis is None: return self.default_to_pandas(pandas.DataFrame.mean, axis=axis, **kwargs) skipna = kwargs.get("skipna", True) @@ -949,10 +949,24 @@ def reduce_fn(df, **kwargs): # Reduce operations idxmax = Reduce.register(pandas.DataFrame.idxmax) idxmin = Reduce.register(pandas.DataFrame.idxmin) - median = Reduce.register(pandas.DataFrame.median) + + def median(self, axis, **kwargs): + if axis is None: + return self.default_to_pandas(pandas.DataFrame.median, axis=axis, **kwargs) + return Reduce.register(pandas.DataFrame.median)(self, axis=axis, **kwargs) + nunique = Reduce.register(pandas.DataFrame.nunique) - skew = Reduce.register(pandas.DataFrame.skew) - kurt = Reduce.register(pandas.DataFrame.kurt) + + def skew(self, axis, **kwargs): + if axis is None: + return self.default_to_pandas(pandas.DataFrame.skew, axis=axis, **kwargs) + return Reduce.register(pandas.DataFrame.skew)(self, axis=axis, **kwargs) + + def kurt(self, axis, **kwargs): + if axis is None: + return self.default_to_pandas(pandas.DataFrame.kurt, axis=axis, **kwargs) + return Reduce.register(pandas.DataFrame.kurt)(self, axis=axis, **kwargs) + sem = Reduce.register(pandas.DataFrame.sem) std = Reduce.register(pandas.DataFrame.std) var = Reduce.register(pandas.DataFrame.var) diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 2ab71c01f1c..d6a513851f2 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -1779,22 +1779,7 @@ def iloc(self): # noqa: RT01, D200 @_inherit_docstrings(pandas.DataFrame.kurt, apilink="pandas.DataFrame.kurt") def kurt(self, axis=0, skipna=True, numeric_only=False, **kwargs): - validate_bool_kwarg(skipna, "skipna", none_allowed=False) - axis = self._get_axis_number(axis) - - if not numeric_only: - self._validate_dtypes(numeric_only=True) - - data = self._get_numeric_data(axis) if numeric_only else self - - return self._reduce_dimension( - data._query_compiler.kurt( - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - ) + return self._stat_operation("kurt", axis, skipna, numeric_only, **kwargs) kurtosis = kurt @@ -1867,9 +1852,10 @@ def max( Return the maximum of the values over the requested axis. """ validate_bool_kwarg(skipna, "skipna", none_allowed=False) + orig_axis = axis axis = self._get_axis_number(axis) data = self._validate_dtypes_min_max(axis, numeric_only) - return data._reduce_dimension( + res = data._reduce_dimension( data._query_compiler.max( axis=axis, skipna=skipna, @@ -1877,6 +1863,49 @@ def max( **kwargs, ) ) + if orig_axis is None: + res = res._reduce_dimension( + res._query_compiler.max( + axis=0, + skipna=skipna, + numeric_only=False, + **kwargs, + ) + ) + return res + + def min( + self, + axis: Axis = 0, + skipna: bool = True, + numeric_only=False, + **kwargs, + ): # noqa: PR01, RT01, D200 + """ + Return the minimum of the values over the requested axis. + """ + validate_bool_kwarg(skipna, "skipna", none_allowed=False) + orig_axis = axis + axis = self._get_axis_number(axis) + data = self._validate_dtypes_min_max(axis, numeric_only) + res = data._reduce_dimension( + data._query_compiler.min( + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + ) + if orig_axis is None: + res = res._reduce_dimension( + res._query_compiler.min( + axis=0, + skipna=skipna, + numeric_only=False, + **kwargs, + ) + ) + return res def _stat_operation( self, @@ -1911,7 +1940,7 @@ def _stat_operation( self is DataFrame and level is not specified. `DataFrame` - self is DataFrame and level is specified. """ - axis = self._get_axis_number(axis) + axis = self._get_axis_number(axis) if axis is not None else None validate_bool_kwarg(skipna, "skipna", none_allowed=False) if op_name == "median": numpy_compat.function.validate_median((), kwargs) @@ -1924,14 +1953,23 @@ def _stat_operation( if not numeric_only: self._validate_dtypes(numeric_only=True) - data = self._get_numeric_data(axis) if numeric_only else self + data = ( + self._get_numeric_data(axis if axis is not None else 0) + if numeric_only + else self + ) result_qc = getattr(data._query_compiler, op_name)( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs, ) - return self._reduce_dimension(result_qc) + return ( + self._reduce_dimension(result_qc) + if isinstance(result_qc, type(self._query_compiler)) + # scalar case + else result_qc + ) def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 """ @@ -1941,28 +1979,6 @@ def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 self._query_compiler.memory_usage(index=index, deep=deep) ) - def min( - self, - axis: Axis = 0, - skipna: bool = True, - numeric_only=False, - **kwargs, - ): # noqa: PR01, RT01, D200 - """ - Return the minimum of the values over the requested axis. - """ - validate_bool_kwarg(skipna, "skipna", none_allowed=False) - axis = self._get_axis_number(axis) - data = self._validate_dtypes_min_max(axis, numeric_only) - return data._reduce_dimension( - data._query_compiler.min( - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - ) - def mod( self, other, axis="columns", level=None, fill_value=None ): # noqa: PR01, RT01, D200 diff --git a/modin/pandas/series.py b/modin/pandas/series.py index a27b0ef03ae..d873ab7f488 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -1135,21 +1135,6 @@ def keys(self): # noqa: RT01, D200 """ return self.index - def kurt( - self, - axis: Axis = 0, - skipna=True, - numeric_only=False, - **kwargs, - ): # noqa: PR01, RT01, D200 - """ - Return unbiased kurtosis over requested axis. - """ - axis = self._get_axis_number(axis) - return super(Series, self).kurt(axis, skipna, numeric_only, **kwargs) - - kurtosis = kurt - def le(self, other, level=None, fill_value=None, axis=0): # noqa: PR01, RT01, D200 """ Return less than or equal to of series and `other`, element-wise (binary operator `le`). diff --git a/modin/pandas/test/dataframe/test_reduce.py b/modin/pandas/test/dataframe/test_reduce.py index ffbb9e5986e..8e1176acfca 100644 --- a/modin/pandas/test/dataframe/test_reduce.py +++ b/modin/pandas/test/dataframe/test_reduce.py @@ -332,7 +332,7 @@ def test_sum_single_column(data): @pytest.mark.parametrize( "fn", ["max", "min", "median", "mean", "skew", "kurt", "sem", "std", "var"] ) -@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize("axis", [0, 1, None]) @pytest.mark.parametrize( "numeric_only", bool_arg_values, ids=arg_keys("numeric_only", bool_arg_keys) )