diff --git a/docs/supported_apis/series_supported.rst b/docs/supported_apis/series_supported.rst index a6b31d05a28..c30ff5977fe 100644 --- a/docs/supported_apis/series_supported.rst +++ b/docs/supported_apis/series_supported.rst @@ -474,10 +474,8 @@ the related section on `Defaulting to pandas`_. +-----------------------------+---------------------------------+----------------------------------------------------+ | ``valid`` | D | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``value_counts`` | Y | The indices of resulting object will be in | -| | | descending (ascending, if ascending=True) order for| -| | | equal values. | -| | | In pandas indices are located in random order. | +| ``value_counts`` | Y | The order of indices in the resulting object may | +| | | differ from pandas. | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``values`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/supported_apis/utilities_supported.rst b/docs/supported_apis/utilities_supported.rst index 89e5e66a79a..f928c3600a8 100644 --- a/docs/supported_apis/utilities_supported.rst +++ b/docs/supported_apis/utilities_supported.rst @@ -21,10 +21,8 @@ default to pandas. +---------------------------+---------------------------------+----------------------------------------------------+ | `pd.unique`_ | Y | | +---------------------------+---------------------------------+----------------------------------------------------+ -| ``pd.value_counts`` | Y | The indices of resulting object will be in | -| | | descending (ascending, if ascending=True) order for| -| | | equal values. | -| | | In pandas indices are located in random order. | +| ``pd.value_counts`` | Y | The order of indices in the resulting object may | +| | | differ from pandas. 
| +---------------------------+---------------------------------+----------------------------------------------------+ | `pd.cut`_ | D | | +---------------------------+---------------------------------+----------------------------------------------------+ diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index 4f35a3133ae..bd5eba110e8 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -750,58 +750,7 @@ def reduce_func(df, *args, **kwargs): if normalize: result = result / df.squeeze(axis=1).sum() - result = result.sort_values(ascending=ascending) if sort else result - - # We want to sort both values and indices of the result object. - # This function will sort indices for equal values. - def sort_index_for_equal_values(result, ascending): - """ - Sort indices for equal values of result object. - - Parameters - ---------- - result : pandas.Series or pandas.DataFrame with one column - The object whose indices for equal values is needed to sort. - ascending : boolean - Sort in ascending (if it is True) or descending (if it is False) order. - - Returns - ------- - pandas.DataFrame - A new DataFrame with sorted indices. 
- """ - is_range = False - is_end = False - i = 0 - new_index = np.empty(len(result), dtype=type(result.index)) - while i < len(result): - j = i - if i < len(result) - 1: - while result[result.index[i]] == result[result.index[i + 1]]: - i += 1 - if is_range is False: - is_range = True - if i == len(result) - 1: - is_end = True - break - if is_range: - k = j - for val in sorted( - result.index[j : i + 1], reverse=not ascending - ): - new_index[k] = val - k += 1 - if is_end: - break - is_range = False - else: - new_index[j] = result.index[j] - i += 1 - return pandas.DataFrame( - result, index=new_index, columns=["__reduced__"] - ) - - return sort_index_for_equal_values(result, ascending) + return result.sort_values(ascending=ascending) if sort else result return MapReduceFunction.register( map_func, reduce_func, axis=0, preserve_index=False diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py index e70852c8723..cab6ddc6c9d 100644 --- a/modin/pandas/test/test_general.py +++ b/modin/pandas/test/test_general.py @@ -352,23 +352,29 @@ def sort_index_for_equal_values(result, ascending): else: new_index[j] = result.index[j] i += 1 - return pandas.Series(result, index=new_index) + return type(result)(result, index=new_index) - # We sort indices for pandas result because of issue #1650 + # We sort indices for Modin and pandas result because of issue #1650 values = np.array([3, 1, 2, 3, 4, np.nan]) - modin_result = pd.value_counts(values, normalize=normalize, ascending=False) + modin_result = sort_index_for_equal_values( + pd.value_counts(values, normalize=normalize, ascending=False), False + ) pandas_result = sort_index_for_equal_values( pandas.value_counts(values, normalize=normalize, ascending=False), False ) df_equals(modin_result, pandas_result) - modin_result = pd.value_counts(values, bins=bins, ascending=False) + modin_result = sort_index_for_equal_values( + pd.value_counts(values, bins=bins, ascending=False), False + ) pandas_result = 
sort_index_for_equal_values( pandas.value_counts(values, bins=bins, ascending=False), False ) df_equals(modin_result, pandas_result) - modin_result = pd.value_counts(values, dropna=dropna, ascending=True) + modin_result = sort_index_for_equal_values( + pd.value_counts(values, dropna=dropna, ascending=True), True + ) pandas_result = sort_index_for_equal_values( pandas.value_counts(values, dropna=dropna, ascending=True), True ) diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py index 1f40d7a590e..86e16ed4a6a 100644 --- a/modin/pandas/test/test_series.py +++ b/modin/pandas/test/test_series.py @@ -21,7 +21,7 @@ from pandas.core.base import SpecificationError import sys -from modin.utils import to_pandas, get_current_backend +from modin.utils import to_pandas from .utils import ( random_state, RAND_LOW, @@ -3376,33 +3376,28 @@ def sort_index_for_equal_values(result, ascending): i += 1 return type(result)(result, index=new_index) - # We sort indices for pandas result because of issue #1650 + # We sort indices for Modin and pandas result because of issue #1650 modin_series, pandas_series = create_test_series(test_data_values[0]) - modin_result = modin_series.value_counts(normalize=normalize, ascending=False) - - if get_current_backend() == "BaseOnPython": - modin_result = sort_index_for_equal_values(modin_result, ascending=False) + modin_result = sort_index_for_equal_values( + modin_series.value_counts(normalize=normalize, ascending=False), False + ) pandas_result = sort_index_for_equal_values( pandas_series.value_counts(normalize=normalize, ascending=False), False ) df_equals(modin_result, pandas_result) - modin_result = modin_series.value_counts(bins=bins, ascending=False) - - if get_current_backend() == "BaseOnPython": - modin_result = sort_index_for_equal_values(modin_result, ascending=False) - + modin_result = sort_index_for_equal_values( + modin_series.value_counts(bins=bins, ascending=False), False + ) pandas_result = 
sort_index_for_equal_values( pandas_series.value_counts(bins=bins, ascending=False), False ) df_equals(modin_result, pandas_result) - modin_result = modin_series.value_counts(dropna=dropna, ascending=True) - - if get_current_backend() == "BaseOnPython": - modin_result = sort_index_for_equal_values(modin_result, ascending=True) - + modin_result = sort_index_for_equal_values( + modin_series.value_counts(dropna=dropna, ascending=True), True + ) pandas_result = sort_index_for_equal_values( pandas_series.value_counts(dropna=dropna, ascending=True), True ) @@ -3412,20 +3407,20 @@ def sort_index_for_equal_values(result, ascending): arr = np.random.rand(2 ** 6) arr[::10] = np.nan modin_series, pandas_series = create_test_series(arr) - modin_result = modin_series.value_counts(dropna=False, ascending=True) + modin_result = sort_index_for_equal_values( + modin_series.value_counts(dropna=False, ascending=True), True + ) pandas_result = sort_index_for_equal_values( pandas_series.value_counts(dropna=False, ascending=True), True ) - if get_current_backend() == "BaseOnPython": - modin_result = sort_index_for_equal_values(modin_result, ascending=True) df_equals(modin_result, pandas_result) - modin_result = modin_series.value_counts(dropna=False, ascending=False) + modin_result = sort_index_for_equal_values( + modin_series.value_counts(dropna=False, ascending=False), False + ) pandas_result = sort_index_for_equal_values( pandas_series.value_counts(dropna=False, ascending=False), False ) - if get_current_backend() == "BaseOnPython": - modin_result = sort_index_for_equal_values(modin_result, ascending=False) df_equals(modin_result, pandas_result)