diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 1e4f25a2480..19126f825e4 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -119,14 +119,15 @@ def _update_inplace(self, new_query_compiler):
             sib._query_compiler = new_query_compiler
         old_query_compiler.free()
 
-    def _handle_level_agg(self, axis, level, op, **kwargs):
+    def _handle_level_agg(self, axis, level, op, sort=False, **kwargs):
         """Helper method to perform error checking for aggregation functions with a level parameter.
         Args:
             axis: The axis to apply the operation on
             level: The level of the axis to apply the operation on
             op: String representation of the operation to be performed on the level
+            sort: Forwarded to groupby() as its `sort` argument (defaults to False)
         """
-        return getattr(self.groupby(level=level, axis=axis, sort=False), op)(**kwargs)
+        return getattr(self.groupby(level=level, axis=axis, sort=sort), op)(**kwargs)
 
     def _validate_other(
         self,
@@ -752,7 +753,7 @@ def count(self, axis=0, level=None, numeric_only=False):
                 # error thrown by pandas
                 raise TypeError("Can only count levels on hierarchical columns.")
 
-            return self._handle_level_agg(axis, level, "count")
+            return self._handle_level_agg(axis=axis, level=level, op="count", sort=True)
 
         return self._reduce_dimension(
             self._query_compiler.count(
diff --git a/modin/pandas/test/test_dataframe.py b/modin/pandas/test/test_dataframe.py
index cb24e219078..66a2f376994 100644
--- a/modin/pandas/test/test_dataframe.py
+++ b/modin/pandas/test/test_dataframe.py
@@ -31,6 +31,7 @@
     df_is_empty,
     arg_keys,
     name_contains,
+    test_data,
     test_data_values,
     test_data_keys,
     test_data_with_duplicates_values,
@@ -5393,6 +5394,26 @@ def test___len__(self, data):
 
         assert len(modin_df) == len(pandas_df)
 
+    def test_index_order(self):
+        # see #1708 and #1869 for details
+        df_modin, df_pandas = (
+            pd.DataFrame(test_data["dense_nan_data"]),
+            pandas.DataFrame(test_data["dense_nan_data"]),
+        )
+        rows_number = len(df_modin.index)
+        level_0 = np.random.choice(list(range(10)), rows_number)
+        level_1 = np.random.choice(list(range(10)), rows_number)
+        index = pandas.MultiIndex.from_arrays([level_0, level_1])
+
+        df_modin.index = index
+        df_pandas.index = index
+
+        for func in ["all", "any", "mad", "count"]:
+            df_equals(
+                getattr(df_modin, func)(level=0).index,
+                getattr(df_pandas, func)(level=0).index,
+            )
+
 
 class TestDataFrameIter:
     @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 7842d127ee9..c4cdddf235e 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -1835,7 +1835,7 @@ def test_last():
 
 
 def test_index_order():
-    # see #1708 for details
+    # see #1708 and #1869 for details
     s_modin, s_pandas = create_test_series(test_data["dense_nan_data"])
     rows_number = len(s_modin.index)
     level_0 = np.random.choice([x for x in range(10)], rows_number)
@@ -1845,7 +1845,7 @@ def test_index_order():
     s_modin.index = index
     s_pandas.index = index
 
-    for func in ["all", "any", "mad"]:
+    for func in ["all", "any", "mad", "count"]:
         df_equals(
             getattr(s_modin, func)(level=0).index,
             getattr(s_pandas, func)(level=0).index,