FIX-#1869: index sort for count(level=...) (#1870)

* FIX-#1869: issue fix

  Signed-off-by: Alexander Myskov <alexander.myskov@intel.com>

* FIX-#1869: remove comment

  Signed-off-by: Alexander Myskov <alexander.myskov@intel.com>

* FIX-#1869: sort by _handle_level_agg

  Signed-off-by: Alexander Myskov <alexander.myskov@intel.com>
modin-project · Aug 5, 2020 · 19fd1b4 · 19fd1b4
1 parent e517a09
commit 19fd1b4
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 5 deletions.
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
@@ -119,14 +119,14 @@ def _update_inplace(self, new_query_compiler):
             sib._query_compiler = new_query_compiler
         old_query_compiler.free()
 
-    def _handle_level_agg(self, axis, level, op, **kwargs):
+    def _handle_level_agg(self, axis, level, op, sort=False, **kwargs):
         """Helper method to perform error checking for aggregation functions with a level parameter.
         Args:
             axis: The axis to apply the operation on
             level: The level of the axis to apply the operation on
             op: String representation of the operation to be performed on the level
         """
-        return getattr(self.groupby(level=level, axis=axis, sort=False), op)(**kwargs)
+        return getattr(self.groupby(level=level, axis=axis, sort=sort), op)(**kwargs)
 
     def _validate_other(
         self,
@@ -752,7 +752,7 @@ def count(self, axis=0, level=None, numeric_only=False):
                 # error thrown by pandas
                 raise TypeError("Can only count levels on hierarchical columns.")
 
-            return self._handle_level_agg(axis, level, "count")
+            return self._handle_level_agg(axis=axis, level=level, op="count", sort=True)
 
         return self._reduce_dimension(
             self._query_compiler.count(

diff --git a/modin/pandas/test/test_dataframe.py b/modin/pandas/test/test_dataframe.py
@@ -31,6 +31,7 @@
     df_is_empty,
     arg_keys,
     name_contains,
+    test_data,
     test_data_values,
     test_data_keys,
     test_data_with_duplicates_values,
@@ -5393,6 +5394,26 @@ def test___len__(self, data):
 
         assert len(modin_df) == len(pandas_df)
 
+    def test_index_order(self):
+        # see #1708 and #1869 for details
+        df_modin, df_pandas = (
+            pd.DataFrame(test_data["dense_nan_data"]),
+            pandas.DataFrame(test_data["dense_nan_data"]),
+        )
+        rows_number = len(df_modin.index)
+        level_0 = np.random.choice([x for x in range(10)], rows_number)
+        level_1 = np.random.choice([x for x in range(10)], rows_number)
+        index = pandas.MultiIndex.from_arrays([level_0, level_1])
+
+        df_modin.index = index
+        df_pandas.index = index
+
+        for func in ["all", "any", "mad", "count"]:
+            df_equals(
+                getattr(df_modin, func)(level=0).index,
+                getattr(df_pandas, func)(level=0).index,
+            )
+
 
 class TestDataFrameIter:
     @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)

diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
@@ -1835,7 +1835,7 @@ def test_last():
 
 
 def test_index_order():
-    # see #1708 for details
+    # see #1708 and #1869 for details
     s_modin, s_pandas = create_test_series(test_data["dense_nan_data"])
     rows_number = len(s_modin.index)
     level_0 = np.random.choice([x for x in range(10)], rows_number)
@@ -1845,7 +1845,7 @@ def test_index_order():
     s_modin.index = index
     s_pandas.index = index
 
-    for func in ["all", "any", "mad"]:
+    for func in ["all", "any", "mad", "count"]:
         df_equals(
             getattr(s_modin, func)(level=0).index,
             getattr(s_pandas, func)(level=0).index,