Skip to content

Commit

Permalink
FIX-modin-project#1869: index sort for count(level=...) (modin-project#1870)
Browse files Browse the repository at this point in the history

* FIX-modin-project#1869: issue fix

Signed-off-by: Alexander Myskov <alexander.myskov@intel.com>

* FIX-modin-project#1869: remove comment

Signed-off-by: Alexander Myskov <alexander.myskov@intel.com>

* FIX-modin-project#1869: sort by _handle_level_agg

Signed-off-by: Alexander Myskov <alexander.myskov@intel.com>
  • Loading branch information
amyskov authored and aregm committed Sep 16, 2020
1 parent 8739fd9 commit 7fd2476
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 5 deletions.
6 changes: 3 additions & 3 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,14 @@ def _update_inplace(self, new_query_compiler):
sib._query_compiler = new_query_compiler
old_query_compiler.free()

def _handle_level_agg(self, axis, level, op, **kwargs):
def _handle_level_agg(self, axis, level, op, sort=False, **kwargs):
"""Helper method to perform error checking for aggregation functions with a level parameter.
Args:
axis: The axis to apply the operation on
level: The level of the axis to apply the operation on
op: String representation of the operation to be performed on the level
sort: Forwarded to `groupby` as its `sort` argument; defaults to False
**kwargs: Keyword arguments forwarded to the `op` method of the groupby object
Returns:
The result of calling `op` on the groupby object built from `level` and `axis`
"""
return getattr(self.groupby(level=level, axis=axis, sort=False), op)(**kwargs)
return getattr(self.groupby(level=level, axis=axis, sort=sort), op)(**kwargs)

def _validate_other(
self,
Expand Down Expand Up @@ -752,7 +752,7 @@ def count(self, axis=0, level=None, numeric_only=False):
# error thrown by pandas
raise TypeError("Can only count levels on hierarchical columns.")

return self._handle_level_agg(axis, level, "count")
return self._handle_level_agg(axis=axis, level=level, op="count", sort=True)

return self._reduce_dimension(
self._query_compiler.count(
Expand Down
21 changes: 21 additions & 0 deletions modin/pandas/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
df_is_empty,
arg_keys,
name_contains,
test_data,
test_data_values,
test_data_keys,
test_data_with_duplicates_values,
Expand Down Expand Up @@ -5393,6 +5394,26 @@ def test___len__(self, data):

assert len(modin_df) == len(pandas_df)

def test_index_order(self):
    """Compare the index of level-wise reductions between Modin and pandas.

    Both frames are built from the same data and share one random two-level
    MultiIndex; for each reduction the index of the ``level=0`` result is
    passed to ``df_equals``. See #1708 and #1869 for details.
    """
    source = test_data["dense_nan_data"]
    df_modin = pd.DataFrame(source)
    df_pandas = pandas.DataFrame(source)

    n_rows = len(df_modin.index)
    candidates = list(range(10))
    # Two independent random draws, one per index level (level 0 first).
    levels = [np.random.choice(candidates, n_rows) for _ in range(2)]
    multi_index = pandas.MultiIndex.from_arrays(levels)

    df_modin.index = multi_index
    df_pandas.index = multi_index

    for func in ("all", "any", "mad", "count"):
        modin_result = getattr(df_modin, func)(level=0)
        pandas_result = getattr(df_pandas, func)(level=0)
        df_equals(modin_result.index, pandas_result.index)


class TestDataFrameIter:
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/test/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1835,7 +1835,7 @@ def test_last():


def test_index_order():
# see #1708 for details
# see #1708 and #1869 for details
s_modin, s_pandas = create_test_series(test_data["dense_nan_data"])
rows_number = len(s_modin.index)
level_0 = np.random.choice([x for x in range(10)], rows_number)
Expand All @@ -1845,7 +1845,7 @@ def test_index_order():
s_modin.index = index
s_pandas.index = index

for func in ["all", "any", "mad"]:
for func in ["all", "any", "mad", "count"]:
df_equals(
getattr(s_modin, func)(level=0).index,
getattr(s_pandas, func)(level=0).index,
Expand Down

0 comments on commit 7fd2476

Please sign in to comment.