diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index 2dc8d5fe748..99f9ed4445d 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -732,18 +732,21 @@ def reduce_func(df, *args, **kwargs): dropna = kwargs.get("dropna", True) try: - result = df.squeeze(axis=1).groupby(df.index, sort=False).sum() + result = ( + df.squeeze(axis=1) + .groupby(df.index, sort=False, dropna=dropna) + .sum() + ) # This will happen with Arrow buffer read-only errors. We don't want to copy # all the time, so this will try to fast-path the code first. except (ValueError): - result = df.copy().squeeze(axis=1).groupby(df.index, sort=False).sum() - - if not dropna and np.nan in df.index: - result = result.append( - pandas.Series( - [df.squeeze(axis=1).loc[[np.nan]].sum()], index=[np.nan] - ) + result = ( + df.copy() + .squeeze(axis=1) + .groupby(df.index, sort=False, dropna=dropna) + .sum() ) + if normalize: result = result / df.squeeze(axis=1).sum() diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py index d524537148d..8d814762b13 100644 --- a/modin/pandas/test/test_series.py +++ b/modin/pandas/test/test_series.py @@ -3444,6 +3444,26 @@ def sort_index_for_equal_values(result, ascending): ) df_equals(modin_result, pandas_result) + # from issue #2365 + arr = np.random.rand(2 ** 6) + arr[::10] = np.nan + modin_series, pandas_series = create_test_series(arr) + modin_result = modin_series.value_counts(dropna=False, ascending=True) + pandas_result = sort_index_for_equal_values( + pandas_series.value_counts(dropna=False, ascending=True), True + ) + if get_current_backend() == "BaseOnPython": + modin_result = sort_index_for_equal_values(modin_result, ascending=True) + df_equals(modin_result, pandas_result) + + modin_result = modin_series.value_counts(dropna=False, ascending=False) + pandas_result = sort_index_for_equal_values( + pandas_series.value_counts(dropna=False, ascending=False), False + ) + if get_current_backend() == "BaseOnPython": + modin_result = sort_index_for_equal_values(modin_result, ascending=False) + df_equals(modin_result, pandas_result) + @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_values(data):