diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee1f36e637c..b7f2f015918 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: with: python-version: "3.7.x" architecture: "x64" - - run: pip install black + - run: pip install "black==19.10b0" - run: black --check --diff modin/ lint-flake8: diff --git a/modin/pandas/test/test_dataframe.py b/modin/pandas/test/test_dataframe.py index 3aa43bce7ae..2340468ec02 100644 --- a/modin/pandas/test/test_dataframe.py +++ b/modin/pandas/test/test_dataframe.py @@ -62,6 +62,7 @@ udf_func_values, udf_func_keys, generate_multiindex, + test_bool_data, ) pd.DEFAULT_NPARTITIONS = 4 @@ -3005,190 +3006,57 @@ def test_hasattr_sparse(self, data): class TestDataFrameReduction_A: @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) - @pytest.mark.parametrize("axis", axis_values, ids=axis_keys) - @pytest.mark.parametrize( - "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys) - ) - @pytest.mark.parametrize( - "bool_only", bool_arg_values, ids=arg_keys("bool_only", bool_arg_keys) - ) - def test_all(self, data, axis, skipna, bool_only): - modin_df = pd.DataFrame(data) - pandas_df = pandas.DataFrame(data) - - try: - pandas_result = pandas_df.all(axis=axis, skipna=skipna, bool_only=bool_only) - except Exception as e: - with pytest.raises(type(e)): - modin_df.all(axis=axis, skipna=skipna, bool_only=bool_only) - else: - modin_result = modin_df.all(axis=axis, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) - - # Test when axis is None. This will get repeated but easier than using list in parameterize decorator - try: - pandas_result = pandas_df.all(axis=None, skipna=skipna, bool_only=bool_only) - except Exception as e: - with pytest.raises(type(e)): - modin_df.all(axis=None, skipna=skipna, bool_only=bool_only) - else: - modin_result = modin_df.all(axis=None, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) - - try: - pandas_result = pandas_df.T.all( - axis=axis, skipna=skipna, bool_only=bool_only - ) - except Exception as e: - with pytest.raises(type(e)): - modin_df.T.all(axis=axis, skipna=skipna, bool_only=bool_only) - else: - modin_result = modin_df.T.all(axis=axis, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) - - # Test when axis is None. This will get repeated but easier than using list in parameterize decorator - try: - pandas_result = pandas_df.T.all( - axis=None, skipna=skipna, bool_only=bool_only - ) - except Exception as e: - with pytest.raises(type(e)): - modin_df.T.all(axis=None, skipna=skipna, bool_only=bool_only) - else: - modin_result = modin_df.T.all(axis=None, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) - - # test level - modin_df_multi_level = modin_df.copy() - pandas_df_multi_level = pandas_df.copy() - axis = modin_df._get_axis_number(axis) if axis is not None else 0 - levels = 3 - axis_names_list = [["a", "b", "c"], None] - for axis_names in axis_names_list: - if axis == 0: - new_idx = pandas.MultiIndex.from_tuples( - [(i // 4, i // 2, i) for i in range(len(modin_df.index))], - names=axis_names, - ) - modin_df_multi_level.index = new_idx - pandas_df_multi_level.index = new_idx - else: - new_col = pandas.MultiIndex.from_tuples( - [(i // 4, i // 2, i) for i in range(len(modin_df.columns))], - names=axis_names, - ) - modin_df_multi_level.columns = new_col - pandas_df_multi_level.columns = new_col - - for level in list(range(levels)) + (axis_names if axis_names else []): - try: - pandas_multi_level_result = pandas_df_multi_level.all( - axis=axis, bool_only=bool_only, level=level, skipna=skipna - ) - - except Exception as e: - with pytest.raises(type(e)): - modin_df_multi_level.all( - axis=axis, bool_only=bool_only, level=level, skipna=skipna - ) - else: - modin_multi_level_result = modin_df_multi_level.all( - axis=axis, bool_only=bool_only, level=level, skipna=skipna - ) + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("method", ["all", "any"]) + def test_all_any_default(self, data, axis, method): + modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data) - df_equals(modin_multi_level_result, pandas_multi_level_result) + eval_general( + modin_df, + pandas_df, + lambda df: getattr(df, method)(axis=axis, skipna=True, bool_only=None), + ) - @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) - @pytest.mark.parametrize("axis", axis_values, ids=axis_keys) @pytest.mark.parametrize( "skipna", bool_arg_values, ids=arg_keys("skipna", bool_arg_keys) ) @pytest.mark.parametrize( "bool_only", bool_arg_values, ids=arg_keys("bool_only", bool_arg_keys) ) - def test_any(self, data, axis, skipna, bool_only): - modin_df = pd.DataFrame(data) - pandas_df = pandas.DataFrame(data) + @pytest.mark.parametrize("method", ["all", "any"]) + @pytest.mark.parametrize("transpose", [False, True]) + @pytest.mark.parametrize("data", [test_data["dense_nan_data"], test_bool_data]) + def test_all_any(self, data, skipna, bool_only, method, transpose): + modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data) - try: - pandas_result = pandas_df.any(axis=axis, skipna=skipna, bool_only=bool_only) - except Exception as e: - with pytest.raises(type(e)): - modin_df.any(axis=axis, skipna=skipna, bool_only=bool_only) - else: - modin_result = modin_df.any(axis=axis, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) + if transpose: + modin_df, pandas_df = modin_df.T, pandas_df.T - try: - pandas_result = pandas_df.any(axis=None, skipna=skipna, bool_only=bool_only) - except Exception as e: - with pytest.raises(type(e)): - modin_df.any(axis=None, skipna=skipna, bool_only=bool_only) - else: - modin_result = modin_df.any(axis=None, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) + eval_general( + modin_df, + pandas_df, + lambda df: getattr(df, method)(skipna=skipna, bool_only=bool_only), + ) - try: - pandas_result = pandas_df.T.any( - axis=axis, skipna=skipna, bool_only=bool_only - ) - except Exception as e: - with pytest.raises(type(e)): - modin_df.T.any(axis=axis, skipna=skipna, bool_only=bool_only) - else: - modin_result = modin_df.T.any(axis=axis, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("level", [-1, 0, 1]) + @pytest.mark.parametrize("method", ["all", "any"]) + def test_all_level(self, axis, level, method): + data = test_data_values[0] + modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data) - try: - pandas_result = pandas_df.T.any( - axis=None, skipna=skipna, bool_only=bool_only - ) - except Exception as e: - with pytest.raises(type(e)): - modin_df.T.any(axis=None, skipna=skipna, bool_only=bool_only) + if axis == 0: + new_idx = generate_multiindex(len(modin_df.index)) + modin_df.index = new_idx + pandas_df.index = new_idx else: - modin_result = modin_df.T.any(axis=None, skipna=skipna, bool_only=bool_only) - df_equals(modin_result, pandas_result) + new_col = generate_multiindex(len(modin_df.columns)) + modin_df.columns = new_col + pandas_df.columns = new_col - # test level - modin_df_multi_level = modin_df.copy() - pandas_df_multi_level = pandas_df.copy() - axis = modin_df._get_axis_number(axis) if axis is not None else 0 - levels = 3 - axis_names_list = [["a", "b", "c"], None] - for axis_names in axis_names_list: - if axis == 0: - new_idx = pandas.MultiIndex.from_tuples( - [(i // 4, i // 2, i) for i in range(len(modin_df.index))], - names=axis_names, - ) - modin_df_multi_level.index = new_idx - pandas_df_multi_level.index = new_idx - else: - new_col = pandas.MultiIndex.from_tuples( - [(i // 4, i // 2, i) for i in range(len(modin_df.columns))], - names=axis_names, - ) - modin_df_multi_level.columns = new_col - pandas_df_multi_level.columns = new_col - - for level in list(range(levels)) + (axis_names if axis_names else []): - try: - pandas_multi_level_result = pandas_df_multi_level.any( - axis=axis, bool_only=bool_only, level=level, skipna=skipna - ) - - except Exception as e: - with pytest.raises(type(e)): - modin_df_multi_level.any( - axis=axis, bool_only=bool_only, level=level, skipna=skipna - ) - else: - modin_multi_level_result = modin_df_multi_level.any( - axis=axis, bool_only=bool_only, level=level, skipna=skipna - ) - - df_equals(modin_multi_level_result, pandas_multi_level_result) + eval_general( + modin_df, pandas_df, lambda df: getattr(df, method)(axis=axis, level=level), + ) class TestDataFrameReduction_B: diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py index 8c794b0cfc6..2ae987f2d45 100644 --- a/modin/pandas/test/utils.py +++ b/modin/pandas/test/utils.py @@ -127,6 +127,13 @@ test_data_values = list(test_data.values()) test_data_keys = list(test_data.keys()) +test_bool_data = { + "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): random_state.choice( + [True, False], size=(NROWS) + ) + for i in range(NCOLS) +} + test_data_with_duplicates = { "no_duplicates": { "col{}".format(int((i - NCOLS / 2) % NCOLS + 1)): range(NROWS) @@ -681,10 +688,10 @@ def generate_multiindex(index): return df1, df2 -def generate_multiindex(cols_number): +def generate_multiindex(elements_number): arrays = [ - random_state.choice(["bar", "baz", "foo", "qux"], cols_number), - random_state.choice(["one", "two"], cols_number), + random_state.choice(["bar", "baz", "foo", "qux"], elements_number), + random_state.choice(["one", "two"], elements_number), ] return pd.MultiIndex.from_tuples(list(zip(*arrays)), names=["first", "second"])