From aeb9ef6960c4d4836f57cb93ee4d100270ed27fc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 10 Sep 2022 00:02:58 +0200 Subject: [PATCH 1/2] BUG: Fix pyarrow groupby tests (#48443) # Conflicts: # pandas/tests/extension/test_arrow.py --- pandas/core/series.py | 5 ++- pandas/tests/extension/test_arrow.py | 49 +++++++++------------------- 2 files changed, 19 insertions(+), 35 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d2f66e9bd36e2..0e6f40564c003 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -572,7 +572,10 @@ def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: """ labels = ensure_index(labels) - if labels._is_all_dates: + if labels._is_all_dates and not ( + type(labels) is Index and not isinstance(labels.dtype, np.dtype) + ): + # exclude e.g. timestamp[ns][pyarrow] dtype from this casting deep_labels = labels if isinstance(labels, CategoricalIndex): deep_labels = labels.categories diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 9100b67edbe69..43ba651a9e60a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -31,6 +31,7 @@ pa_version_under8p0, pa_version_under9p0, ) +from pandas.errors import PerformanceWarning import pandas as pd import pandas._testing as tm @@ -515,15 +516,6 @@ def test_groupby_extension_no_sort(self, data_for_grouping, request): reason=f"pyarrow doesn't support factorizing {pa_dtype}", ) ) - elif pa.types.is_date(pa_dtype) or ( - pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None - ): - request.node.add_marker( - pytest.mark.xfail( - raises=AttributeError, - reason="GH 34986", - ) - ) super().test_groupby_extension_no_sort(data_for_grouping) def test_groupby_extension_transform(self, data_for_grouping, request): @@ -547,8 +539,7 @@ def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): pa_dtype = data_for_grouping.dtype.pyarrow_dtype - # Is there a better way to get the "series" ID for groupby_apply_op? - is_series = "series" in request.node.nodeid + # TODO: Is there a better way to get the "object" ID for groupby_apply_op? is_object = "object" in request.node.nodeid if pa.types.is_duration(pa_dtype): request.node.add_marker( @@ -567,14 +558,10 @@ def test_groupby_extension_apply( reason="GH 47514: _concat_datetime expects axis arg.", ) ) - elif not is_series: - request.node.add_marker( - pytest.mark.xfail( - raises=AttributeError, - reason="GH 34986", - ) - ) - super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) def test_in_numeric_groupby(self, data_for_grouping, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype @@ -603,17 +590,10 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request): reason=f"pyarrow doesn't support factorizing {pa_dtype}", ) ) - elif as_index is True and ( - pa.types.is_date(pa_dtype) - or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False ): - request.node.add_marker( - pytest.mark.xfail( - raises=AttributeError, - reason="GH 34986", - ) - ) - super().test_groupby_extension_agg(as_index, data_for_grouping) + super().test_groupby_extension_agg(as_index, data_for_grouping) class TestBaseDtype(base.BaseDtypeTests): @@ -1443,12 +1423,13 @@ def test_diff(self, data, periods, request): @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): pa_dtype = all_data.dtype.pyarrow_dtype - if pa.types.is_date(pa_dtype) or ( - pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None - ): + if ( + pa.types.is_date(pa_dtype) + or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) + ) and dropna: request.node.add_marker( pytest.mark.xfail( - raises=AttributeError, + raises=NotImplementedError, # tries casting to i8 reason="GH 34986", ) ) @@ -1468,7 +1449,7 @@ def test_value_counts_with_normalize(self, data, request): ): request.node.add_marker( pytest.mark.xfail( - raises=AttributeError, + raises=NotImplementedError, # tries casting to i8 reason="GH 34986", ) ) From f36685eb7f09e66ce71c2810606f443d6c2245d2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 10 Sep 2022 12:54:34 +0200 Subject: [PATCH 2/2] CI: Fix failing tests (#48493) --- pandas/tests/extension/test_arrow.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 43ba651a9e60a..53d59c78b40cc 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1423,17 +1423,7 @@ def test_diff(self, data, periods, request): @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): pa_dtype = all_data.dtype.pyarrow_dtype - if ( - pa.types.is_date(pa_dtype) - or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) - ) and dropna: - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, # tries casting to i8 - reason="GH 34986", - ) - ) - elif pa.types.is_duration(pa_dtype): + if pa.types.is_duration(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, @@ -1444,16 +1434,7 @@ def test_value_counts(self, all_data, dropna, request): def test_value_counts_with_normalize(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if pa.types.is_date(pa_dtype) or ( - pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None - ): - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, # tries casting to i8 - reason="GH 34986", - ) - ) - elif pa.types.is_duration(pa_dtype): + if pa.types.is_duration(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError,