From 14a0ba295de03a830f9caef239b5185881c8b108 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 7 Sep 2022 10:27:53 -0700 Subject: [PATCH] BUG: Fix pyarrow groupby tests --- pandas/core/series.py | 5 +++- pandas/tests/extension/test_arrow.py | 40 +++++----------------------- 2 files changed, 11 insertions(+), 34 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bf313925905f7..56b959a893d12 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -573,7 +573,10 @@ def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: """ labels = ensure_index(labels) - if labels._is_all_dates: + if labels._is_all_dates and not ( + type(labels) is Index and not isinstance(labels.dtype, np.dtype) + ): + # exclude e.g. timestamp[ns][pyarrow] dtype from this casting deep_labels = labels if isinstance(labels, CategoricalIndex): deep_labels = labels.categories diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0385e4482a32b..f700a7c918d49 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -516,15 +516,6 @@ def test_groupby_extension_no_sort(self, data_for_grouping, request): reason=f"pyarrow doesn't support factorizing {pa_dtype}", ) ) - elif pa.types.is_date(pa_dtype) or ( - pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None - ): - request.node.add_marker( - pytest.mark.xfail( - raises=AttributeError, - reason="GH 34986", - ) - ) super().test_groupby_extension_no_sort(data_for_grouping) def test_groupby_extension_transform(self, data_for_grouping, request): @@ -551,8 +542,7 @@ def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): pa_dtype = data_for_grouping.dtype.pyarrow_dtype - # Is there a better way to get the "series" ID for groupby_apply_op? - is_series = "series" in request.node.nodeid + # TODO: Is there a better way to get the "object" ID for groupby_apply_op? is_object = "object" in request.node.nodeid if pa.types.is_duration(pa_dtype): request.node.add_marker( @@ -571,13 +561,6 @@ def test_groupby_extension_apply( reason="GH 47514: _concat_datetime expects axis arg.", ) ) - elif not is_series: - request.node.add_marker( - pytest.mark.xfail( - raises=AttributeError, - reason="GH 34986", - ) - ) with tm.maybe_produces_warning( PerformanceWarning, pa_version_under7p0, check_stacklevel=False ): @@ -610,16 +593,6 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request): reason=f"pyarrow doesn't support factorizing {pa_dtype}", ) ) - elif as_index is True and ( - pa.types.is_date(pa_dtype) - or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) - ): - request.node.add_marker( - pytest.mark.xfail( - raises=AttributeError, - reason="GH 34986", - ) - ) with tm.maybe_produces_warning( PerformanceWarning, pa_version_under7p0, check_stacklevel=False ): @@ -1464,12 +1437,13 @@ def test_diff(self, data, periods, request): @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): pa_dtype = all_data.dtype.pyarrow_dtype - if pa.types.is_date(pa_dtype) or ( - pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None - ): + if ( + pa.types.is_date(pa_dtype) + or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None) + ) and dropna: request.node.add_marker( pytest.mark.xfail( - raises=AttributeError, + raises=NotImplementedError, # tries casting to i8 reason="GH 34986", ) ) @@ -1489,7 +1463,7 @@ def test_value_counts_with_normalize(self, data, request): ): request.node.add_marker( pytest.mark.xfail( - raises=AttributeError, + raises=NotImplementedError, # tries casting to i8 reason="GH 34986", ) )