diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index dcf69dfda1ae8..fd156ccfc8b31 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -297,19 +297,10 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T if obj.ndim == 2: kwargs[k] = obj[[i]] - # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no - # attribute "tz" - if hasattr(arr, "tz") and arr.tz is None: # type: ignore[union-attr] - # DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock - - # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no - # attribute "_data" - arr = arr._data # type: ignore[union-attr] - elif arr.dtype.kind == "m" and not isinstance(arr, np.ndarray): - # TimedeltaArray needs to be converted to ndarray for TimedeltaBlock - - # error: "ExtensionArray" has no attribute "_data" - arr = arr._data # type: ignore[attr-defined] + if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray): + # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to + # convert for the Block constructors. + arr = np.asarray(arr) if self.ndim == 2: arr = ensure_block_shape(arr, 2) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0385e4482a32b..e491909aabe7a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -716,63 +716,6 @@ class TestBaseMissing(base.BaseMissingTests): def test_dropna_array(self, data_missing): super().test_dropna_array(data_missing) - def test_fillna_limit_pad(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_limit_pad(data_missing) - - def test_fillna_limit_backfill(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_limit_backfill(data_missing) - - def test_fillna_series(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_series(data_missing) - - def test_fillna_series_method( - self, data_missing, fillna_method, using_array_manager, request - ): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_series_method(data_missing, fillna_method) - - def test_fillna_frame(self, data_missing, using_array_manager, request): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_fillna_frame(data_missing) - class TestBasePrinting(base.BasePrintingTests): def test_series_repr(self, data, request): @@ -981,7 +924,7 @@ def test_setitem_scalar_series(self, data, box_in_series, request): ) super().test_setitem_scalar_series(data, box_in_series) - def test_setitem_sequence(self, data, box_in_series, using_array_manager, request): + def test_setitem_sequence(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -989,47 +932,9 @@ def test_setitem_sequence(self, data, box_in_series, using_array_manager, reques reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_sequence(data, box_in_series) - def test_setitem_sequence_mismatched_length_raises( - self, data, as_array, using_array_manager, request - ): - if using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_setitem_sequence_mismatched_length_raises(data, as_array) - - def test_setitem_empty_indexer( - self, data, box_in_series, using_array_manager, request - ): - if ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) - super().test_setitem_empty_indexer(data, box_in_series) - - def test_setitem_sequence_broadcasts( - self, data, box_in_series, using_array_manager, request - ): + def test_setitem_sequence_broadcasts(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1037,20 +942,10 @@ def test_setitem_sequence_broadcasts( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_sequence_broadcasts(data, box_in_series) @pytest.mark.parametrize("setter", ["loc", "iloc"]) - def test_setitem_scalar(self, data, setter, using_array_manager, request): + def test_setitem_scalar(self, data, setter, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1058,15 +953,9 @@ def test_setitem_scalar(self, data, setter, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_scalar(data, setter) - def test_setitem_loc_scalar_mixed(self, data, using_array_manager, request): + def test_setitem_loc_scalar_mixed(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1074,15 +963,9 @@ def test_setitem_loc_scalar_mixed(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_scalar_mixed(data) - def test_setitem_loc_scalar_single(self, data, using_array_manager, request): + def test_setitem_loc_scalar_single(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1090,17 +973,9 @@ def test_setitem_loc_scalar_single(self, data, using_array_manager, request): reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_scalar_single(data) - def test_setitem_loc_scalar_multiple_homogoneous( - self, data, using_array_manager, request - ): + def test_setitem_loc_scalar_multiple_homogoneous(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1108,15 +983,9 @@ def test_setitem_loc_scalar_multiple_homogoneous( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_scalar_multiple_homogoneous(data) - def test_setitem_iloc_scalar_mixed(self, data, using_array_manager, request): + def test_setitem_iloc_scalar_mixed(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1124,15 +993,9 @@ def test_setitem_iloc_scalar_mixed(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_iloc_scalar_mixed(data) - def test_setitem_iloc_scalar_single(self, data, using_array_manager, request): + def test_setitem_iloc_scalar_single(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1140,17 +1003,9 @@ def test_setitem_iloc_scalar_single(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_iloc_scalar_single(data) - def test_setitem_iloc_scalar_multiple_homogoneous( - self, data, using_array_manager, request - ): + def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1158,12 +1013,6 @@ def test_setitem_iloc_scalar_multiple_homogoneous( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_iloc_scalar_multiple_homogoneous(data) @pytest.mark.parametrize( @@ -1175,9 +1024,7 @@ def test_setitem_iloc_scalar_multiple_homogoneous( ], ids=["numpy-array", "boolean-array", "boolean-array-na"], ) - def test_setitem_mask( - self, data, mask, box_in_series, using_array_manager, request - ): + def test_setitem_mask(self, data, mask, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1185,21 +1032,9 @@ def test_setitem_mask( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask(data, mask, box_in_series) - def test_setitem_mask_boolean_array_with_na( - self, data, box_in_series, using_array_manager, request - ): + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) unit = getattr(data.dtype.pyarrow_dtype, "unit", None) if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": @@ -1208,16 +1043,6 @@ def test_setitem_mask_boolean_array_with_na( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask_boolean_array_with_na(data, box_in_series) @pytest.mark.parametrize( @@ -1225,9 +1050,7 @@ def test_setitem_mask_boolean_array_with_na( [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], ids=["list", "integer-array", "numpy-array"], ) - def test_setitem_integer_array( - self, data, idx, box_in_series, using_array_manager, request - ): + def test_setitem_integer_array(self, data, idx, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1235,23 +1058,11 @@ def test_setitem_integer_array( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_integer_array(data, idx, box_in_series) @pytest.mark.parametrize("as_callable", [True, False]) @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_aligned( - self, data, as_callable, setter, using_array_manager, request - ): + def test_setitem_mask_aligned(self, data, as_callable, setter, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1259,16 +1070,10 @@ def test_setitem_mask_aligned( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask_aligned(data, as_callable, setter) @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_broadcast(self, data, setter, using_array_manager, request): + def test_setitem_mask_broadcast(self, data, setter, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1276,12 +1081,6 @@ def test_setitem_mask_broadcast(self, data, setter, using_array_manager, request reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_mask_broadcast(data, setter) def test_setitem_tuple_index(self, data, request): @@ -1294,7 +1093,7 @@ def test_setitem_tuple_index(self, data, request): ) super().test_setitem_tuple_index(data) - def test_setitem_slice(self, data, box_in_series, using_array_manager, request): + def test_setitem_slice(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1302,19 +1101,9 @@ def test_setitem_slice(self, data, box_in_series, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and box_in_series - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_slice(data, box_in_series) - def test_setitem_loc_iloc_slice(self, data, using_array_manager, request): + def test_setitem_loc_iloc_slice(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1322,12 +1111,6 @@ def test_setitem_loc_iloc_slice(self, data, using_array_manager, request): reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_loc_iloc_slice(data) def test_setitem_slice_array(self, data, request): @@ -1340,9 +1123,7 @@ def test_setitem_slice_array(self, data, request): ) super().test_setitem_slice_array(data) - def test_setitem_with_expansion_dataframe_column( - self, data, full_indexer, using_array_manager, request - ): + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request): # Is there a better way to get the full_indexer id "null_slice"? is_null_slice = "null_slice" in request.node.nodeid tz = getattr(data.dtype.pyarrow_dtype, "tz", None) @@ -1352,21 +1133,9 @@ def test_setitem_with_expansion_dataframe_column( reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif ( - using_array_manager - and pa.types.is_duration(data.dtype.pyarrow_dtype) - and not is_null_slice - ): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_with_expansion_dataframe_column(data, full_indexer) - def test_setitem_with_expansion_row( - self, data, na_value, using_array_manager, request - ): + def test_setitem_with_expansion_row(self, data, na_value, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1374,15 +1143,9 @@ def test_setitem_with_expansion_row( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_with_expansion_row(data, na_value) - def test_setitem_frame_2d_values(self, data, using_array_manager, request): + def test_setitem_frame_2d_values(self, data, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -1390,12 +1153,6 @@ def test_setitem_frame_2d_values(self, data, using_array_manager, request): reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_frame_2d_values(data) @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") @@ -1678,26 +1435,6 @@ def test_factorize_empty(self, data, request): ) super().test_factorize_empty(data) - def test_fillna_copy_frame(self, data_missing, request, using_array_manager): - pa_dtype = data_missing.dtype.pyarrow_dtype - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - super().test_fillna_copy_frame(data_missing) - - def test_fillna_copy_series(self, data_missing, request, using_array_manager): - pa_dtype = data_missing.dtype.pyarrow_dtype - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - super().test_fillna_copy_series(data_missing) - def test_shift_fill_value(self, data, request): pa_dtype = data.dtype.pyarrow_dtype tz = getattr(pa_dtype, "tz", None) @@ -1735,16 +1472,10 @@ def test_insert(self, data, request): ) super().test_insert(data) - def test_combine_first(self, data, request, using_array_manager): + def test_combine_first(self, data, request): pa_dtype = data.dtype.pyarrow_dtype tz = getattr(pa_dtype, "tz", None) - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - elif pa_version_under2p0 and tz not in (None, "UTC"): + if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( pytest.mark.xfail( reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" @@ -1752,30 +1483,6 @@ def test_combine_first(self, data, request, using_array_manager): ) super().test_combine_first(data) - @pytest.mark.parametrize("frame", [True, False]) - @pytest.mark.parametrize( - "periods, indices", - [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])], - ) - def test_container_shift( - self, data, frame, periods, indices, request, using_array_manager - ): - pa_dtype = data.dtype.pyarrow_dtype - if ( - using_array_manager - and pa.types.is_duration(pa_dtype) - and periods in (-2, 2) - ): - request.node.add_marker( - pytest.mark.xfail( - reason=( - f"Checking ndim when using arraymanager with " - f"{pa_dtype} and periods={periods}" - ) - ) - ) - super().test_container_shift(data, frame, periods, indices) - @pytest.mark.xfail( reason="result dtype pyarrow[bool] better than expected dtype object" ) @@ -1803,15 +1510,9 @@ def test_searchsorted(self, data_for_sorting, as_series, request): ) super().test_searchsorted(data_for_sorting, as_series) - def test_where_series(self, data, na_value, as_frame, request, using_array_manager): + def test_where_series(self, data, na_value, as_frame, request): pa_dtype = data.dtype.pyarrow_dtype - if using_array_manager and pa.types.is_duration(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Checking ndim when using arraymanager with {pa_dtype}" - ) - ) - elif pa.types.is_temporal(pa_dtype): + if pa.types.is_temporal(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError,