diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 87da8a8dda8cf..9298bc6a61bae 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -673,6 +673,7 @@ Performance improvements
 - Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
 - Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
 - Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
+- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py
index 93d87f6bb4dfa..201e177d8bb10 100644
--- a/pandas/core/array_algos/take.py
+++ b/pandas/core/array_algos/take.py
@@ -16,7 +16,10 @@
 from pandas._typing import ArrayLike
 
 from pandas.core.dtypes.cast import maybe_promote
-from pandas.core.dtypes.common import ensure_platform_int
+from pandas.core.dtypes.common import (
+    ensure_platform_int,
+    is_1d_only_ea_obj,
+)
 from pandas.core.dtypes.missing import na_value_for_dtype
 
 from pandas.core.construction import ensure_wrapped_if_datetimelike
@@ -91,12 +94,14 @@ def take_nd(
 
     if not isinstance(arr, np.ndarray):
         # i.e. ExtensionArray,
-        if arr.ndim == 2:
-            # e.g. DatetimeArray, TimedeltArray
+        # only EAs that are not 1D-only get `axis` forwarded to their .take
+        if not is_1d_only_ea_obj(arr):
+            # i.e. DatetimeArray, TimedeltaArray
             arr = cast("NDArrayBackedExtensionArray", arr)
             return arr.take(
                 indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
             )
+
         return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
 
     arr = np.asarray(arr)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d90487647d35b..efefeb23445af 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -101,6 +101,7 @@
     ensure_platform_int,
     infer_dtype_from_object,
     is_1d_only_ea_dtype,
+    is_1d_only_ea_obj,
     is_bool_dtype,
     is_dataclass,
     is_datetime64_any_dtype,
@@ -139,7 +140,11 @@
 )
 from pandas.core.array_algos.take import take_2d_multi
 from pandas.core.arraylike import OpsMixin
-from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays import (
+    DatetimeArray,
+    ExtensionArray,
+    TimedeltaArray,
+)
 from pandas.core.arrays.sparse import SparseFrameAccessor
 from pandas.core.construction import (
     extract_array,
@@ -852,6 +857,28 @@ def _can_fast_transpose(self) -> bool:
         # TODO(EA2D) special case would be unnecessary with 2D EAs
         return not is_1d_only_ea_dtype(dtype)
 
+    @property
+    def _values_compat(self) -> np.ndarray | DatetimeArray | TimedeltaArray:
+        """
+        Analogue to ._values; a single non-1D block may give a 2D ExtensionArray.
+        """
+        mgr = self._mgr
+        if isinstance(mgr, ArrayManager):
+            return self._values
+
+        blocks = mgr.blocks
+        if len(blocks) != 1:
+            return self._values
+
+        arr = blocks[0].values
+        if arr.ndim == 1:
+            # non-2D ExtensionArray
+            return self._values
+
+        # more generally, whatever we allow in NDArrayBackedExtensionBlock
+        arr = cast("DatetimeArray | TimedeltaArray", arr)
+        return arr.T
+
     # ----------------------------------------------------------------------
     # Rendering Methods
 
@@ -3292,7 +3319,18 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
         # construct the args
 
         dtypes = list(self.dtypes)
-        if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]):
+
+        if self._can_fast_transpose:
+            # Note: tests pass without this, but this improves perf quite a bit.
+            new_vals = self._values_compat.T
+            if copy:
+                new_vals = new_vals.copy()
+
+            result = self._constructor(new_vals, index=self.columns, columns=self.index)
+
+        elif (
+            self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0])
+        ):
             # We have EAs with the same dtype. We can preserve that dtype in transpose.
             dtype = dtypes[0]
             arr_type = dtype.construct_array_type()
@@ -9760,8 +9798,9 @@ def func(values: np.ndarray):
 
         def blk_func(values, axis=1):
             if isinstance(values, ExtensionArray):
-                if values.ndim == 2:
-                    # i.e. DatetimeArray, TimedeltaArray
+                if not is_1d_only_ea_obj(values) and not isinstance(
+                    self._mgr, ArrayManager
+                ):
                     return values._reduce(name, axis=1, skipna=skipna, **kwds)
                 return values._reduce(name, skipna=skipna, **kwds)
             else:
diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py
index d6ab3268c8c37..62537d37a8c11 100644
--- a/pandas/tests/frame/methods/test_transpose.py
+++ b/pandas/tests/frame/methods/test_transpose.py
@@ -90,3 +90,16 @@ def test_transpose_get_view(self, float_frame):
         dft.values[:, 5:10] = 5
 
         assert (float_frame.values[5:10] == 5).all()
+
+    @td.skip_array_manager_invalid_test
+    def test_transpose_get_view_dt64tzget_view(self):
+        dti = date_range("2016-01-01", periods=6, tz="US/Pacific")
+        arr = dti._data.reshape(3, 2)
+        df = DataFrame(arr)
+        assert df._mgr.nblocks == 1
+
+        result = df.T
+        assert result._mgr.nblocks == 1
+
+        rtrip = result._mgr.blocks[0].values
+        assert np.shares_memory(arr._data, rtrip._data)