Skip to content

Commit

Permalink
PERF: DataFrame.transpose with dt64tz (pandas-dev#40149)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and TLouf committed Jun 1, 2021
1 parent a73b25a commit 6b38fbf
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ Performance improvements
- Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`)
- Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`)
- Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`)
- Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`)
- Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`)

.. ---------------------------------------------------------------------------
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
from pandas._typing import ArrayLike

from pandas.core.dtypes.cast import maybe_promote
from pandas.core.dtypes.common import ensure_platform_int
from pandas.core.dtypes.common import (
ensure_platform_int,
is_1d_only_ea_obj,
)
from pandas.core.dtypes.missing import na_value_for_dtype

from pandas.core.construction import ensure_wrapped_if_datetimelike
Expand Down Expand Up @@ -91,12 +94,14 @@ def take_nd(

if not isinstance(arr, np.ndarray):
# i.e. ExtensionArray,
if arr.ndim == 2:
# e.g. DatetimeArray, TimedeltaArray
# includes for EA to catch DatetimeArray, TimedeltaArray
if not is_1d_only_ea_obj(arr):
# i.e. DatetimeArray, TimedeltaArray
arr = cast("NDArrayBackedExtensionArray", arr)
return arr.take(
indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
)

return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

arr = np.asarray(arr)
Expand Down
47 changes: 43 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
ensure_platform_int,
infer_dtype_from_object,
is_1d_only_ea_dtype,
is_1d_only_ea_obj,
is_bool_dtype,
is_dataclass,
is_datetime64_any_dtype,
Expand Down Expand Up @@ -139,7 +140,11 @@
)
from pandas.core.array_algos.take import take_2d_multi
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
TimedeltaArray,
)
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import (
extract_array,
Expand Down Expand Up @@ -852,6 +857,28 @@ def _can_fast_transpose(self) -> bool:
# TODO(EA2D) special case would be unnecessary with 2D EAs
return not is_1d_only_ea_dtype(dtype)

@property
def _values_compat(self) -> np.ndarray | DatetimeArray | TimedeltaArray:
    """
    Variant of ``._values`` that is allowed to hand back a 2D ExtensionArray.

    Returns the transpose of the single block's values when the manager is
    not an ArrayManager, holds exactly one block, and that block's values
    are not 1-dimensional.  In every other case this is simply
    ``self._values``.
    """
    manager = self._mgr

    # ArrayManager is excluded up front; its ``blocks`` are never inspected.
    if not isinstance(manager, ArrayManager):
        blks = manager.blocks
        if len(blks) == 1:
            block_values = blks[0].values
            if block_values.ndim != 1:
                # more generally, whatever we allow in
                # NDArrayBackedExtensionBlock
                return cast("DatetimeArray | TimedeltaArray", block_values).T

    # Fallback covers: ArrayManager, multiple (or zero) blocks,
    # and 1D-only ExtensionArrays.
    return self._values

# ----------------------------------------------------------------------
# Rendering Methods

Expand Down Expand Up @@ -3292,7 +3319,18 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
# construct the args

dtypes = list(self.dtypes)
if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]):

if self._can_fast_transpose:
# Note: tests pass without this, but this improves perf quite a bit.
new_vals = self._values_compat.T
if copy:
new_vals = new_vals.copy()

result = self._constructor(new_vals, index=self.columns, columns=self.index)

elif (
self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0])
):
# We have EAs with the same dtype. We can preserve that dtype in transpose.
dtype = dtypes[0]
arr_type = dtype.construct_array_type()
Expand Down Expand Up @@ -9760,8 +9798,9 @@ def func(values: np.ndarray):

def blk_func(values, axis=1):
if isinstance(values, ExtensionArray):
if values.ndim == 2:
# i.e. DatetimeArray, TimedeltaArray
if not is_1d_only_ea_obj(values) and not isinstance(
self._mgr, ArrayManager
):
return values._reduce(name, axis=1, skipna=skipna, **kwds)
return values._reduce(name, skipna=skipna, **kwds)
else:
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/frame/methods/test_transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,16 @@ def test_transpose_get_view(self, float_frame):
dft.values[:, 5:10] = 5

assert (float_frame.values[5:10] == 5).all()

@td.skip_array_manager_invalid_test
def test_transpose_get_view_dt64tzget_view(self):
    """Check that transposing a single-block dt64tz frame shares memory."""
    tz_index = date_range("2016-01-01", periods=6, tz="US/Pacific")
    values_2d = tz_index._data.reshape(3, 2)

    frame = DataFrame(values_2d)
    assert frame._mgr.nblocks == 1

    transposed = frame.T
    assert transposed._mgr.nblocks == 1

    # The transposed frame's only block must be backed by the same buffer
    # as the array the original frame was built from.
    block_values = transposed._mgr.blocks[0].values
    assert np.shares_memory(values_2d._data, block_values._data)

0 comments on commit 6b38fbf

Please sign in to comment.