Skip to content

Commit

Permalink
BUG: preserve EA dtype in transpose (pandas-dev#30091)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored and AlexKirko committed Dec 29, 2019
1 parent bdfdcfa commit 5054b34
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 84 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,7 @@ Reshaping
- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`)
- Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`)
- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
-

Expand Down
37 changes: 32 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2485,7 +2485,7 @@ def memory_usage(self, index=True, deep=False):
)
return result

def transpose(self, *args, **kwargs):
def transpose(self, *args, copy: bool = False):
"""
Transpose index and columns.
Expand All @@ -2495,9 +2495,14 @@ def transpose(self, *args, **kwargs):
Parameters
----------
*args, **kwargs
Additional arguments and keywords have no effect but might be
accepted for compatibility with numpy.
*args : tuple, optional
Accepted for compatibility with NumPy.
copy : bool, default False
Whether to copy the data after transposing, even for DataFrames
with a single dtype.
Note that a copy is always required for mixed dtype DataFrames,
or for DataFrames with any extension types.
Returns
-------
Expand Down Expand Up @@ -2578,7 +2583,29 @@ def transpose(self, *args, **kwargs):
dtype: object
"""
nv.validate_transpose(args, dict())
return super().transpose(1, 0, **kwargs)
# construct the args

dtypes = list(self.dtypes)
if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]):
# We have EAs with the same dtype. We can preserve that dtype in transpose.
dtype = dtypes[0]
arr_type = dtype.construct_array_type()
values = self.values

new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
result = self._constructor(
dict(zip(self.index, new_values)), index=self.columns
)

else:
new_values = self.values.T
if copy:
new_values = new_values.copy()
result = self._constructor(
new_values, index=self.columns, columns=self.index
)

return result.__finalize__(self)

T = property(transpose)

Expand Down
44 changes: 0 additions & 44 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,50 +643,6 @@ def _set_axis(self, axis, labels):
self._data.set_axis(axis, labels)
self._clear_item_cache()

def transpose(self, *args, **kwargs):
"""
Permute the dimensions of the %(klass)s
Parameters
----------
args : %(args_transpose)s
copy : bool, default False
Make a copy of the underlying data. Mixed-dtype data will
always result in a copy
**kwargs
Additional keyword arguments will be passed to the function.
Returns
-------
y : same as input
Examples
--------
>>> p.transpose(2, 0, 1)
>>> p.transpose(2, 0, 1, copy=True)
"""

# construct the args
axes, kwargs = self._construct_axes_from_arguments(
args, kwargs, require_all=True
)
axes_names = tuple(self._get_axis_name(axes[a]) for a in self._AXIS_ORDERS)
axes_numbers = tuple(self._get_axis_number(axes[a]) for a in self._AXIS_ORDERS)

# we must have unique axes
if len(axes) != len(set(axes)):
raise ValueError(f"Must specify {self._AXIS_LEN} unique axes")

new_axes = self._construct_axes_dict_from(
self, [self._get_axis(x) for x in axes_names]
)
new_values = self.values.transpose(axes_numbers)
if kwargs.pop("copy", None) or (len(args) and args[-1]):
new_values = new_values.copy()

nv.validate_transpose(tuple(), kwargs)
return self._constructor(new_values, **new_axes).__finalize__(self)

def swapaxes(self, axis1, axis2, copy=True):
"""
Interchange axes and swap values axes appropriately.
Expand Down
19 changes: 0 additions & 19 deletions pandas/tests/arithmetic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,25 +235,6 @@ def box_df_fail(request):
return request.param


@pytest.fixture(
params=[
(pd.Index, False),
(pd.Series, False),
(pd.DataFrame, False),
pytest.param((pd.DataFrame, True), marks=pytest.mark.xfail),
(tm.to_array, False),
],
ids=id_func,
)
def box_transpose_fail(request):
"""
Fixture similar to `box` but testing both transpose cases for DataFrame,
with the transpose=True case xfailed.
"""
# GH#23620
return request.param


@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func)
def box_with_array(request):
"""
Expand Down
27 changes: 11 additions & 16 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,18 +753,18 @@ def test_pi_sub_isub_offset(self):
rng -= pd.offsets.MonthEnd(5)
tm.assert_index_equal(rng, expected)

def test_pi_add_offset_n_gt1(self, box_transpose_fail):
@pytest.mark.parametrize("transpose", [True, False])
def test_pi_add_offset_n_gt1(self, box_with_array, transpose):
# GH#23215
# add offset to PeriodIndex with freq.n > 1
box, transpose = box_transpose_fail

per = pd.Period("2016-01", freq="2M")
pi = pd.PeriodIndex([per])

expected = pd.PeriodIndex(["2016-03"], freq="2M")

pi = tm.box_expected(pi, box, transpose=transpose)
expected = tm.box_expected(expected, box, transpose=transpose)
pi = tm.box_expected(pi, box_with_array, transpose=transpose)
expected = tm.box_expected(expected, box_with_array, transpose=transpose)

result = pi + per.freq
tm.assert_equal(result, expected)
Expand Down Expand Up @@ -982,16 +982,15 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq):
with pytest.raises(IncompatibleFrequency, match=msg):
rng -= other

def test_parr_add_sub_td64_nat(self, box_transpose_fail):
@pytest.mark.parametrize("transpose", [True, False])
def test_parr_add_sub_td64_nat(self, box_with_array, transpose):
# GH#23320 special handling for timedelta64("NaT")
box, transpose = box_transpose_fail

pi = pd.period_range("1994-04-01", periods=9, freq="19D")
other = np.timedelta64("NaT")
expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")

obj = tm.box_expected(pi, box, transpose=transpose)
expected = tm.box_expected(expected, box, transpose=transpose)
obj = tm.box_expected(pi, box_with_array, transpose=transpose)
expected = tm.box_expected(expected, box_with_array, transpose=transpose)

result = obj + other
tm.assert_equal(result, expected)
Expand All @@ -1009,16 +1008,12 @@ def test_parr_add_sub_td64_nat(self, box_transpose_fail):
TimedeltaArray._from_sequence(["NaT"] * 9),
],
)
def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other):
# FIXME: DataFrame fails because when when operating column-wise
# timedelta64 entries become NaT and are treated like datetimes
box = box_df_fail

def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other):
pi = pd.period_range("1994-04-01", periods=9, freq="19D")
expected = pd.PeriodIndex(["NaT"] * 9, freq="19D")

obj = tm.box_expected(pi, box)
expected = tm.box_expected(expected, box)
obj = tm.box_expected(pi, box_with_array)
expected = tm.box_expected(expected, box_with_array)

result = obj + other
tm.assert_equal(result, expected)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/extension/base/reshaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,19 @@ def test_ravel(self, data):
# Check that we have a view, not a copy
result[0] = result[1]
assert data[0] == data[1]

def test_transpose(self, data):
df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"])
result = df.T
expected = pd.DataFrame(
{
"a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype),
"b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype),
"c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype),
"d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype),
},
index=["A", "B"],
)
self.assert_frame_equal(result, expected)
self.assert_frame_equal(np.transpose(np.transpose(df)), df)
self.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]])
4 changes: 4 additions & 0 deletions pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ def test_unstack(self, data, index):
# this matches otherwise
return super().test_unstack(data, index)

@pytest.mark.xfail(reason="Inconsistent sizes.")
def test_transpose(self, data):
super().test_transpose(data)


class TestGetitem(BaseJSON, base.BaseGetitemTests):
pass
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ def test_merge_on_extension_array_duplicates(self, data):
# Fails creating expected
super().test_merge_on_extension_array_duplicates(data)

@skip_nested
def test_transpose(self, data):
super().test_transpose(data)


class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
@skip_nested
Expand Down

0 comments on commit 5054b34

Please sign in to comment.