diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4671170fa79ae..cc8e6f50cba2b 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -863,6 +863,7 @@ Reshaping - Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) - Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) - Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) +- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dfda1470413b7..45b89e8425b17 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2485,7 +2485,7 @@ def memory_usage(self, index=True, deep=False): ) return result - def transpose(self, *args, **kwargs): + def transpose(self, *args, copy: bool = False): """ Transpose index and columns. @@ -2495,9 +2495,14 @@ def transpose(self, *args, **kwargs): Parameters ---------- - *args, **kwargs - Additional arguments and keywords have no effect but might be - accepted for compatibility with numpy. + *args : tuple, optional + Accepted for compatibility with NumPy. + copy : bool, default False + Whether to copy the data after transposing, even for DataFrames + with a single dtype. + + Note that a copy is always required for mixed dtype DataFrames, + or for DataFrames with any extension types. 
Returns ------- @@ -2578,7 +2583,29 @@ def transpose(self, *args, **kwargs): dtype: object """ nv.validate_transpose(args, dict()) - return super().transpose(1, 0, **kwargs) + # NOTE(review): dict(zip(self.index, ...)) below collapses duplicate index labels; confirm. + + dtypes = list(self.dtypes) + if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]): + # We have EAs with the same dtype. We can preserve that dtype in transpose. + dtype = dtypes[0] + arr_type = dtype.construct_array_type() + values = self.values + + new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] + result = self._constructor( + dict(zip(self.index, new_values)), index=self.columns + ) + + else: + new_values = self.values.T + if copy: + new_values = new_values.copy() + result = self._constructor( + new_values, index=self.columns, columns=self.index + ) + + return result.__finalize__(self) T = property(transpose) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f846d5883a8b5..ab5ee8414d9c8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -643,50 +643,6 @@ def _set_axis(self, axis, labels): self._data.set_axis(axis, labels) self._clear_item_cache() - def transpose(self, *args, **kwargs): - """ - Permute the dimensions of the %(klass)s - - Parameters - ---------- - args : %(args_transpose)s - copy : bool, default False - Make a copy of the underlying data. Mixed-dtype data will - always result in a copy - **kwargs - Additional keyword arguments will be passed to the function. 
- - Returns - ------- - y : same as input - - Examples - -------- - >>> p.transpose(2, 0, 1) - >>> p.transpose(2, 0, 1, copy=True) - """ - - # construct the args - axes, kwargs = self._construct_axes_from_arguments( - args, kwargs, require_all=True - ) - axes_names = tuple(self._get_axis_name(axes[a]) for a in self._AXIS_ORDERS) - axes_numbers = tuple(self._get_axis_number(axes[a]) for a in self._AXIS_ORDERS) - - # we must have unique axes - if len(axes) != len(set(axes)): - raise ValueError(f"Must specify {self._AXIS_LEN} unique axes") - - new_axes = self._construct_axes_dict_from( - self, [self._get_axis(x) for x in axes_names] - ) - new_values = self.values.transpose(axes_numbers) - if kwargs.pop("copy", None) or (len(args) and args[-1]): - new_values = new_values.copy() - - nv.validate_transpose(tuple(), kwargs) - return self._constructor(new_values, **new_axes).__finalize__(self) - def swapaxes(self, axis1, axis2, copy=True): """ Interchange axes and swap values axes appropriately. diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index 33dda75e2f110..64588af3e3053 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -235,25 +235,6 @@ def box_df_fail(request): return request.param -@pytest.fixture( - params=[ - (pd.Index, False), - (pd.Series, False), - (pd.DataFrame, False), - pytest.param((pd.DataFrame, True), marks=pytest.mark.xfail), - (tm.to_array, False), - ], - ids=id_func, -) -def box_transpose_fail(request): - """ - Fixture similar to `box` but testing both transpose cases for DataFrame, - with the transpose=True case xfailed. 
- """ - # GH#23620 - return request.param - - @pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func) def box_with_array(request): """ diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 5917c8deee8a9..f0edcd11567d2 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -753,18 +753,18 @@ def test_pi_sub_isub_offset(self): rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) - def test_pi_add_offset_n_gt1(self, box_transpose_fail): + @pytest.mark.parametrize("transpose", [True, False]) + def test_pi_add_offset_n_gt1(self, box_with_array, transpose): # GH#23215 # add offset to PeriodIndex with freq.n > 1 - box, transpose = box_transpose_fail per = pd.Period("2016-01", freq="2M") pi = pd.PeriodIndex([per]) expected = pd.PeriodIndex(["2016-03"], freq="2M") - pi = tm.box_expected(pi, box, transpose=transpose) - expected = tm.box_expected(expected, box, transpose=transpose) + pi = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) result = pi + per.freq tm.assert_equal(result, expected) @@ -982,16 +982,15 @@ def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): with pytest.raises(IncompatibleFrequency, match=msg): rng -= other - def test_parr_add_sub_td64_nat(self, box_transpose_fail): + @pytest.mark.parametrize("transpose", [True, False]) + def test_parr_add_sub_td64_nat(self, box_with_array, transpose): # GH#23320 special handling for timedelta64("NaT") - box, transpose = box_transpose_fail - pi = pd.period_range("1994-04-01", periods=9, freq="19D") other = np.timedelta64("NaT") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") - obj = tm.box_expected(pi, box, transpose=transpose) - expected = tm.box_expected(expected, box, transpose=transpose) + obj = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = 
tm.box_expected(expected, box_with_array, transpose=transpose) result = obj + other tm.assert_equal(result, expected) @@ -1009,16 +1008,12 @@ def test_parr_add_sub_td64_nat(self, box_transpose_fail): TimedeltaArray._from_sequence(["NaT"] * 9), ], ) - def test_parr_add_sub_tdt64_nat_array(self, box_df_fail, other): - # FIXME: DataFrame fails because when when operating column-wise - # timedelta64 entries become NaT and are treated like datetimes - box = box_df_fail - + def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): pi = pd.period_range("1994-04-01", periods=9, freq="19D") expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") - obj = tm.box_expected(pi, box) - expected = tm.box_expected(expected, box) + obj = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = obj + other tm.assert_equal(result, expected) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 90e607343297d..89c9ed3674a66 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -295,3 +295,19 @@ def test_ravel(self, data): # Check that we have a view, not a copy result[0] = result[1] assert data[0] == data[1] + + def test_transpose(self, data): + df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"]) + result = df.T + expected = pd.DataFrame( + { + "a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype), + "b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype), + "c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype), + "d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype), + }, + index=["A", "B"], + ) + self.assert_frame_equal(result, expected) + self.assert_frame_equal(np.transpose(np.transpose(df)), df) + self.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]]) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 
16a4caa7d7ebe..01f2565e2ee58 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -163,6 +163,10 @@ def test_unstack(self, data, index): # this matches otherwise return super().test_unstack(data, index) + @pytest.mark.xfail(reason="Inconsistent sizes.") + def test_transpose(self, data): + super().test_transpose(data) + class TestGetitem(BaseJSON, base.BaseGetitemTests): pass diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index beb3fc80eccd6..55a617caf28ce 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -332,6 +332,10 @@ def test_merge_on_extension_array_duplicates(self, data): # Fails creating expected super().test_merge_on_extension_array_duplicates(data) + @skip_nested + def test_transpose(self, data): + super().test_transpose(data) + class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): @skip_nested