From ef32d4cf7ba8daaffdaf9a972230321963460907 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 12 Sep 2022 20:34:02 +0200 Subject: [PATCH] BUG: DataFrame.pivot not respecting None as column name (#48293) * BUG: DataFrame.pivot not respecting None as column name * Add gh ref * Fix test * Add comment * Update doc/source/whatsnew/v1.6.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v1.6.0.rst | 1 + pandas/core/frame.py | 4 +- pandas/core/reshape/pivot.py | 31 ++++++++---- pandas/tests/reshape/test_pivot.py | 50 ++++++++++++++++++- pandas/tests/reshape/test_pivot_multilevel.py | 8 +-- 5 files changed, 80 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 00d03dc5b1f92..c3df675f15e63 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -196,6 +196,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ +- Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`) - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74cb9fae4fd20..c52a7b0daa30e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8583,7 +8583,9 @@ def groupby( @Substitution("") @Appender(_shared_docs["pivot"]) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) - def pivot(self, index=None, columns=None, values=None) -> DataFrame: + def pivot( + self, index=lib.NoDefault, columns=lib.NoDefault, values=lib.NoDefault + ) -> DataFrame: from pandas.core.reshape.pivot import pivot return pivot(self, index=index, columns=columns, values=values) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 75323a05d3b41..b14c49e735355 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -10,6 +10,7 @@ import numpy as np +from pandas._libs import lib from pandas._typing import ( AggFuncType, AggFuncTypeBase, @@ -482,30 +483,37 @@ def _convert_by(by): @deprecate_nonkeyword_arguments(version=None, allowed_args=["data"]) def pivot( data: DataFrame, - index: IndexLabel | None = None, - columns: IndexLabel | None = None, - values: IndexLabel | None = None, + index: IndexLabel | lib.NoDefault = lib.NoDefault, + columns: IndexLabel | lib.NoDefault = lib.NoDefault, + values: IndexLabel | lib.NoDefault = lib.NoDefault, ) -> DataFrame: - if columns is None: + if columns is lib.NoDefault: raise TypeError("pivot() missing 1 required argument: 'columns'") columns_listlike = com.convert_to_list_like(columns) + # If columns is None we will create a MultiIndex level with None as name + # which might cause duplicated names because None is the default for + # level names + data.index.names = [ + name if name is not None else lib.NoDefault for name in data.index.names + ] + indexed: DataFrame | Series - if values is None: - if index is not None: + if values is lib.NoDefault: + if index is not lib.NoDefault: cols = com.convert_to_list_like(index) else: cols = [] - append = index is None + append = index is lib.NoDefault # error: Unsupported operand types for + ("List[Any]" and "ExtensionArray") # error: Unsupported left operand type for + ("ExtensionArray") indexed = data.set_index( cols + columns_listlike, append=append # type: ignore[operator] ) else: - if index is None: + if index is lib.NoDefault: if isinstance(data.index, MultiIndex): # GH 23955 index_list = [ @@ -531,7 +539,12 @@ def pivot( # error: Argument 1 to "unstack" of "DataFrame" has incompatible type "Union # [List[Any], ExtensionArray, ndarray[Any, Any], Index, Series]"; expected # "Hashable" - return indexed.unstack(columns_listlike) # type: ignore[arg-type] + result = indexed.unstack(columns_listlike) # type: ignore[arg-type] + result.index.names = [ + name if name is not lib.NoDefault else None for name in result.index.names + ] + + return result def crosstab( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 30859e9fdafc0..3fff9eb141e43 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2341,7 +2341,7 @@ def test_pivot_index_list_values_none_immutable_args(self): ) index = ["lev1", "lev2"] columns = ["lev3"] - result = df.pivot(index=index, columns=columns, values=None) + result = df.pivot(index=index, columns=columns) expected = DataFrame( np.array( @@ -2365,3 +2365,51 @@ def test_pivot_index_list_values_none_immutable_args(self): assert index == ["lev1", "lev2"] assert columns == ["lev3"] + + def test_pivot_columns_not_given(self): + # GH#48293 + df = DataFrame({"a": [1], "b": 1}) + with pytest.raises(TypeError, match="missing 1 required argument"): + df.pivot() + + def test_pivot_columns_is_none(self): + # GH#48293 + df = DataFrame({None: [1], "b": 2, "c": 3}) + result = df.pivot(columns=None) + expected = DataFrame({("b", 1): [2], ("c", 1): 3}) + tm.assert_frame_equal(result, expected) + + result = df.pivot(columns=None, index="b") + expected = DataFrame({("c", 1): 3}, index=Index([2], name="b")) + tm.assert_frame_equal(result, expected) + + result = df.pivot(columns=None, index="b", values="c") + expected = DataFrame({1: 3}, index=Index([2], name="b")) + tm.assert_frame_equal(result, expected) + + def test_pivot_index_is_none(self): + # GH#48293 + df = DataFrame({None: [1], "b": 2, "c": 3}) + + result = df.pivot(columns="b", index=None) + expected = DataFrame({("c", 2): 3}, index=[1]) + expected.columns.names = [None, "b"] + tm.assert_frame_equal(result, expected) + + result = df.pivot(columns="b", index=None, values="c") + expected = DataFrame(3, index=[1], columns=Index([2], name="b")) + tm.assert_frame_equal(result, expected) + + def test_pivot_values_is_none(self): + # GH#48293 + df = DataFrame({None: [1], "b": 2, "c": 3}) + + result = df.pivot(columns="b", index="c", values=None) + expected = DataFrame( + 1, index=Index([3], name="c"), columns=Index([2], name="b") + ) + tm.assert_frame_equal(result, expected) + + result = df.pivot(columns="b", values=None) + expected = DataFrame(1, index=[0], columns=Index([2], name="b")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index 308f7329b128f..c6931a1961702 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._libs import lib + import pandas as pd from pandas import ( Index, @@ -33,7 +35,7 @@ ( ["lev4"], "lev3", - None, + lib.NoDefault, [ [1.0, np.nan, 1.0, np.nan, 0.0, np.nan], [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0], @@ -70,7 +72,7 @@ ( ["lev1", "lev2"], "lev3", - None, + lib.NoDefault, [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]], MultiIndex.from_tuples( [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)], @@ -243,7 +245,7 @@ def test_pivot_df_multiindex_index_none(): ) df = df.set_index(["index_1", "index_2"]) - result = df.pivot(index=None, columns="label", values="value") + result = df.pivot(columns="label", values="value") expected = pd.DataFrame( [[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]], index=df.index,