Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: DataFrame.pivot not respecting None as column name #48293

Merged
merged 10 commits into from
Sep 12, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ Groupby/resample/rolling

Reshaping
^^^^^^^^^
- Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`)
- Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`)
-

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8583,7 +8583,9 @@ def groupby(
@Substitution("")
@Appender(_shared_docs["pivot"])
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def pivot(self, index=None, columns=None, values=None) -> DataFrame:
def pivot(
self, index=lib.NoDefault, columns=lib.NoDefault, values=lib.NoDefault
) -> DataFrame:
from pandas.core.reshape.pivot import pivot

return pivot(self, index=index, columns=columns, values=values)
Expand Down
31 changes: 22 additions & 9 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import numpy as np

from pandas._libs import lib
from pandas._typing import (
AggFuncType,
AggFuncTypeBase,
Expand Down Expand Up @@ -482,30 +483,37 @@ def _convert_by(by):
@deprecate_nonkeyword_arguments(version=None, allowed_args=["data"])
def pivot(
data: DataFrame,
index: IndexLabel | None = None,
columns: IndexLabel | None = None,
values: IndexLabel | None = None,
index: IndexLabel | lib.NoDefault = lib.NoDefault,
columns: IndexLabel | lib.NoDefault = lib.NoDefault,
values: IndexLabel | lib.NoDefault = lib.NoDefault,
) -> DataFrame:
if columns is None:
if columns is lib.NoDefault:
raise TypeError("pivot() missing 1 required argument: 'columns'")

columns_listlike = com.convert_to_list_like(columns)

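# If columns is None, we will create a MultiIndex level whose name is None.
# Unnamed index levels also carry None as their default name, so the names
# could end up duplicated. To avoid that, unnamed levels are temporarily
# renamed to a sentinel here and renamed back to None on the result.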
data.index.names = [
name if name is not None else lib.NoDefault for name in data.index.names
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
]

indexed: DataFrame | Series
if values is None:
if index is not None:
if values is lib.NoDefault:
if index is not lib.NoDefault:
cols = com.convert_to_list_like(index)
else:
cols = []

append = index is None
append = index is lib.NoDefault
# error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
# error: Unsupported left operand type for + ("ExtensionArray")
indexed = data.set_index(
cols + columns_listlike, append=append # type: ignore[operator]
)
else:
if index is None:
if index is lib.NoDefault:
if isinstance(data.index, MultiIndex):
# GH 23955
index_list = [
Expand All @@ -531,7 +539,12 @@ def pivot(
# error: Argument 1 to "unstack" of "DataFrame" has incompatible type "Union
# [List[Any], ExtensionArray, ndarray[Any, Any], Index, Series]"; expected
# "Hashable"
return indexed.unstack(columns_listlike) # type: ignore[arg-type]
result = indexed.unstack(columns_listlike) # type: ignore[arg-type]
result.index.names = [
name if name is not lib.NoDefault else None for name in result.index.names
]

return result


def crosstab(
Expand Down
50 changes: 49 additions & 1 deletion pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2341,7 +2341,7 @@ def test_pivot_index_list_values_none_immutable_args(self):
)
index = ["lev1", "lev2"]
columns = ["lev3"]
result = df.pivot(index=index, columns=columns, values=None)
result = df.pivot(index=index, columns=columns)

expected = DataFrame(
np.array(
Expand All @@ -2365,3 +2365,51 @@ def test_pivot_index_list_values_none_immutable_args(self):

assert index == ["lev1", "lev2"]
assert columns == ["lev3"]

def test_pivot_columns_not_given(self):
    # GH#48293
    # Omitting ``columns`` entirely must raise a TypeError.
    frame = DataFrame({"a": [1], "b": 1})
    expected_msg = "missing 1 required argument"
    with pytest.raises(TypeError, match=expected_msg):
        frame.pivot()

def test_pivot_columns_is_none(self):
    # GH#48293
    # ``columns=None`` must select the column labelled None. Each case pairs
    # the extra keyword arguments with the frame expected from the pivot.
    frame = DataFrame({None: [1], "b": 2, "c": 3})
    b_index = Index([2], name="b")
    cases = [
        # only ``columns`` given
        ({}, DataFrame({("b", 1): [2], ("c", 1): 3})),
        # ``columns`` and ``index`` given
        ({"index": "b"}, DataFrame({("c", 1): 3}, index=b_index)),
        # ``columns``, ``index`` and ``values`` given
        ({"index": "b", "values": "c"}, DataFrame({1: 3}, index=b_index)),
    ]
    for extra_kwargs, expected in cases:
        result = frame.pivot(columns=None, **extra_kwargs)
        tm.assert_frame_equal(result, expected)

def test_pivot_index_is_none(self):
    # GH#48293
    # ``index=None`` must select the column labelled None as the index.
    frame = DataFrame({None: [1], "b": 2, "c": 3})

    # Without ``values``: remaining column "c" forms a two-level column index.
    pivoted_all = frame.pivot(columns="b", index=None)
    expected_all = DataFrame({("c", 2): 3}, index=[1])
    expected_all.columns.names = [None, "b"]
    tm.assert_frame_equal(pivoted_all, expected_all)

    # With ``values="c"``: a single-level column index named "b".
    pivoted_c = frame.pivot(columns="b", index=None, values="c")
    b_columns = Index([2], name="b")
    expected_c = DataFrame(3, index=[1], columns=b_columns)
    tm.assert_frame_equal(pivoted_c, expected_c)

def test_pivot_values_is_none(self):
    # GH#48293
    # ``values=None`` must select the column labelled None as the values.
    frame = DataFrame({None: [1], "b": 2, "c": 3})
    b_columns = Index([2], name="b")

    # Explicit index taken from column "c".
    with_index = frame.pivot(columns="b", index="c", values=None)
    expected_with_index = DataFrame(
        1, index=Index([3], name="c"), columns=b_columns
    )
    tm.assert_frame_equal(with_index, expected_with_index)

    # No index given: the frame's existing index is kept.
    without_index = frame.pivot(columns="b", values=None)
    expected_without_index = DataFrame(1, index=[0], columns=b_columns)
    tm.assert_frame_equal(without_index, expected_without_index)
8 changes: 5 additions & 3 deletions pandas/tests/reshape/test_pivot_multilevel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._libs import lib

import pandas as pd
from pandas import (
Index,
Expand Down Expand Up @@ -33,7 +35,7 @@
(
["lev4"],
"lev3",
None,
lib.NoDefault,
[
[1.0, np.nan, 1.0, np.nan, 0.0, np.nan],
[np.nan, 1.0, np.nan, 1.0, np.nan, 1.0],
Expand Down Expand Up @@ -70,7 +72,7 @@
(
["lev1", "lev2"],
"lev3",
None,
lib.NoDefault,
[[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]],
MultiIndex.from_tuples(
[("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)],
Expand Down Expand Up @@ -243,7 +245,7 @@ def test_pivot_df_multiindex_index_none():
)
df = df.set_index(["index_1", "index_2"])

result = df.pivot(index=None, columns="label", values="value")
result = df.pivot(columns="label", values="value")
expected = pd.DataFrame(
[[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]],
index=df.index,
Expand Down