Skip to content

Commit

Permalink
BUG: DataFrame.pivot not respecting None as column name (#48293)
Browse files Browse the repository at this point in the history
* BUG: DataFrame.pivot not respecting None as column name

* Add gh ref

* Fix test

* Add comment

* Update doc/source/whatsnew/v1.6.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
  • Loading branch information
phofl and mroeschke committed Sep 12, 2022
1 parent 9f9d80f commit ef32d4c
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 14 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ Groupby/resample/rolling

Reshaping
^^^^^^^^^
- Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`)
- Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`)
-

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8583,7 +8583,9 @@ def groupby(
@Substitution("")
@Appender(_shared_docs["pivot"])
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def pivot(self, index=None, columns=None, values=None) -> DataFrame:
def pivot(
self, index=lib.NoDefault, columns=lib.NoDefault, values=lib.NoDefault
) -> DataFrame:
from pandas.core.reshape.pivot import pivot

return pivot(self, index=index, columns=columns, values=values)
Expand Down
31 changes: 22 additions & 9 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import numpy as np

from pandas._libs import lib
from pandas._typing import (
AggFuncType,
AggFuncTypeBase,
Expand Down Expand Up @@ -482,30 +483,37 @@ def _convert_by(by):
@deprecate_nonkeyword_arguments(version=None, allowed_args=["data"])
def pivot(
data: DataFrame,
index: IndexLabel | None = None,
columns: IndexLabel | None = None,
values: IndexLabel | None = None,
index: IndexLabel | lib.NoDefault = lib.NoDefault,
columns: IndexLabel | lib.NoDefault = lib.NoDefault,
values: IndexLabel | lib.NoDefault = lib.NoDefault,
) -> DataFrame:
if columns is None:
if columns is lib.NoDefault:
raise TypeError("pivot() missing 1 required argument: 'columns'")

columns_listlike = com.convert_to_list_like(columns)

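# If ``columns`` is None, the set_index/unstack calls below create a MultiIndex
# level whose name is None. None is also the default name of unnamed levels, so
# this can lead to duplicated level names. To avoid that, unnamed index levels
# are renamed to the ``lib.NoDefault`` sentinel here and renamed back to None
# on the result before returning.
# NOTE(review): the assignment below writes to the index names of the ``data``
# frame that was passed in -- confirm the caller's frame is not left modified.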
data.index.names = [
name if name is not None else lib.NoDefault for name in data.index.names
]

indexed: DataFrame | Series
if values is None:
if index is not None:
if values is lib.NoDefault:
if index is not lib.NoDefault:
cols = com.convert_to_list_like(index)
else:
cols = []

append = index is None
append = index is lib.NoDefault
# error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
# error: Unsupported left operand type for + ("ExtensionArray")
indexed = data.set_index(
cols + columns_listlike, append=append # type: ignore[operator]
)
else:
if index is None:
if index is lib.NoDefault:
if isinstance(data.index, MultiIndex):
# GH 23955
index_list = [
Expand All @@ -531,7 +539,12 @@ def pivot(
# error: Argument 1 to "unstack" of "DataFrame" has incompatible type "Union
# [List[Any], ExtensionArray, ndarray[Any, Any], Index, Series]"; expected
# "Hashable"
return indexed.unstack(columns_listlike) # type: ignore[arg-type]
result = indexed.unstack(columns_listlike) # type: ignore[arg-type]
result.index.names = [
name if name is not lib.NoDefault else None for name in result.index.names
]

return result


def crosstab(
Expand Down
50 changes: 49 additions & 1 deletion pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2341,7 +2341,7 @@ def test_pivot_index_list_values_none_immutable_args(self):
)
index = ["lev1", "lev2"]
columns = ["lev3"]
result = df.pivot(index=index, columns=columns, values=None)
result = df.pivot(index=index, columns=columns)

expected = DataFrame(
np.array(
Expand All @@ -2365,3 +2365,51 @@ def test_pivot_index_list_values_none_immutable_args(self):

assert index == ["lev1", "lev2"]
assert columns == ["lev3"]

def test_pivot_columns_not_given(self):
    # GH#48293
    # ``columns`` has no usable default: omitting it must raise, which is
    # distinct from explicitly passing ``columns=None``.
    frame = DataFrame({"a": [1], "b": 1})
    msg = "missing 1 required argument"
    with pytest.raises(TypeError, match=msg):
        frame.pivot()

def test_pivot_columns_is_none(self):
    # GH#48293
    # The frame has a column literally labelled ``None``; passing
    # ``columns=None`` must pivot on that column.
    frame = DataFrame({None: [1], "b": 2, "c": 3})

    # (keyword arguments for pivot, expected result)
    cases = [
        (
            {"columns": None},
            DataFrame({("b", 1): [2], ("c", 1): 3}),
        ),
        (
            {"columns": None, "index": "b"},
            DataFrame({("c", 1): 3}, index=Index([2], name="b")),
        ),
        (
            {"columns": None, "index": "b", "values": "c"},
            DataFrame({1: 3}, index=Index([2], name="b")),
        ),
    ]
    for pivot_kwargs, expected in cases:
        result = frame.pivot(**pivot_kwargs)
        tm.assert_frame_equal(result, expected)

def test_pivot_index_is_none(self):
    # GH#48293
    # ``index=None`` must select the column labelled ``None`` as the index.
    frame = DataFrame({None: [1], "b": 2, "c": 3})

    # Without ``values`` the result has two column levels named [None, "b"].
    expected_two_level = DataFrame({("c", 2): 3}, index=[1])
    expected_two_level.columns.names = [None, "b"]
    tm.assert_frame_equal(
        frame.pivot(columns="b", index=None), expected_two_level
    )

    # With ``values="c"`` the columns are a single level named "b".
    expected_single_level = DataFrame(
        3, index=[1], columns=Index([2], name="b")
    )
    tm.assert_frame_equal(
        frame.pivot(columns="b", index=None, values="c"), expected_single_level
    )

def test_pivot_values_is_none(self):
    # GH#48293
    # ``values=None`` must select the column labelled ``None`` as the values.
    frame = DataFrame({None: [1], "b": 2, "c": 3})
    b_columns = Index([2], name="b")

    # Explicit index taken from column "c".
    with_index = frame.pivot(columns="b", index="c", values=None)
    tm.assert_frame_equal(
        with_index,
        DataFrame(1, index=Index([3], name="c"), columns=b_columns),
    )

    # No index given: the frame's existing index is kept.
    without_index = frame.pivot(columns="b", values=None)
    tm.assert_frame_equal(
        without_index,
        DataFrame(1, index=[0], columns=Index([2], name="b")),
    )
8 changes: 5 additions & 3 deletions pandas/tests/reshape/test_pivot_multilevel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._libs import lib

import pandas as pd
from pandas import (
Index,
Expand Down Expand Up @@ -33,7 +35,7 @@
(
["lev4"],
"lev3",
None,
lib.NoDefault,
[
[1.0, np.nan, 1.0, np.nan, 0.0, np.nan],
[np.nan, 1.0, np.nan, 1.0, np.nan, 1.0],
Expand Down Expand Up @@ -70,7 +72,7 @@
(
["lev1", "lev2"],
"lev3",
None,
lib.NoDefault,
[[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]],
MultiIndex.from_tuples(
[("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)],
Expand Down Expand Up @@ -243,7 +245,7 @@ def test_pivot_df_multiindex_index_none():
)
df = df.set_index(["index_1", "index_2"])

result = df.pivot(index=None, columns="label", values="value")
result = df.pivot(columns="label", values="value")
expected = pd.DataFrame(
[[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]],
index=df.index,
Expand Down

0 comments on commit ef32d4c

Please sign in to comment.