Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pivot table drops column/index labels that are NaN when dropna=False #14246

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions pandas/tools/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas.tools.util import cartesian_product
from pandas.compat import range, lrange, zip
from pandas import compat
from pandas import isnull
import pandas.core.common as com
import numpy as np

Expand Down Expand Up @@ -81,9 +82,21 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
DataFrame.pivot : pivot without aggregation that can handle
non-numeric data
"""
pd_null = "_null_pd"

index = _convert_by(index)
columns = _convert_by(columns)

keys = index + columns

if not dropna:
key_data = np.array(data[keys], dtype='object')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what the heck is this?

Copy link
Contributor Author

@OXPHOS OXPHOS Jan 3, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback This converts the NaN values in the keys to a special placeholder string, so that dropna does not have to be passed around.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is not acceptable; we use masks if needed. Converting values like that will just lead to confusion for future readers, and to bugs.

_data_null_idx = isnull(key_data)
_data_null_val = key_data[_data_null_idx]
key_data[_data_null_idx] = pd_null
for idx, k in enumerate(keys):
data[k] = key_data[:, idx]

if isinstance(aggfunc, list):
pieces = []
keys = []
Expand All @@ -96,8 +109,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
keys.append(func.__name__)
return concat(pieces, keys=keys, axis=1)

keys = index + columns

values_passed = values is not None
if values_passed:
if is_list_like(values):
Expand Down Expand Up @@ -180,6 +191,27 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
if len(index) == 0 and len(columns) > 0:
table = table.T

if not dropna:
if _data_null_val.size > 0:
def _convert_null_vals(indexes):
if isinstance(indexes, MultiIndex):
_new_level = []
for _tmp_index in indexes.levels:
tmp = np.array(_tmp_index)
tmp[tmp == pd_null] = _data_null_val[0]
_new_level.append(Index(tmp, name=_tmp_index.name))
indexes = MultiIndex(levels=_new_level,
labels=indexes.labels,
names=indexes.names)
else:
tmp = np.array(indexes)
tmp[tmp == pd_null] = _data_null_val[0]
indexes = Index(tmp, name=indexes.name)
return indexes

table.columns = _convert_null_vals(table.columns)
table.index = _convert_null_vals(table.index)

return table


Expand Down
56 changes: 40 additions & 16 deletions pandas/tools/tests/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,26 @@ def test_pivot_table_dropna(self):
tm.assert_index_equal(pv_col.columns, m)
tm.assert_index_equal(pv_ind.index, m)

df = DataFrame([[1, 'a', 'A'], [1, 'b', 'B'], [1, 'c', None]],
columns=['x', 'y', 'z'])
actual = df.pivot_table(values='x', index='y', columns='z',
aggfunc='sum', fill_value=0, margins=True,
dropna=True)
expected = pd.DataFrame([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0],
[1.0, 1.0, 2.0]])
expected.index = Index(['a', 'b', 'All'], name='y')
expected.columns = Index(['A', 'B', 'All'], name='z')
tm.assert_frame_equal(actual, expected)

actual = df.pivot_table(values='x', index='y', columns='z',
aggfunc='sum', fill_value=0, margins=True,
dropna=False)
expected = pd.DataFrame([[1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 0.0, 1.0],
[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 1.0, 3.0]])
expected.index = Index(['a', 'b', 'c', 'All'], name='y')
expected.columns = Index(['A', 'B', None, 'All'], name='z')
tm.assert_frame_equal(actual, expected)

def test_pass_array(self):
result = self.data.pivot_table(
'D', index=self.data.A, columns=self.data.C)
Expand Down Expand Up @@ -1072,17 +1092,18 @@ def test_margin_dropna(self):
df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan],
'b': [3, 3, 4, 4, 4, 4]})
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
expected.index = Index([1.0, 2.0, 'All'], name='a')
expected.columns = Index([3, 4, 'All'], name='b')
expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [0, 1, 1], [2, 4, 6]])
expected.index = Index([1.0, 2.0, np.nan, 'All'], name='a')
expected.columns = Index([3.0, 4.0, 'All'], name='b')
tm.assert_frame_equal(actual, expected)

df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
'b': [3, np.nan, 4, 4, 4, 4]})
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
expected.index = Index([1.0, 2.0, 'All'], name='a')
expected.columns = Index([3.0, 4.0, 'All'], name='b')
expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4],
[1, 4, 1, 6]])
expected.index = Index([1.0, 2.0, np.nan, 'All'], name='a')
expected.columns = Index([3.0, 4.0, np.nan, 'All'], name='b')
tm.assert_frame_equal(actual, expected)

a = np.array(['foo', 'foo', 'foo', 'bar',
Expand All @@ -1094,21 +1115,24 @@ def test_margin_dropna(self):

actual = pd.crosstab(a, [b, c], rownames=['a'],
colnames=['b', 'c'], margins=True, dropna=False)
m = MultiIndex.from_arrays([['one', 'one', 'two', 'two', 'All'],
['dull', 'shiny', 'dull', 'shiny', '']],
names=['b', 'c'])
expected = DataFrame([[1, 0, 1, 0, 2], [2, 0, 1, 1, 5],
[3, 0, 2, 1, 7]], columns=m)

m = MultiIndex(levels=[Index(['All', np.nan, 'one', 'two']),
Index(['', 'dull', 'shiny'])],
labels=[[1, 1, 2, 2, 3, 3, 0],
[1, 2, 1, 2, 1, 2, 0]], names=['b', 'c'])
expected = DataFrame([[0, 0, 1, 0, 1, 0, 2], [0, 1, 2, 0, 1, 1, 5],
[0, 1, 3, 0, 2, 1, 7]], columns=m)
expected.index = Index(['bar', 'foo', 'All'], name='a')
tm.assert_frame_equal(actual, expected)

actual = pd.crosstab([a, b], c, rownames=['a', 'b'],
colnames=['c'], margins=True, dropna=False)
m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'],
['one', 'two', 'one', 'two', '']],
names=['a', 'b'])
expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2],
[5, 2, 7]], index=m)
m = MultiIndex(levels=[['All', 'bar', 'foo'],
['', np.nan, 'one', 'two']],
labels=[[1, 1, 1, 2, 2, 2, 0], [1, 2, 3, 1, 2, 3, 0]],
names=['a', 'b'])
expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1],
[2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m)
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
tm.assert_frame_equal(actual, expected)

Expand Down