Skip to content

Commit

Permalink
fix indexes dropna=false
Browse files Browse the repository at this point in the history
  • Loading branch information
OXPHOS committed Jan 3, 2017
1 parent 0f38f43 commit 2e3f8e0
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 10 deletions.
36 changes: 34 additions & 2 deletions pandas/tools/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas.tools.util import cartesian_product
from pandas.compat import range, lrange, zip
from pandas import compat
from pandas import isnull
import pandas.core.common as com
import numpy as np

Expand Down Expand Up @@ -81,9 +82,21 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
DataFrame.pivot : pivot without aggregation that can handle
non-numeric data
"""
pd_null = "_null_pd"

index = _convert_by(index)
columns = _convert_by(columns)

keys = index + columns

if not dropna:
key_data = np.array(data[keys], dtype='object')
_data_null_idx = isnull(key_data)
_data_null_val = key_data[_data_null_idx]
key_data[_data_null_idx] = pd_null
for idx, k in enumerate(keys):
data[k] = key_data[:, idx]

if isinstance(aggfunc, list):
pieces = []
keys = []
Expand All @@ -96,8 +109,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
keys.append(func.__name__)
return concat(pieces, keys=keys, axis=1)

keys = index + columns

values_passed = values is not None
if values_passed:
if is_list_like(values):
Expand Down Expand Up @@ -180,6 +191,27 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
if len(index) == 0 and len(columns) > 0:
table = table.T

if not dropna:
if _data_null_val.size > 0:
def _convert_null_vals(indexes):
    """Return a copy of ``indexes`` with the null placeholder undone.

    Every entry equal to ``pd_null`` is overwritten with
    ``_data_null_val[0]``; both names are taken from the enclosing
    scope.  For a ``MultiIndex`` the substitution is applied to each
    level separately, and the original ``labels`` and ``names`` are
    carried over unchanged.  For any other index a new ``Index`` with
    the same ``name`` is built.
    """
    def _restore(idx):
        # Work on an ndarray copy so the placeholder can be overwritten
        # through a boolean mask.
        values = np.array(idx)
        values[values == pd_null] = _data_null_val[0]
        return Index(values, name=idx.name)

    if not isinstance(indexes, MultiIndex):
        return _restore(indexes)

    restored_levels = [_restore(level) for level in indexes.levels]
    return MultiIndex(levels=restored_levels,
                      labels=indexes.labels,
                      names=indexes.names)

table.columns = _convert_null_vals(table.columns)
table.index = _convert_null_vals(table.index)

return table


Expand Down
37 changes: 29 additions & 8 deletions pandas/tools/tests/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,26 @@ def test_pivot_table_dropna(self):
tm.assert_index_equal(pv_col.columns, m)
tm.assert_index_equal(pv_ind.index, m)

df = DataFrame([[1, 'a', 'A'], [1, 'b', 'B'], [1, 'c', None]],
columns=['x', 'y', 'z'])
actual = df.pivot_table(values='x', index='y', columns='z',
aggfunc='sum', fill_value=0, margins=True,
dropna=True)
expected = pd.DataFrame([[1.0, 0.0, 1.0], [0.0, 1.0, 1.0],
[1.0, 1.0, 2.0]])
expected.index = Index(['a', 'b', 'All'], name='y')
expected.columns = Index(['A', 'B', 'All'], name='z')
tm.assert_frame_equal(actual, expected)

actual = df.pivot_table(values='x', index='y', columns='z',
aggfunc='sum', fill_value=0, margins=True,
dropna=False)
expected = pd.DataFrame([[1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 0.0, 1.0],
[0.0, 0.0, 1.0, 1.0], [1.0, 1.0, 1.0, 3.0]])
expected.index = Index(['a', 'b', 'c', 'All'], name='y')
expected.columns = Index(['A', 'B', None, 'All'], name='z')
tm.assert_frame_equal(actual, expected)

def test_pass_array(self):
result = self.data.pivot_table(
'D', index=self.data.A, columns=self.data.C)
Expand Down Expand Up @@ -1080,7 +1100,8 @@ def test_margin_dropna(self):
df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
'b': [3, np.nan, 4, 4, 4, 4]})
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4], [1, 4, 1, 6]])
expected = pd.DataFrame([[1, 0, 0, 1], [0, 1, 0, 1], [0, 3, 1, 4],
[1, 4, 1, 6]])
expected.index = Index([1.0, 2.0, np.nan, 'All'], name='a')
expected.columns = Index([3.0, 4.0, np.nan, 'All'], name='b')
tm.assert_frame_equal(actual, expected)
Expand All @@ -1095,8 +1116,9 @@ def test_margin_dropna(self):
actual = pd.crosstab(a, [b, c], rownames=['a'],
colnames=['b', 'c'], margins=True, dropna=False)

m = MultiIndex(levels = [Index(['All', np.nan, 'one', 'two']),
Index(['', 'dull', 'shiny'])], labels = [[1, 1, 2, 2, 3, 3, 0],
m = MultiIndex(levels=[Index(['All', np.nan, 'one', 'two']),
Index(['', 'dull', 'shiny'])],
labels=[[1, 1, 2, 2, 3, 3, 0],
[1, 2, 1, 2, 1, 2, 0]], names=['b', 'c'])
expected = DataFrame([[0, 0, 1, 0, 1, 0, 2], [0, 1, 2, 0, 1, 1, 5],
[0, 1, 3, 0, 2, 1, 7]], columns=m)
Expand All @@ -1105,13 +1127,12 @@ def test_margin_dropna(self):

actual = pd.crosstab([a, b], c, rownames=['a', 'b'],
colnames=['c'], margins=True, dropna=False)

print actual.index
m = MultiIndex(levels=[['All', 'bar', 'foo'], ['', np.nan, 'one', 'two']],
m = MultiIndex(levels=[['All', 'bar', 'foo'],
['', np.nan, 'one', 'two']],
labels=[[1, 1, 1, 2, 2, 2, 0], [1, 2, 3, 1, 2, 3, 0]],
names=['a', 'b'])
expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1], [2, 0, 2], [1, 1, 2],
[5, 2, 7]], index=m)
expected = DataFrame([[0, 0, 0], [1, 0, 1], [1, 0, 1], [0, 1, 1],
[2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m)
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
tm.assert_frame_equal(actual, expected)

Expand Down

0 comments on commit 2e3f8e0

Please sign in to comment.