Skip to content

Commit

Permalink
BUG: fix int32 overflow when computing group_index, GH #850
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 3, 2012
1 parent 89bdb1e commit de81cc1
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1472,7 +1472,7 @@ def get_group_index(label_list, shape):
mask = np.zeros(n, dtype=bool)
for i in xrange(len(shape)):
stride = np.prod([x for x in shape[i+1:]], dtype=int)
group_index += label_list[i] * stride
group_index += com._ensure_int64(label_list[i]) * stride
mask |= label_list[i] < 0

np.putmask(group_index, mask, -1)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1457,6 +1457,15 @@ def test_groupby_2d_malformed(self):
self.assert_(np.array_equal(tmp.columns, ['zeros', 'ones']))
self.assert_(np.array_equal(tmp.values, res_values))

def test_int32_overflow(self):
B = np.concatenate((np.arange(100000), np.arange(100000),
np.arange(50000)))
A = np.arange(250000)
df = DataFrame({'A' : A, 'B' : B, 'C' : np.random.randn(250000)})

left = df.groupby(['A', 'B']).sum()
right = df.groupby(['B', 'A']).sum()
self.assert_(len(left) == len(right))

def test_decons():
from pandas.core.groupby import decons_group_index, get_group_index
Expand Down
25 changes: 16 additions & 9 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,24 @@ def hist(data, column, by=None, ax=None, fontsize=None):
return ax

def grouped_hist(data, column=None, by=None, ax=None, bins=50, log=False,
figsize=None, layout=None):
figsize=None, layout=None, sharex=False, sharey=False,
rot=90):
"""
Returns
-------
fig : matplotlib.Figure
"""
if isinstance(data, DataFrame):
data = data[column]
# if isinstance(data, DataFrame):
# data = data[column]

def plot_group(group, ax):
ax.hist(group.dropna(), bins=bins)
fig, axes = _grouped_plot(plot_group, data, by=by, sharex=False,
sharey=False, figsize=figsize,
layout=layout)

fig, axes = _grouped_plot(plot_group, data, column=column,
by=by, sharex=sharex, sharey=sharey,
figsize=figsize, layout=layout,
rot=rot)
fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9,
hspace=0.3, wspace=0.2)
return fig
Expand All @@ -45,7 +48,7 @@ def plot_group(group, ax):
def boxplot(data, column=None, by=None, ax=None, fontsize=None,
rot=0, grid=True, figsize=None):
"""
Make a box plot from DataFrame column optionally grouped by some columns or
Make a box plot from DataFrame column optionally grouped by some columns or
other inputs
Parameters
Expand Down Expand Up @@ -142,15 +145,19 @@ def plot_group(group, ax):

return fig

def _grouped_plot(plotf, data, by=None, numeric_only=True, figsize=None,
sharex=True, sharey=True, layout=None):
def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
figsize=None, sharex=True, sharey=True, layout=None,
rot=0):
import matplotlib.pyplot as plt

# allow to specify mpl default with 'default'
if figsize is None or figsize == 'default':
figsize = (10, 5) # our default

grouped = data.groupby(by)
if column is not None:
grouped = grouped[column]

ngroups = len(grouped)

nrows, ncols = layout or _get_layout(ngroups)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@

MAJOR = 0
MINOR = 7
MICRO = 1
ISRELEASED = True
MICRO = 2
ISRELEASED = False
VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
QUALIFIER = ''

Expand Down

0 comments on commit de81cc1

Please sign in to comment.