Skip to content

Commit

Permalink
BUG: cut/qcut bin formatting bugs. close #1979
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Nov 3, 2012
1 parent 5907966 commit 99616eb
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 9 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pandas 0.9.1

**Bug fixes**

- Fix bar plot color cycle issues (#2082)
- Implement comparisons on date offsets with fixed delta (#2078)
- Handle inf/-inf correctly in read_* parser functions (#2041)
- Fix matplotlib unicode interaction bug
Expand All @@ -66,6 +67,7 @@ pandas 0.9.1
- Don't lose index names in Panel.to_frame/DataFrame.to_panel (#2163)
- Work around length-0 boolean indexing NumPy bug (#2096)
- Fix partial integer indexing bug in DataFrame.xs (#2107)
- Fix a variety of cut/qcut string-bin formatting bugs (#1978, #1979)
pandas 0.9.0
============
Expand Down
1 change: 1 addition & 0 deletions pandas/tools/tests/cut_data.csv

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions pandas/tools/tests/test_tile.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import nose
import unittest

Expand Down Expand Up @@ -180,6 +181,43 @@ def test_label_formatting(self):

result = cut(np.arange(11.) / 1e10, 2)

# #1979, negative numbers

result = tmod._format_label(-117.9998, precision=3)
self.assertEquals(result, '-118')
result = tmod._format_label(117.9998, precision=3)
self.assertEquals(result, '118')


def test_qcut_binning_issues(self):
    """Check that qcut bin labels are non-degenerate and ordered (#1978, #1979)."""
    data_file = os.path.join(curpath(), 'cut_data.csv')
    values = np.loadtxt(data_file)

    binned = qcut(values, 20)

    lefts, rights = [], []
    for label in binned.levels:
        # Drop the enclosing bracket characters, then split the two edges.
        left_text, right_text = label[1:-1].split(',')

        # A bin whose two edges print identically is a formatting bug.
        self.assertTrue(left_text != right_text)

        lefts.append(float(left_text))
        rights.append(float(right_text))

    # Walk consecutive bins: both edges must strictly increase and a bin
    # may not end after the following bin starts.
    neighbours = zip(lefts[:-1], lefts[1:], rights[:-1], rights[1:])
    for left_prev, left_next, right_prev, right_next in neighbours:
        self.assertTrue(left_prev < left_next)
        self.assertTrue(right_prev < right_next)
        self.assertTrue(right_prev <= left_next)

def curpath():
    """Return the absolute path of the directory that contains this file."""
    this_file = os.path.abspath(__file__)
    return os.path.dirname(this_file)


if __name__ == '__main__':
    # Allow running this test module directly as a script through nose.
    nose.runmodule(
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
        exit=False
    )
Expand Down
52 changes: 43 additions & 9 deletions pandas/tools/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,18 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,

if labels is not False:
if labels is None:
fmt = lambda v: _format_label(v, precision=precision)
if right:
levels = ['(%s, %s]' % (fmt(a), fmt(b))
for a, b in zip(bins, bins[1:])]
if include_lowest:
levels[0] = '[' + levels[0][1:]
else:
levels = ['[%s, %s)' % (fmt(a), fmt(b))
for a, b in zip(bins, bins[1:])]
increases = 0
while True:
try:
levels = _format_levels(bins, precision, right=right,
include_lowest=include_lowest)
except ValueError:
increases += 1
precision += 1
if increases >= 20:
raise
else:
break

else:
if len(labels) != len(bins) - 1:
Expand All @@ -187,6 +190,29 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,

return fac, bins

def _format_levels(bins, prec, right=True,
                   include_lowest=False):
    """
    Build one string label for every pair of consecutive bin edges.

    Parameters
    ----------
    bins : sequence of numbers
        Bin edges; each adjacent pair ``(bins[i], bins[i + 1])`` produces
        one label.
    prec : int
        Precision forwarded to ``_format_label`` for every edge.
    right : bool, default True
        If True labels are written as ``'(a, b]'``, otherwise as
        ``'[a, b)'``.
    include_lowest : bool, default False
        Only consulted when ``right`` is True: the opening ``'('`` of the
        first label is replaced by ``'['``.

    Returns
    -------
    levels : list of str
        One label per bin; empty when fewer than two edges are given.

    Raises
    ------
    ValueError
        If two distinct edges of a bin format to the same string at this
        precision, so that a caller can retry with a larger ``prec``.
    """
    template = '(%s, %s]' if right else '[%s, %s)'

    levels = []
    for a, b in zip(bins, bins[1:]):
        fa = _format_label(a, precision=prec)
        fb = _format_label(b, precision=prec)

        # Distinct edges that print identically would yield a degenerate
        # label such as '[1, 1)'. This is checked for both orientations;
        # previously only the right-closed branch detected it.
        if a != b and fa == fb:
            raise ValueError('precision too low')

        levels.append(template % (fa, fb))

    # Guard on ``levels`` so that fewer than two edges gives an empty
    # result instead of an IndexError.
    if right and include_lowest and levels:
        levels[0] = '[' + levels[0][1:]

    return levels


def _format_label(x, precision=3):
fmt_str = '%%.%dg' % precision
Expand All @@ -196,6 +222,14 @@ def _format_label(x, precision=3):
whole = abs(whole)
if frac != 0.0:
val = fmt_str % frac

# rounded up or down
if '.' not in val:
if x < 0:
return '%d' % (-whole - 1)
else:
return '%d' % (whole + 1)

if 'e' in val:
return _trim_zeros(fmt_str % x)
else:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
'tests/*.xls',
'tests/*.xlsx',
'tests/*.table'],
'pandas.tools': ['tests/*.csv'],
'pandas.tests' : ['data/*.pickle',
'data/*.csv'],
'pandas.tseries.tests' : ['data/*.pickle',
Expand Down

0 comments on commit 99616eb

Please sign in to comment.