Skip to content

Commit

Permalink
API/ENH: IntervalIndex
Browse files: browse the repository at this point in the history
  • Loading branch information
shoyer authored and jreback committed Feb 8, 2017
1 parent 704cdbf commit 455b3fd
Show file tree
Hide file tree
Showing 19 changed files with 3,395 additions and 153 deletions.
1 change: 0 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
cat, bins = cut(values, bins, retbins=True)
except TypeError:
raise TypeError("bins argument only works with numeric data.")
values = cat.codes

if is_extension_type(values) and not is_datetimetz(values):
# handle Categorical and sparse,
Expand Down
1 change: 1 addition & 0 deletions pandas/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
UInt64Index, RangeIndex, Float64Index,
MultiIndex)
from pandas.core.interval import Interval, IntervalIndex

from pandas.core.series import Series, TimeSeries
from pandas.core.frame import DataFrame
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@
from pandas.core.categorical import Categorical
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.index import (Index, MultiIndex, CategoricalIndex,
_ensure_index)
from pandas.core.interval import IntervalIndex
from pandas.core.internals import BlockManager, make_block
from pandas.core.series import Series
from pandas.core.panel import Panel
Expand Down Expand Up @@ -3065,12 +3064,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
if bins is None:
lab, lev = algos.factorize(val, sort=True)
else:
cat, bins = cut(val, bins, retbins=True)
raise NotImplementedError('this is broken')
lab, bins = cut(val, bins, retbins=True)
# bins[:-1] for backward compat;
# o.w. cat.categories could be better
lab, lev, dropna = cat.codes, bins[:-1], False

sorter = np.lexsort((lab, ids))
# cat = Categorical(cat)
# lab, lev, dropna = cat.codes, bins[:-1], False

if (lab.dtype == object
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
lab_index = Index(lab)
assert isinstance(lab, IntervalIndex)
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
else:
sorter = np.lexsort((lab, ids))
ids, lab = ids[sorter], lab[sorter]

# group boundaries are where group ids change
Expand Down Expand Up @@ -3111,12 +3118,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
acc = rep(d)
out /= acc

if sort and bins is None:
if sort: # and bins is None:
cat = ids[inc][mask] if dropna else ids[inc]
sorter = np.lexsort((out if ascending else -out, cat))
out, labels[-1] = out[sorter], labels[-1][sorter]

if bins is None:
# if bins is None:
if True:
mi = MultiIndex(levels=levels, labels=labels, names=names,
verify_integrity=False)

Expand Down
Loading

0 comments on commit 455b3fd

Please sign in to comment.