Skip to content

Commit

Permalink
API/ENH: IntervalIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
shoyer authored and jreback committed Mar 20, 2017
1 parent 771e36c commit a4b82bd
Show file tree
Hide file tree
Showing 18 changed files with 3,376 additions and 160 deletions.
3 changes: 0 additions & 3 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ PyDateTime_IMPORT
cdef extern from "Python.h":
int PySlice_Check(object)

cdef size_t _INIT_VEC_CAP = 128


include "hashtable_class_helper.pxi"
include "hashtable_func_helper.pxi"

Expand Down
6 changes: 5 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ cpdef bint isscalar(object val):
- instances of datetime.timedelta
- Period
- instances of decimal.Decimal
- Interval
"""

Expand All @@ -327,7 +328,8 @@ cpdef bint isscalar(object val):
or PyDelta_Check(val)
or PyTime_Check(val)
or util.is_period_object(val)
or is_decimal(val))
or is_decimal(val),
or is_interval(val))


def item_from_zerodim(object val):
Expand Down Expand Up @@ -1965,4 +1967,6 @@ cdef class BlockPlacement:

include "reduce.pyx"
include "properties.pyx"
include "interval.pyx"
include "intervaltree.pyx"
include "inference.pyx"
21 changes: 21 additions & 0 deletions pandas/_libs/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,10 @@ def infer_dtype(object _values):
if is_period_array(values):
return 'period'

elif is_interval(val):
if is_interval_array_fixed_closed(values):
return 'interval'

for i in range(n):
val = util.get_value_1d(values, i)
if (util.is_integer_object(val) and
Expand Down Expand Up @@ -742,6 +746,23 @@ cpdef bint is_period_array(ndarray[object] values):
return False
return null_count != n

cdef inline bint is_interval(object o):
    """Return True when ``o`` is an instance of ``Interval``, else False."""
    cdef bint matches = isinstance(o, Interval)
    return matches

def is_interval_array_fixed_closed(ndarray[object] values):
    """
    Check that ``values`` holds only Interval objects sharing one ``closed``.

    Parameters
    ----------
    values : ndarray[object]
        Array of objects to inspect.

    Returns
    -------
    bool
        False when ``values`` is empty, when any element is not an
        Interval (per ``is_interval``), or when any element's ``closed``
        attribute differs from that of the first element; True otherwise.
    """
    cdef:
        Py_ssize_t pos, count = len(values)
        str expected

    # An empty array is never reported as an interval array.
    if count == 0:
        return False

    # The first element fixes the ``closed`` value every other one must match.
    if not is_interval(values[0]):
        return False
    expected = values[0].closed

    for pos in range(1, count):
        if not is_interval(values[pos]):
            return False
        if expected != values[pos].closed:
            return False

    return True


cdef extern from "parse_helper.h":
inline int floatify(object, double *result, int *maybe_int) except -1
Expand Down
1 change: 0 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,6 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
cat, bins = cut(values, bins, retbins=True)
except TypeError:
raise TypeError("bins argument only works with numeric data.")
values = cat.codes

if is_extension_type(values) and not is_datetimetz(values):
# handle Categorical and sparse,
Expand Down
1 change: 1 addition & 0 deletions pandas/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
UInt64Index, RangeIndex, Float64Index,
MultiIndex)
from pandas.core.interval import Interval, IntervalIndex

from pandas.core.series import Series
from pandas.core.frame import DataFrame
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@
from pandas.core.categorical import Categorical
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.index import (Index, MultiIndex, CategoricalIndex,
_ensure_index)
from pandas.core.interval import IntervalIndex
from pandas.core.internals import BlockManager, make_block
from pandas.core.series import Series
from pandas.core.panel import Panel
Expand Down Expand Up @@ -3059,12 +3058,20 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
if bins is None:
lab, lev = algorithms.factorize(val, sort=True)
else:
cat, bins = cut(val, bins, retbins=True)
raise NotImplementedError('this is broken')
lab, bins = cut(val, bins, retbins=True)
# bins[:-1] for backward compat;
# o.w. cat.categories could be better
lab, lev, dropna = cat.codes, bins[:-1], False

sorter = np.lexsort((lab, ids))
# cat = Categorical(cat)
# lab, lev, dropna = cat.codes, bins[:-1], False

if (lab.dtype == object
and lib.is_interval_array_fixed_closed(lab[notnull(lab)])):
lab_index = Index(lab)
assert isinstance(lab, IntervalIndex)
sorter = np.lexsort((lab_index.left, lab_index.right, ids))
else:
sorter = np.lexsort((lab, ids))
ids, lab = ids[sorter], lab[sorter]

# group boundaries are where group ids change
Expand Down Expand Up @@ -3105,12 +3112,13 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
acc = rep(d)
out /= acc

if sort and bins is None:
if sort: # and bins is None:
cat = ids[inc][mask] if dropna else ids[inc]
sorter = np.lexsort((out if ascending else -out, cat))
out, labels[-1] = out[sorter], labels[-1][sorter]

if bins is None:
# if bins is None:
if True:
mi = MultiIndex(levels=levels, labels=labels, names=names,
verify_integrity=False)

Expand Down
Loading

0 comments on commit a4b82bd

Please sign in to comment.