Skip to content

Commit

Permalink
Fix GH 14922
Browse files Browse the repository at this point in the history
    Having the integer equivalent of NaT in an int64 column caused
    incorrect sorting, because this special value was treated as a
    "missing value".
  • Loading branch information
uweschmitt committed Dec 21, 2016
1 parent 3ccb501 commit 1afdbb8
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
3 changes: 2 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):

table = hash_klass(size_hint or len(vals))
uniques = vec_klass()
labels = table.get_labels(vals, uniques, 0, na_sentinel, True)
check_nulls = not is_integer_dtype(values)
labels = table.get_labels(vals, uniques, 0, na_sentinel, check_nulls)

labels = _ensure_platform_int(labels)

Expand Down
20 changes: 18 additions & 2 deletions pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@

from pandas.compat import lrange
from pandas import (DataFrame, Series, MultiIndex, Timestamp,
date_range)
date_range, NaT)

from pandas.util.testing import (assert_series_equal,
assert_frame_equal,
assertRaisesRegexp)
assertRaisesRegexp,
is_sorted)

import pandas.util.testing as tm

Expand Down Expand Up @@ -491,3 +492,18 @@ def test_frame_column_inplace_sort_exception(self):

cp = s.copy()
cp.sort_values() # it works!

def test_sort_nat_values_in_int_column(self):

# GH 14922, sorting with large float and multiple columns incorrect
int_values = (2, int(NaT))
float_values = (2.0, -1.797693e308)

df = DataFrame(dict(int=int_values, float=float_values),
columns=["int", "float"])

df_sorted = df.sort_values(["int", "float"])
df_expected = DataFrame(dict(int=int_values[::-1], float=float_values[::-1]),
columns=["int", "float"], index=[1, 0])

assert_frame_equal(df_sorted, df_expected)

0 comments on commit 1afdbb8

Please sign in to comment.