Skip to content

Commit

Permalink
BUG: DataFrame sort_values and multiple "by" columns fails to order NaT correctly
Browse files Browse the repository at this point in the history

closes pandas-dev#16836

Author: Jean-Mathieu Deschenes <jean-mathieu.deschenes@cae.com>

This patch had conflicts when merged, resolved by
Committer: Jeff Reback <jeff@reback.net>

Closes pandas-dev#16995 from jdeschenes/datetime_sort_issues and squashes the following commits:

257e10a [Jean-Mathieu Deschenes] Changes requested by @jreback
c6d55e2 [Jean-Mathieu Deschenes] Fix for pandas-dev#16836
  • Loading branch information
Jean-Mathieu Deschenes authored and alanbato committed Nov 10, 2017
1 parent 55516a8 commit 87f69e5
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ Reshaping
- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`)
- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`)
- Fixed regression in :func:`DataFrame.sort_values` when sorting by multiple columns where a ``datetime64`` dtype column contains ``NaT`` values (:issue:`16836`)

Numeric
^^^^^^^
Expand Down
7 changes: 1 addition & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3453,18 +3453,13 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
if len(by) > 1:
from pandas.core.sorting import lexsort_indexer

def trans(v):
if needs_i8_conversion(v):
return v.view('i8')
return v

keys = []
for x in by:
k = self.xs(x, axis=other_axis).values
if k.ndim == 2:
raise ValueError('Cannot sort by duplicate column %s' %
str(x))
keys.append(trans(k))
keys.append(k)
indexer = lexsort_indexer(keys, orders=ascending,
na_position=na_position)
indexer = _ensure_platform_int(indexer)
Expand Down
29 changes: 28 additions & 1 deletion pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ def test_sort_datetimes(self):
df2 = df.sort_values(by=['B'])
assert_frame_equal(df1, df2)

df1 = df.sort_values(by='B')

df2 = df.sort_values(by=['C', 'B'])
assert_frame_equal(df1, df2)

def test_frame_column_inplace_sort_exception(self):
s = self.frame['A']
with tm.assert_raises_regex(ValueError, "This Series is a view"):
Expand Down Expand Up @@ -321,7 +326,29 @@ def test_sort_nat_values_in_int_column(self):
assert_frame_equal(df_sorted, df_reversed)

df_sorted = df.sort_values(["datetime", "float"], na_position="last")
assert_frame_equal(df_sorted, df_reversed)
assert_frame_equal(df_sorted, df)

# Ascending should not affect the results.
df_sorted = df.sort_values(["datetime", "float"], ascending=False)
assert_frame_equal(df_sorted, df)

def test_sort_nat(self):

# GH 16836

d1 = [Timestamp(x) for x in ['2016-01-01', '2015-01-01',
np.nan, '2016-01-01']]
d2 = [Timestamp(x) for x in ['2017-01-01', '2014-01-01',
'2016-01-01', '2015-01-01']]
df = pd.DataFrame({'a': d1, 'b': d2}, index=[0, 1, 2, 3])

d3 = [Timestamp(x) for x in ['2015-01-01', '2016-01-01',
'2016-01-01', np.nan]]
d4 = [Timestamp(x) for x in ['2014-01-01', '2015-01-01',
'2017-01-01', '2016-01-01']]
expected = pd.DataFrame({'a': d3, 'b': d4}, index=[1, 3, 0, 2])
sorted_df = df.sort_values(by=['a', 'b'], )
tm.assert_frame_equal(sorted_df, expected)


class TestDataFrameSortIndexKinds(TestData):
Expand Down

0 comments on commit 87f69e5

Please sign in to comment.