Skip to content

Commit

Permalink
ENH: Add ignore_index for df.sort_values and series.sort_values (#30402)
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesdong1991 authored and jreback committed Dec 27, 2019
1 parent 980d0da commit f738581
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ Other enhancements
- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`)
- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`)
- DataFrame constructor now preserves ``ExtensionArray`` dtype when passed an ``ExtensionArray`` (:issue:`11363`)
- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained the ``ignore_index`` keyword to reset the index after sorting (:issue:`30114`)


Build Changes
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4731,6 +4731,7 @@ def sort_values(
inplace=False,
kind="quicksort",
na_position="last",
ignore_index=False,
):
inplace = validate_bool_kwarg(inplace, "inplace")
axis = self._get_axis_number(axis)
Expand Down Expand Up @@ -4764,6 +4765,9 @@ def sort_values(
indexer, axis=self._get_block_manager_axis(axis), verify=False
)

if ignore_index:
new_data.axes[1] = ibase.default_index(len(indexer))

if inplace:
return self._update_inplace(new_data)
else:
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4050,6 +4050,7 @@ def sort_values(
inplace: bool_t = False,
kind: str = "quicksort",
na_position: str = "last",
ignore_index: bool_t = False,
):
"""
Sort by the values along either axis.
Expand All @@ -4072,6 +4073,10 @@ def sort_values(
na_position : {'first', 'last'}, default 'last'
Puts NaNs at the beginning if `first`; `last` puts NaNs at the
end.
ignore_index : bool, default False
If True, the resulting axis will be labeled 0, 1, …, n - 1.
.. versionadded:: 1.0.0
Returns
-------
Expand Down
20 changes: 14 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2698,6 +2698,7 @@ def sort_values(
inplace=False,
kind="quicksort",
na_position="last",
ignore_index=False,
):
"""
Sort by the values.
Expand All @@ -2720,6 +2721,10 @@ def sort_values(
na_position : {'first' or 'last'}, default 'last'
Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
the end.
ignore_index : bool, default False
If True, the resulting axis will be labeled 0, 1, …, n - 1.
.. versionadded:: 1.0.0
Returns
-------
Expand Down Expand Up @@ -2825,7 +2830,7 @@ def _try_kind_sort(arr):
return arr.argsort(kind="quicksort")

arr = self._values
sortedIdx = np.empty(len(self), dtype=np.int32)
sorted_index = np.empty(len(self), dtype=np.int32)

bad = isna(arr)

Expand All @@ -2849,16 +2854,19 @@ def _try_kind_sort(arr):

if na_position == "last":
n = good.sum()
sortedIdx[:n] = idx[good][argsorted]
sortedIdx[n:] = idx[bad]
sorted_index[:n] = idx[good][argsorted]
sorted_index[n:] = idx[bad]
elif na_position == "first":
n = bad.sum()
sortedIdx[n:] = idx[good][argsorted]
sortedIdx[:n] = idx[bad]
sorted_index[n:] = idx[good][argsorted]
sorted_index[:n] = idx[bad]
else:
raise ValueError(f"invalid na_position: {na_position}")

result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx])
result = self._constructor(arr[sorted_index], index=self.index[sorted_index])

if ignore_index:
result.index = ibase.default_index(len(sorted_index))

if inplace:
self._update_inplace(result)
Expand Down
42 changes: 42 additions & 0 deletions pandas/tests/frame/methods/test_sort_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,3 +460,45 @@ def test_sort_values_na_position_with_categories_raises(self):

with pytest.raises(ValueError):
df.sort_values(by="c", ascending=False, na_position="bad_position")

@pytest.mark.parametrize(
    "original_dict, sorted_dict, ignore_index, output_index",
    [
        ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]),
        ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]),
        (
            {"A": [1, 2, 3], "B": [2, 3, 4]},
            {"A": [3, 2, 1], "B": [4, 3, 2]},
            True,
            [0, 1, 2],
        ),
        (
            {"A": [1, 2, 3], "B": [2, 3, 4]},
            {"A": [3, 2, 1], "B": [4, 3, 2]},
            False,
            [2, 1, 0],
        ),
    ],
)
def test_sort_values_ignore_index(
    self, original_dict, sorted_dict, ignore_index, output_index
):
    """
    Check ``DataFrame.sort_values(..., ignore_index=...)`` (GH 30114).

    Sorting column "A" in descending order must yield ``sorted_dict``
    labeled with ``output_index``, both when a new frame is returned and
    when the sort is applied in place. The frame the sort started from
    must be left unmodified in either mode.
    """
    # GH 30114
    source = DataFrame(original_dict)
    expected = DataFrame(sorted_dict, index=output_index)
    sort_kwargs = {"ascending": False, "ignore_index": ignore_index}

    # Out-of-place: a new, sorted frame comes back and the source is intact.
    result = source.sort_values("A", **sort_kwargs)
    tm.assert_frame_equal(result, expected)
    tm.assert_frame_equal(source, DataFrame(original_dict))

    # In-place: the copy is sorted; the frame it was copied from is intact.
    target = source.copy()
    target.sort_values("A", inplace=True, **sort_kwargs)
    tm.assert_frame_equal(target, expected)
    tm.assert_frame_equal(source, DataFrame(original_dict))
27 changes: 27 additions & 0 deletions pandas/tests/series/methods/test_sort_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,30 @@ def test_sort_values_categorical(self):
result = df.sort_values(by=["grade", "id"])
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "original_list, sorted_list, ignore_index, output_index",
    [
        ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
        ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
    ],
)
def test_sort_values_ignore_index(
    self, original_list, sorted_list, ignore_index, output_index
):
    """
    Check ``Series.sort_values(..., ignore_index=...)`` (GH 30114).

    A descending sort must yield ``sorted_list`` labeled with
    ``output_index``, both when a new series is returned and when the
    sort is applied in place. The series the sort started from must be
    left unmodified in either mode.
    """
    # GH 30114
    source = Series(original_list)
    expected = Series(sorted_list, index=output_index)
    sort_kwargs = {"ascending": False, "ignore_index": ignore_index}

    # Out-of-place: a new, sorted series comes back and the source is intact.
    result = source.sort_values(**sort_kwargs)
    tm.assert_series_equal(result, expected)
    tm.assert_series_equal(source, Series(original_list))

    # In-place: the copy is sorted; the series it was copied from is intact.
    target = source.copy()
    target.sort_values(inplace=True, **sort_kwargs)
    tm.assert_series_equal(target, expected)
    tm.assert_series_equal(source, Series(original_list))

0 comments on commit f738581

Please sign in to comment.