Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python)!: Update set_sorted to only accept a single column #16800

Merged
merged 2 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10313,26 +10313,32 @@ def merge_sorted(self, other: DataFrame, key: str) -> DataFrame:

def set_sorted(
self,
column: str | Iterable[str],
*more_columns: str,
column: str,
*,
descending: bool = False,
) -> DataFrame:
"""
Indicate that a column is sorted.

This can speed up future operations.

Parameters
----------
column
Columns that are sorted
more_columns
Additional columns that are sorted, specified as positional arguments.
Column that is sorted
descending
Whether the column is sorted in descending order.

Warnings
--------
This can lead to incorrect results if the data is NOT sorted!!
Use with care!

"""
# NOTE: Only accepts 1 column on purpose! With multiple columns, users may
# wrongly assume the data is sorted by the combined multicolumn values.
return (
self.lazy()
.set_sorted(column, *more_columns, descending=descending)
.collect(_eager=True)
self.lazy().set_sorted(column, descending=descending).collect(_eager=True)
)

@unstable()
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9825,7 +9825,7 @@ def set_sorted(self, *, descending: bool = False) -> Self:

Warnings
--------
This can lead to incorrect results if this `Series` is not sorted!!
This can lead to incorrect results if the data is NOT sorted!!
Use with care!

Examples
Expand Down
25 changes: 16 additions & 9 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5905,27 +5905,34 @@ def merge_sorted(self, other: LazyFrame, key: str) -> Self:

def set_sorted(
self,
column: str | Iterable[str],
*more_columns: str,
column: str,
*,
descending: bool = False,
) -> Self:
"""
Indicate that a column is sorted.

This can speed up future operations.

Parameters
----------
column
Column that is sorted
more_columns
Additional columns that are sorted, specified as positional arguments.
descending
Whether the column is sorted in descending order.
"""
columns = parse_as_list_of_expressions(column, *more_columns)

return self.with_columns(
wrap_expr(e).set_sorted(descending=descending) for e in columns
)
Warnings
--------
This can lead to incorrect results if the data is NOT sorted!!
Use with care!

"""
# NOTE: Only accepts 1 column on purpose! With multiple columns, users may
# wrongly assume the data is sorted by the combined multicolumn values.
if not isinstance(column, str):
msg = "expected a 'str' for argument 'column' in 'set_sorted'"
raise TypeError(msg)
return self.with_columns(F.col(column).set_sorted(descending=descending))

@unstable()
def update(
Expand Down
7 changes: 7 additions & 0 deletions py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,3 +658,10 @@ def test_raise_invalid_arithmetic() -> None:

with pytest.raises(pl.InvalidOperationError):
df.select(pl.col("a") - pl.col("a"))


def test_raise_on_sorted_multi_args() -> None:
    """Passing a list of column names to `set_sorted` must raise a TypeError."""
    frame = pl.DataFrame({"a": [1], "b": [1]})
    # Only the `set_sorted` call is expected to raise, so keep it alone in the
    # `raises` context.
    with pytest.raises(TypeError):
        frame.set_sorted(["a", "b"])  # type: ignore[arg-type]
Loading