Skip to content

Commit

Permalink
feat(python): More ergonomic sort args
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Feb 16, 2023
1 parent 1f547e5 commit 5e57179
Show file tree
Hide file tree
Showing 9 changed files with 387 additions and 224 deletions.
137 changes: 78 additions & 59 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,6 @@
ParallelStrategy,
ParquetCompression,
PivotAgg,
PolarsExprType,
PythonLiteral,
RollingInterpolationMethod,
SizeUnit,
StartBy,
Expand Down Expand Up @@ -3060,71 +3058,99 @@ def replace_at_idx(self, index: int, series: pli.Series) -> Self:

def sort(
self,
by: str | pli.Expr | Sequence[str] | Sequence[pli.Expr],
*,
reverse: bool | list[bool] = False,
by: IntoExpr | Iterable[IntoExpr],
*more_by: IntoExpr,
reverse: bool | Sequence[bool] = False,
nulls_last: bool = False,
) -> Self:
"""
Sort the DataFrame by column.
Sort the dataframe by the given columns.
Parameters
----------
by
By which column to sort. Only accepts string.
Column(s) to sort by. Accepts expression input. Strings are parsed as column
names.
*more_by
Additional columns to sort by, specified as positional arguments.
reverse
Reverse/descending sort.
Sort in descending order. When sorting by multiple columns, can be specified
per column by passing a sequence of booleans.
nulls_last
Place null values last. Can only be used if sorted by a single column.
Place null values last. Can only be used when sorting by a single column.
Examples
--------
Pass a single column name to sort by that column.
>>> df = pl.DataFrame(
... {
... "foo": [1, 2, 3],
... "bar": [6.0, 7.0, 8.0],
... "ham": ["a", "b", "c"],
... "a": [1, 2, None],
... "b": [6.0, 5.0, 4.0],
... "c": ["a", "c", "b"],
... }
... )
>>> df.sort("foo", reverse=True)
>>> df.sort("a")
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 3 ┆ 8.0 ┆ c │
│ 2 ┆ 7.0 ┆ b │
│ 1 ┆ 6.0 ┆ a │
└─────┴─────┴─────┘
**Sort by multiple columns.**
For multiple columns we can also use expression syntax.
>>> df.sort(
... [pl.col("foo"), pl.col("bar") ** 2],
... reverse=[True, False],
... )
┌──────┬─────┬─────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ null ┆ 4.0 ┆ b │
│ 1 ┆ 6.0 ┆ a │
│ 2 ┆ 5.0 ┆ c │
└──────┴─────┴─────┘
Sorting by expressions is also supported.
>>> df.sort(pl.col("a") + pl.col("b") * 2, nulls_last=True)
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 3 ┆ 8.0 ┆ c │
│ 2 ┆ 7.0 ┆ b │
│ 1 ┆ 6.0 ┆ a │
└─────┴─────┴─────┘
┌──────┬─────┬─────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ 2 ┆ 5.0 ┆ c │
│ 1 ┆ 6.0 ┆ a │
│ null ┆ 4.0 ┆ b │
└──────┴─────┴─────┘
Sort by multiple columns by passing a list of columns.
>>> df.sort(["c", "a"], reverse=True)
shape: (3, 3)
┌──────┬─────┬─────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ 2 ┆ 5.0 ┆ c │
│ null ┆ 4.0 ┆ b │
│ 1 ┆ 6.0 ┆ a │
└──────┴─────┴─────┘
Or use positional arguments to sort by multiple columns in the same way.
>>> df.sort("c", "a", reverse=[False, True])
shape: (3, 3)
┌──────┬─────┬─────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞══════╪═════╪═════╡
│ 1 ┆ 6.0 ┆ a │
│ null ┆ 4.0 ┆ b │
│ 2 ┆ 5.0 ┆ c │
└──────┴─────┴─────┘
"""
if not isinstance(by, str) and isinstance(by, (Sequence, pli.Expr)):
df = (
self.lazy()
.sort(by, reverse=reverse, nulls_last=nulls_last)
.collect(no_optimization=True)
)
return self._from_pydf(df._df)
return self._from_pydf(self._df.sort(by, reverse, nulls_last))
return self._from_pydf(
self.lazy()
.sort(by, *more_by, reverse=reverse, nulls_last=nulls_last)
.collect(no_optimization=True)
._df
)

def frame_equal(self, other: DataFrame, null_equal: bool = True) -> bool:
"""
Expand Down Expand Up @@ -5731,25 +5757,18 @@ def lazy(self) -> pli.LazyFrame:

def select(
self,
exprs: (
str
| PolarsExprType
| PythonLiteral
| pli.Series
| Iterable[str | PolarsExprType | PythonLiteral | pli.Series | None]
| None
) = None,
*more_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
**named_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
exprs: IntoExpr | Iterable[IntoExpr] | None = None,
*more_exprs: IntoExpr,
**named_exprs: IntoExpr,
) -> Self:
"""
Select columns from this DataFrame.
Parameters
----------
exprs
Column or columns to select. Accepts expression input. Strings are parsed
as column names, other non-expression inputs are parsed as literals.
Column(s) to select. Accepts expression input. Strings are parsed as column
names, other non-expression inputs are parsed as literals.
*more_exprs
Additional columns to select, specified as positional arguments.
**named_exprs
Expand Down
Loading

0 comments on commit 5e57179

Please sign in to comment.