Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Feb 15, 2023
1 parent b1da17e commit 50550f9
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 76 deletions.
48 changes: 20 additions & 28 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@
ParallelStrategy,
ParquetCompression,
PivotAgg,
PolarsExprType,
PythonLiteral,
RollingInterpolationMethod,
SizeUnit,
StartBy,
Expand Down Expand Up @@ -3059,20 +3057,23 @@ def replace_at_idx(self, index: int, series: pli.Series) -> Self:

def sort(
self,
by: str | pli.Expr | Sequence[str] | Sequence[pli.Expr],
*,
reverse: bool | list[bool] = False,
by: IntoExpr | Iterable[IntoExpr],
*more_by: IntoExpr,
reverse: bool | Sequence[bool] = False,
nulls_last: bool = False,
) -> Self:
"""
Sort the DataFrame by column.
Sort the dataframe by the given columns.
Parameters
----------
by
By which column to sort. Only accepts string.
Column(s) to sort by. Accepts expression input. Strings are parsed as column
names.
*more_by
Additional columns to sort by, specified as positional arguments.
reverse
Reverse/descending sort.
Sort in descending order.
nulls_last
Place null values last. Can only be used if sorted by a single column.
Expand Down Expand Up @@ -3116,14 +3117,12 @@ def sort(
└─────┴─────┴─────┘
"""
if not isinstance(by, str) and isinstance(by, (Sequence, pli.Expr)):
df = (
self.lazy()
.sort(by, reverse=reverse, nulls_last=nulls_last)
.collect(no_optimization=True)
)
return self._from_pydf(df._df)
return self._from_pydf(self._df.sort(by, reverse, nulls_last))
return self._from_pydf(
self.lazy()
.sort(by, *more_by, reverse=reverse, nulls_last=nulls_last)
.collect(no_optimization=True)
._df
)

def frame_equal(self, other: DataFrame, null_equal: bool = True) -> bool:
"""
Expand Down Expand Up @@ -5730,25 +5729,18 @@ def lazy(self) -> pli.LazyFrame:

def select(
self,
exprs: (
str
| PolarsExprType
| PythonLiteral
| pli.Series
| Iterable[str | PolarsExprType | PythonLiteral | pli.Series | None]
| None
) = None,
*more_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
**named_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
exprs: IntoExpr | Iterable[IntoExpr] | None = None,
*more_exprs: IntoExpr,
**named_exprs: IntoExpr,
) -> Self:
"""
Select columns from this DataFrame.
Parameters
----------
exprs
Column or columns to select. Accepts expression input. Strings are parsed
as column names, other non-expression inputs are parsed as literals.
Column(s) to select. Accepts expression input. Strings are parsed as column
names, other non-expression inputs are parsed as literals.
*more_exprs
Additional columns to select, specified as positional arguments.
**named_exprs
Expand Down
57 changes: 26 additions & 31 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,37 +814,30 @@ def inspect(s: pli.DataFrame) -> pli.DataFrame:

def sort(
self,
by: (
str
| pli.Expr
| Sequence[str]
| Sequence[pli.Expr]
| Sequence[str | pli.Expr]
),
*,
by: IntoExpr | Iterable[IntoExpr],
*more_by: IntoExpr,
reverse: bool | Sequence[bool] = False,
nulls_last: bool = False,
) -> Self:
"""
Sort the DataFrame.
Sorting can be done by:
- A single column name
- An expression
- Multiple expressions
Sort the dataframe by the given columns.
Parameters
----------
by
Column (expressions) to sort by.
Column(s) to sort by. Accepts expression input. Strings are parsed as column
names.
*more_by
Additional columns to sort by, specified as positional arguments.
reverse
Sort in descending order.
nulls_last
Place null values last. Can only be used if sorted by a single column.
Examples
--------
Sort by a single column.
>>> df = pl.DataFrame(
... {
... "foo": [1, 2, 3, None],
Expand Down Expand Up @@ -909,12 +902,21 @@ def sort(
└──────┴─────┴─────┘
"""
if type(by) is str:
# Fast path for sorting by a single existing column
if isinstance(by, str):
return self._from_pyldf(self._ldf.sort(by, reverse, nulls_last))
if type(reverse) is bool:
reverse = [reverse]

by = pli.selection_to_pyexpr_list(by)
by.extend(pli.selection_to_pyexpr_list(more_by))

# TODO: Do this check on the Rust side
if nulls_last and len(by) > 1:
raise ValueError(
"`nulls_last=True` only works when sorting by a single column"
)

if isinstance(reverse, bool):
reverse = [reverse]
return self._from_pyldf(self._ldf.sort_by_exprs(by, reverse, nulls_last))

def profile(
Expand Down Expand Up @@ -1558,25 +1560,18 @@ def filter(self, predicate: pli.Expr | str | pli.Series | list[bool]) -> Self:

def select(
self,
exprs: (
str
| PolarsExprType
| PythonLiteral
| pli.Series
| Iterable[str | PolarsExprType | PythonLiteral | pli.Series | None]
| None
) = None,
*more_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
**named_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
exprs: IntoExpr | Iterable[IntoExpr] | None = None,
*more_exprs: IntoExpr,
**named_exprs: IntoExpr,
) -> Self:
"""
Select columns from this LazyFrame.
Parameters
----------
exprs
Column or columns to select. Accepts expression input. Strings are parsed
as column names, other non-expression inputs are parsed as literals.
Column(s) to select. Accepts expression input. Strings are parsed as column
names, other non-expression inputs are parsed as literals.
*more_exprs
Additional columns to select, specified as positional arguments.
**named_exprs
Expand Down
15 changes: 0 additions & 15 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -967,21 +967,6 @@ impl PyDataFrame {
Ok(PyDataFrame::new(df))
}

/// Sort this frame by a single column and return the result as a new frame.
///
/// * `by_column` - name of the column to sort on.
/// * `reverse` - forwarded as `SortOptions::descending`.
/// * `nulls_last` - forwarded as `SortOptions::nulls_last`.
///
/// The sort is always requested with `multithreaded: true`.
///
/// # Errors
///
/// Any error returned by `sort_with_options` is wrapped in `PyPolarsErr`
/// and propagated to the caller.
pub fn sort(&self, by_column: &str, reverse: bool, nulls_last: bool) -> PyResult<Self> {
    // Build the options first so the call below stays on one line.
    let options = SortOptions {
        descending: reverse,
        nulls_last,
        multithreaded: true,
    };
    let sorted = self
        .df
        .sort_with_options(by_column, options)
        .map_err(PyPolarsErr::from)?;
    Ok(PyDataFrame::new(sorted))
}

pub fn replace(&mut self, column: &str, new_col: PySeries) -> PyResult<()> {
self.df
.replace(column, new_col.series)
Expand Down
5 changes: 3 additions & 2 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,14 +417,15 @@ impl PyLazyFrame {

/// Sort the lazy frame by a list of expressions.
///
/// Clones the wrapped `ldf`, converts the Python-side expressions with
/// `py_exprs_to_exprs`, and forwards them together with `reverse` and
/// `nulls_last` to the inner `sort_by_exprs`. The result is converted
/// into a `PyLazyFrame` via `.into()`.
// NOTE(review): `by_column`/`by` and the two `let exprs` bindings look like the
// before/after sides of a parameter rename shown together — confirm which is live.
pub fn sort_by_exprs(
&self,
by_column: Vec<PyExpr>,
by: Vec<PyExpr>,
reverse: Vec<bool>,
nulls_last: bool,
) -> PyLazyFrame {
// Work on a clone so `self` is left untouched.
let ldf = self.ldf.clone();
let exprs = py_exprs_to_exprs(by_column);
let exprs = py_exprs_to_exprs(by);
ldf.sort_by_exprs(exprs, reverse, nulls_last).into()
}

pub fn cache(&self) -> PyLazyFrame {
let ldf = self.ldf.clone();
ldf.cache().into()
Expand Down

0 comments on commit 50550f9

Please sign in to comment.