pola-rs · ritchie46 · Feb 17, 2023 · Feb 15, 2023
diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py
@@ -132,8 +132,6 @@
         ParallelStrategy,
         ParquetCompression,
         PivotAgg,
-        PolarsExprType,
-        PythonLiteral,
         RollingInterpolationMethod,
         SizeUnit,
         StartBy,
@@ -3060,71 +3058,99 @@ def replace_at_idx(self, index: int, series: pli.Series) -> Self:
 
     def sort(
         self,
-        by: str | pli.Expr | Sequence[str] | Sequence[pli.Expr],
-        *,
-        reverse: bool | list[bool] = False,
+        by: IntoExpr | Iterable[IntoExpr],
+        *more_by: IntoExpr,
+        reverse: bool | Sequence[bool] = False,
         nulls_last: bool = False,
     ) -> Self:
         """
-        Sort the DataFrame by column.
+        Sort the dataframe by the given columns.
 
         Parameters
         ----------
         by
-            By which column to sort. Only accepts string.
+            Column(s) to sort by. Accepts expression input. Strings are parsed as column
+            names.
+        *more_by
+            Additional columns to sort by, specified as positional arguments.
         reverse
-            Reverse/descending sort.
+            Sort in descending order. When sorting by multiple columns, can be specified
+            per column by passing a sequence of booleans.
         nulls_last
-            Place null values last. Can only be used if sorted by a single column.
+            Place null values last. Can only be used when sorting by a single column.
 
         Examples
         --------
+        Pass a single column name to sort by that column.
+
         >>> df = pl.DataFrame(
         ...     {
-        ...         "foo": [1, 2, 3],
-        ...         "bar": [6.0, 7.0, 8.0],
-        ...         "ham": ["a", "b", "c"],
+        ...         "a": [1, 2, None],
+        ...         "b": [6.0, 5.0, 4.0],
+        ...         "c": ["a", "c", "b"],
         ...     }
         ... )
-        >>> df.sort("foo", reverse=True)
+        >>> df.sort("a")
         shape: (3, 3)
-        ┌─────┬─────┬─────┐
-        │ foo ┆ bar ┆ ham │
-        │ --- ┆ --- ┆ --- │
-        │ i64 ┆ f64 ┆ str │
-        ╞═════╪═════╪═════╡
-        │ 3   ┆ 8.0 ┆ c   │
-        │ 2   ┆ 7.0 ┆ b   │
-        │ 1   ┆ 6.0 ┆ a   │
-        └─────┴─────┴─────┘
-
-        **Sort by multiple columns.**
-        For multiple columns we can also use expression syntax.
-
-        >>> df.sort(
-        ...     [pl.col("foo"), pl.col("bar") ** 2],
-        ...     reverse=[True, False],
-        ... )
+        ┌──────┬─────┬─────┐
+        │ a    ┆ b   ┆ c   │
+        │ ---  ┆ --- ┆ --- │
+        │ i64  ┆ f64 ┆ str │
+        ╞══════╪═════╪═════╡
+        │ null ┆ 4.0 ┆ b   │
+        │ 1    ┆ 6.0 ┆ a   │
+        │ 2    ┆ 5.0 ┆ c   │
+        └──────┴─────┴─────┘
+
+        Sorting by expressions is also supported.
+
+        >>> df.sort(pl.col("a") + pl.col("b") * 2, nulls_last=True)
         shape: (3, 3)
-        ┌─────┬─────┬─────┐
-        │ foo ┆ bar ┆ ham │
-        │ --- ┆ --- ┆ --- │
-        │ i64 ┆ f64 ┆ str │
-        ╞═════╪═════╪═════╡
-        │ 3   ┆ 8.0 ┆ c   │
-        │ 2   ┆ 7.0 ┆ b   │
-        │ 1   ┆ 6.0 ┆ a   │
-        └─────┴─────┴─────┘
+        ┌──────┬─────┬─────┐
+        │ a    ┆ b   ┆ c   │
+        │ ---  ┆ --- ┆ --- │
+        │ i64  ┆ f64 ┆ str │
+        ╞══════╪═════╪═════╡
+        │ 2    ┆ 5.0 ┆ c   │
+        │ 1    ┆ 6.0 ┆ a   │
+        │ null ┆ 4.0 ┆ b   │
+        └──────┴─────┴─────┘
+
+        Sort by multiple columns by passing a list of columns.
+
+        >>> df.sort(["c", "a"], reverse=True)
+        shape: (3, 3)
+        ┌──────┬─────┬─────┐
+        │ a    ┆ b   ┆ c   │
+        │ ---  ┆ --- ┆ --- │
+        │ i64  ┆ f64 ┆ str │
+        ╞══════╪═════╪═════╡
+        │ 2    ┆ 5.0 ┆ c   │
+        │ null ┆ 4.0 ┆ b   │
+        │ 1    ┆ 6.0 ┆ a   │
+        └──────┴─────┴─────┘
+
+        Or use positional arguments to sort by multiple columns in the same way.
+
+        >>> df.sort("c", "a", reverse=[False, True])
+        shape: (3, 3)
+        ┌──────┬─────┬─────┐
+        │ a    ┆ b   ┆ c   │
+        │ ---  ┆ --- ┆ --- │
+        │ i64  ┆ f64 ┆ str │
+        ╞══════╪═════╪═════╡
+        │ 1    ┆ 6.0 ┆ a   │
+        │ null ┆ 4.0 ┆ b   │
+        │ 2    ┆ 5.0 ┆ c   │
+        └──────┴─────┴─────┘
 
         """
-        if not isinstance(by, str) and isinstance(by, (Sequence, pli.Expr)):
-            df = (
-                self.lazy()
-                .sort(by, reverse=reverse, nulls_last=nulls_last)
-                .collect(no_optimization=True)
-            )
-            return self._from_pydf(df._df)
-        return self._from_pydf(self._df.sort(by, reverse, nulls_last))
+        return self._from_pydf(
+            self.lazy()
+            .sort(by, *more_by, reverse=reverse, nulls_last=nulls_last)
+            .collect(no_optimization=True)
+            ._df
+        )
 
     def frame_equal(self, other: DataFrame, null_equal: bool = True) -> bool:
         """
@@ -5731,25 +5757,18 @@ def lazy(self) -> pli.LazyFrame:
 
     def select(
         self,
-        exprs: (
-            str
-            | PolarsExprType
-            | PythonLiteral
-            | pli.Series
-            | Iterable[str | PolarsExprType | PythonLiteral | pli.Series | None]
-            | None
-        ) = None,
-        *more_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
-        **named_exprs: str | PolarsExprType | PythonLiteral | pli.Series | None,
+        exprs: IntoExpr | Iterable[IntoExpr] | None = None,
+        *more_exprs: IntoExpr,
+        **named_exprs: IntoExpr,
     ) -> Self:
         """
         Select columns from this DataFrame.
 
         Parameters
         ----------
         exprs
-            Column or columns to select. Accepts expression input. Strings are parsed
-            as column names, other non-expression inputs are parsed as literals.
+            Column(s) to select. Accepts expression input. Strings are parsed as column
+            names, other non-expression inputs are parsed as literals.
         *more_exprs
             Additional columns to select, specified as positional arguments.
         **named_exprs