From e9db32161c16ccc92b566a4fd333fe0e63c67024 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 16 Jan 2024 13:50:46 +0000 Subject: [PATCH] docs: fixup incorrect "coming from pandas" syntax --- docs/user-guide/migration/pandas.md | 42 ++++++++++------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/docs/user-guide/migration/pandas.md b/docs/user-guide/migration/pandas.md index 4c65f3023917..dc57354c43ab 100644 --- a/docs/user-guide/migration/pandas.md +++ b/docs/user-guide/migration/pandas.md @@ -252,8 +252,7 @@ and then joins the result back to the original `DataFrame` producing: In Polars the same can be achieved with `window` functions: ```python -df.select( - pl.all(), +df.with_columns( pl.col("type").count().over("c").alias("size") ) ``` @@ -266,17 +265,11 @@ shape: (7, 3) │ i64 ┆ str ┆ u32 │ ╞═════╪══════╪══════╡ │ 1 ┆ m ┆ 3 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ 1 ┆ n ┆ 3 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ 1 ┆ o ┆ 3 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ 2 ┆ m ┆ 4 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ 2 ┆ m ┆ 4 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ 2 ┆ n ┆ 4 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤ │ 2 ┆ n ┆ 4 │ └─────┴──────┴──────┘ ``` @@ -285,15 +278,14 @@ Because we can store the whole operation in a single expression, we can combine `window` functions and even combine different groups! Polars will cache window expressions that are applied over the same group, so storing -them in a single `select` is both convenient **and** optimal. In the following example +them in a single `with_columns` is both convenient **and** optimal. In the following example we look at a case where we are calculating group statistics over `"c"` twice: ```python -df.select( - pl.all(), +df.with_columns( pl.col("c").count().over("c").alias("size"), pl.col("c").sum().over("type").alias("sum"), - pl.col("c").reverse().over("c").flatten().alias("reverse_type") + pl.col("type").reverse().over("c").alias("reverse_type") ) ``` @@ -302,21 +294,15 @@ shape: (7, 5) ┌─────┬──────┬──────┬─────┬──────────────┐ │ c ┆ type ┆ size ┆ sum ┆ reverse_type │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -│ i64 ┆ str ┆ u32 ┆ i64 ┆ i64 │ +│ i64 ┆ str ┆ u32 ┆ i64 ┆ str │ ╞═════╪══════╪══════╪═════╪══════════════╡ -│ 1 ┆ m ┆ 3 ┆ 5 ┆ 2 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 1 ┆ n ┆ 3 ┆ 5 ┆ 2 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 1 ┆ o ┆ 3 ┆ 1 ┆ 2 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 2 ┆ m ┆ 4 ┆ 5 ┆ 2 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 2 ┆ m ┆ 4 ┆ 5 ┆ 1 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 2 ┆ n ┆ 4 ┆ 5 ┆ 1 │ -├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ -│ 2 ┆ n ┆ 4 ┆ 5 ┆ 1 │ +│ 1 ┆ m ┆ 3 ┆ 5 ┆ o │ +│ 1 ┆ n ┆ 3 ┆ 5 ┆ n │ +│ 1 ┆ o ┆ 3 ┆ 1 ┆ m │ +│ 2 ┆ m ┆ 4 ┆ 5 ┆ n │ +│ 2 ┆ m ┆ 4 ┆ 5 ┆ n │ +│ 2 ┆ n ┆ 4 ┆ 5 ┆ m │ +│ 2 ┆ n ┆ 4 ┆ 5 ┆ m │ └─────┴──────┴──────┴─────┴──────────────┘ ``` @@ -355,7 +341,7 @@ def add_ham(df: pd.DataFrame) -> pd.DataFrame: .pipe(add_foo) .pipe(add_bar) .pipe(add_ham) - ) +) ``` If we do this in polars, we would create 3 `with_column` contexts, that forces Polars to run the 3 pipes sequentially, @@ -407,7 +393,7 @@ def get_ham(input_column: str) -> pl.Expr: return pl.col(input_column).some_computation().alias("ham") # Use pipe (just once) to get hold of the schema of the LazyFrame. -lf.pipe(lambda lf.with_columns( +lf.pipe(lambda lf: lf.with_columns( get_ham("col_a"), get_bar("col_b", lf.schema), get_foo("col_c", lf.schema),