refactor(python): Deprecate with_column

As `with_columns` provides a superset of its functionality.
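
A minimal migration sketch (illustrative only; the frame and column names below are hypothetical, and the commented-out call assumes a Polars version earlier than 0.17.0):

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

# Before (deprecated; emits a DeprecationWarning and is removed in 0.17.0):
# df = df.with_column((pl.col("a") * 2).alias("a_doubled"))

# After: with_columns accepts a single expression as well as a sequence of them.
df = df.with_columns((pl.col("a") * 2).alias("a_doubled"))
df = df.with_columns(
    [
        (pl.col("a") + 1).alias("a_plus_1"),
        (pl.col("a") ** 2).alias("a_squared"),
    ]
)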

Remove test script

Synchronize input arg parsing between DataFrame and LazyFrame with_columns

Update py-polars/polars/internals/dataframe/frame.py

Co-authored-by: Stijn de Gooijer <stijn@degooijer.io>

Update py-polars/polars/internals/lazyframe/frame.py

Co-authored-by: Stijn de Gooijer <stijn@degooijer.io>

Update py-polars/polars/internals/dataframe/frame.py

Co-authored-by: Stijn de Gooijer <stijn@degooijer.io>

Update docstrings

Fix doctests

Revert Cargo.lock

Specify v0.17 in deprecation warnings

Fix faulty merge
zundertj committed Jan 22, 2023
1 parent 2382a32 commit e58febb
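
For code bases that need time to migrate, the DeprecationWarning introduced here can be silenced with the standard-library warnings module. This is a generic sketch, not part of the commit; the frame and column names are hypothetical:

import warnings

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        message="`with_column` has been deprecated",
        category=DeprecationWarning,
    )
    # The deprecated call keeps working, without a visible warning, until 0.17.0.
    out = df.with_column((pl.col("a") * 2).alias("a_doubled"))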
Showing 47 changed files with 356 additions and 313 deletions.
85 changes: 48 additions & 37 deletions py-polars/polars/internals/dataframe/frame.py
@@ -3302,7 +3302,7 @@ def pipe(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
Examples
--------
>>> def cast_str_to_int(data, col_name):
... return data.with_column(pl.col(col_name).cast(pl.Int64))
... return data.with_columns(pl.col(col_name).cast(pl.Int64))
...
>>> df = pl.DataFrame({"a": [1, 2, 3, 4], "b": ["10", "20", "30", "40"]})
>>> df.pipe(cast_str_to_int, col_name="b")
@@ -3525,7 +3525,7 @@ def groupby_rolling(
... "2020-01-03 19:45:32",
... "2020-01-08 23:16:43",
... ]
>>> df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).with_column(
>>> df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).with_columns(
... pl.col("dt").str.strptime(pl.Datetime)
... )
>>> out = df.groupby_rolling(index_column="dt", period="2d").agg(
@@ -4308,44 +4308,23 @@ def with_column(self, column: pli.Series | pli.Expr) -> DataFrame:
Creating a new DataFrame using this method does not create a new copy of
existing data.
.. deprecated:: 0.15.14
`with_column` will be removed in favor of the more generic `with_columns`
in version 0.17.0.
Parameters
----------
column
Series, where the name of the Series refers to the column in the DataFrame.
Examples
--------
>>> df = pl.DataFrame(
... {
... "a": [1, 3, 5],
... "b": [2, 4, 6],
... }
... )
>>> df.with_column((pl.col("b") ** 2).alias("b_squared")) # added
shape: (3, 3)
┌─────┬─────┬───────────┐
│ a ┆ b ┆ b_squared │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ f64 │
╞═════╪═════╪═══════════╡
│ 1 ┆ 2 ┆ 4.0 │
│ 3 ┆ 4 ┆ 16.0 │
│ 5 ┆ 6 ┆ 36.0 │
└─────┴─────┴───────────┘
>>> df.with_column(pl.col("a") ** 2) # replaced
shape: (3, 2)
┌──────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ i64 │
╞══════╪═════╡
│ 1.0 ┆ 2 │
│ 9.0 ┆ 4 │
│ 25.0 ┆ 6 │
└──────┴─────┘
"""
return self.lazy().with_column(column).collect(no_optimization=True)
warnings.warn(
"`with_column` has been deprecated in favor of `with_columns`."
" This method will be removed in version 0.17.0",
category=DeprecationWarning,
stacklevel=2,
)
return self.lazy().with_columns(column).collect(no_optimization=True)

def hstack(
self: DF,
@@ -5166,7 +5145,7 @@ def unstack(

if how == "horizontal":
df = (
df.with_column( # type: ignore[assignment]
df.with_columns( # type: ignore[assignment]
(pli.arange(0, n_cols * n_rows, eager=True) % n_cols).alias(
"__sort_order"
),
@@ -5621,6 +5600,40 @@ def with_columns(
... "c": [True, True, False, True],
... }
... )
Passing in a single expression, adding a new column since the alias does not match an existing name:
>>> df.with_columns((pl.col("a") ** 2).alias("a^2"))
shape: (4, 4)
┌─────┬──────┬───────┬──────┐
│ a ┆ b ┆ c ┆ a^2 │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ bool ┆ f64 │
╞═════╪══════╪═══════╪══════╡
│ 1 ┆ 0.5 ┆ true ┆ 1.0 │
│ 2 ┆ 4.0 ┆ true ┆ 4.0 │
│ 3 ┆ 10.0 ┆ false ┆ 9.0 │
│ 4 ┆ 13.0 ┆ true ┆ 16.0 │
└─────┴──────┴───────┴──────┘
We can also overwrite a column by giving the expression a name that already exists:
>>> df.with_columns((pl.col("a") ** 2).alias("c"))
shape: (4, 3)
┌─────┬──────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ f64 │
╞═════╪══════╪══════╡
│ 1 ┆ 0.5 ┆ 1.0 │
│ 2 ┆ 4.0 ┆ 4.0 │
│ 3 ┆ 10.0 ┆ 9.0 │
│ 4 ┆ 13.0 ┆ 16.0 │
└─────┴──────┴──────┘
Passing in multiple expressions as a list:
>>> df.with_columns(
... [
... (pl.col("a") ** 2).alias("a^2"),
@@ -5661,8 +5674,6 @@ def with_columns(
└─────┴──────┴───────┴──────┴───────┘
"""
if exprs is not None and not isinstance(exprs, Sequence):
exprs = [exprs]
return (
self.lazy().with_columns(exprs, **named_exprs).collect(no_optimization=True)
)
32 changes: 16 additions & 16 deletions py-polars/polars/internals/expr/expr.py
@@ -828,7 +828,7 @@ def is_null(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.all().is_null().suffix("_isnull")) # nan != null
>>> df.with_columns(pl.all().is_null().suffix("_isnull")) # nan != null
shape: (5, 4)
┌──────┬─────┬──────────┬──────────┐
│ a ┆ b ┆ a_isnull ┆ b_isnull │
@@ -857,7 +857,7 @@ def is_not_null(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.all().is_not_null().suffix("_not_null")) # nan != null
>>> df.with_columns(pl.all().is_not_null().suffix("_not_null")) # nan != null
shape: (5, 4)
┌──────┬─────┬────────────┬────────────┐
│ a ┆ b ┆ a_not_null ┆ b_not_null │
@@ -953,7 +953,7 @@ def is_nan(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.col(pl.Float64).is_nan().suffix("_isnan"))
>>> df.with_columns(pl.col(pl.Float64).is_nan().suffix("_isnan"))
shape: (5, 3)
┌──────┬─────┬─────────┐
│ a ┆ b ┆ b_isnan │
@@ -987,7 +987,7 @@ def is_not_nan(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.col(pl.Float64).is_not_nan().suffix("_is_not_nan"))
>>> df.with_columns(pl.col(pl.Float64).is_not_nan().suffix("_is_not_nan"))
shape: (5, 3)
┌──────┬─────┬──────────────┐
│ a ┆ b ┆ b_is_not_nan │
@@ -2693,7 +2693,7 @@ def over(self, expr: str | Expr | list[Expr | str]) -> Expr:
... }
... )
>>> (
... df.with_column(
... df.with_columns(
... pl.col("values").max().over("groups").alias("max_by_group")
... )
... )
@@ -2781,7 +2781,7 @@ def is_first(self) -> Expr:
... "num": [1, 2, 3, 1, 5],
... }
... )
>>> (df.with_column(pl.col("num").is_first().alias("is_first")))
>>> (df.with_columns(pl.col("num").is_first().alias("is_first")))
shape: (5, 2)
┌─────┬──────────┐
│ num ┆ is_first │
@@ -3076,7 +3076,7 @@ def apply(
In a selection context, the function is applied by row.
>>> (
... df.with_column(
... df.with_columns(
... pl.col("a").apply(lambda x: x * 2).alias("a_times_2"),
... )
... )
@@ -3095,7 +3095,7 @@ def apply(
It is better to implement this with an expression:
>>> (
... df.with_column(
... df.with_columns(
... (pl.col("a") * 2).alias("a_times_2"),
... )
... ) # doctest: +IGNORE_RESULT
@@ -3438,7 +3438,7 @@ def is_between(
Examples
--------
>>> df = pl.DataFrame({"num": [1, 2, 3, 4, 5]})
>>> df.with_column(pl.col("num").is_between(2, 4))
>>> df.with_columns(pl.col("num").is_between(2, 4))
shape: (5, 2)
┌─────┬────────────┐
│ num ┆ is_between │
@@ -3454,7 +3454,7 @@ def is_between(
Use the ``closed`` argument to include or exclude the values at the bounds.
>>> df.with_column(pl.col("num").is_between(2, 4, closed="left"))
>>> df.with_columns(pl.col("num").is_between(2, 4, closed="left"))
shape: (5, 2)
┌─────┬────────────┐
│ num ┆ is_between │
@@ -3545,7 +3545,7 @@ def hash(
... "b": ["x", None, "z"],
... }
... )
>>> df.with_column(pl.all().hash(10, 20, 30, 40)) # doctest: +IGNORE_RESULT
>>> df.with_columns(pl.all().hash(10, 20, 30, 40)) # doctest: +IGNORE_RESULT
shape: (3, 2)
┌──────────────────────┬──────────────────────┐
│ a ┆ b │
@@ -3675,7 +3675,7 @@ def interpolate(self, method: InterpolationMethod = "linear") -> Expr:
>>> (
... df_new_grid.join(
... df_original_grid, on="grid_points", how="left"
... ).with_column(pl.col("values").interpolate())
... ).with_columns(pl.col("values").interpolate())
... )
shape: (10, 2)
┌─────────────┬────────┐
@@ -4740,7 +4740,7 @@ def pct_change(self, n: int = 1) -> Expr:
... "a": [10, 11, 12, None, 12],
... }
... )
>>> df.with_column(pl.col("a").pct_change().alias("pct_change"))
>>> df.with_columns(pl.col("a").pct_change().alias("pct_change"))
shape: (5, 2)
┌──────┬────────────┐
│ a ┆ pct_change │
@@ -4866,7 +4866,7 @@ def clip(self, min_val: int | float, max_val: int | float) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"foo": [-50, 5, None, 50]})
>>> df.with_column(pl.col("foo").clip(1, 10).alias("foo_clipped"))
>>> df.with_columns(pl.col("foo").clip(1, 10).alias("foo_clipped"))
shape: (4, 2)
┌──────┬─────────────┐
│ foo ┆ foo_clipped │
@@ -4899,7 +4899,7 @@ def clip_min(self, min_val: int | float) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"foo": [-50, 5, None, 50]})
>>> df.with_column(pl.col("foo").clip_min(0).alias("foo_clipped"))
>>> df.with_columns(pl.col("foo").clip_min(0).alias("foo_clipped"))
shape: (4, 2)
┌──────┬─────────────┐
│ foo ┆ foo_clipped │
@@ -4932,7 +4932,7 @@ def clip_max(self, max_val: int | float) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"foo": [-50, 5, None, 50]})
>>> df.with_column(pl.col("foo").clip_max(0).alias("foo_clipped"))
>>> df.with_columns(pl.col("foo").clip_max(0).alias("foo_clipped"))
shape: (4, 2)
┌──────┬─────────────┐
│ foo ┆ foo_clipped │
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/list.py
@@ -699,7 +699,7 @@ def eval(self, expr: pli.Expr, parallel: bool = False) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df.with_column(
>>> df.with_columns(
... pl.concat_list(["a", "b"]).arr.eval(pl.element().rank()).alias("rank")
... )
shape: (3, 3)
16 changes: 8 additions & 8 deletions py-polars/polars/internals/expr/string.py
@@ -76,7 +76,7 @@ def strptime(
... ],
... )
>>> (
... s.to_frame().with_column(
... s.to_frame().with_columns(
... pl.col("date")
... .str.strptime(pl.Date, "%F", strict=False)
... .fill_null(
@@ -401,7 +401,7 @@ def zfill(self, alignment: int) -> pli.Expr:
... "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
... }
... )
>>> df.with_column(pl.col("num").cast(str).str.zfill(5))
>>> df.with_columns(pl.col("num").cast(str).str.zfill(5))
shape: (11, 1)
┌─────────┐
│ num │
@@ -543,7 +543,7 @@ def ends_with(self, sub: str) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"fruits": ["apple", "mango", None]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("fruits").str.ends_with("go").alias("has_suffix"),
... )
shape: (3, 2)
@@ -589,7 +589,7 @@ def starts_with(self, sub: str | pli.Expr) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"fruits": ["apple", "mango", None]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("fruits").str.starts_with("app").alias("has_prefix"),
... )
shape: (3, 2)
@@ -1034,7 +1034,7 @@ def replace(
Examples
--------
>>> df = pl.DataFrame({"id": [1, 2], "text": ["123abc", "abc456"]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("text").str.replace(r"abc\b", "ABC")
... ) # doctest: +IGNORE_RESULT
shape: (2, 2)
@@ -1076,7 +1076,7 @@ def replace_all(
Examples
--------
>>> df = pl.DataFrame({"id": [1, 2], "text": ["abcabc", "123a123"]})
>>> df.with_column(pl.col("text").str.replace_all("a", "-"))
>>> df.with_columns(pl.col("text").str.replace_all("a", "-"))
shape: (2, 2)
┌─────┬─────────┐
│ id ┆ text │
@@ -1115,7 +1115,7 @@ def slice(self, offset: int, length: int | None = None) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"s": ["pear", None, "papaya", "dragonfruit"]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("s").str.slice(-3).alias("s_sliced"),
... )
shape: (4, 2)
@@ -1132,7 +1132,7 @@ def slice(self, offset: int, length: int | None = None) -> pli.Expr:
Using the optional `length` parameter
>>> df.with_column(
>>> df.with_columns(
... pl.col("s").str.slice(4, length=3).alias("s_sliced"),
... )
shape: (4, 2)
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/struct.py
@@ -79,7 +79,7 @@ def rename_fields(self, names: list[str]) -> pli.Expr:
... .to_struct("my_struct")
... .to_frame()
... )
>>> df = df.with_column(
>>> df = df.with_columns(
... pl.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
... )
6 changes: 3 additions & 3 deletions py-polars/polars/internals/functions.py
@@ -551,17 +551,17 @@ def cut(
if labels:
if len(labels) != len(bins) + 1:
raise ValueError("expected more labels")
cuts_df = cuts_df.with_column(pli.Series(name=category_label, values=labels))
cuts_df = cuts_df.with_columns(pli.Series(name=category_label, values=labels))
else:
cuts_df = cuts_df.with_column(
cuts_df = cuts_df.with_columns(
pli.format(
"({}, {}]",
pli.col(break_point_label).shift_and_fill(1, float("-inf")),
pli.col(break_point_label),
).alias(category_label)
)

cuts_df = cuts_df.with_column(pli.col(category_label).cast(Categorical))
cuts_df = cuts_df.with_columns(pli.col(category_label).cast(Categorical))

result = (
s.cast(Float64)
[Diff for the remaining changed files not shown]
