refactor(python): Deprecate with_column

As `with_columns` provides a superset of its functionality.
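
A minimal migration sketch (illustrative only; the frame and column names below are hypothetical, and the commented-out call assumes a Polars version earlier than 0.17.0):

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

# Before (deprecated; emits a DeprecationWarning and is removed in 0.17.0):
# df = df.with_column((pl.col("a") * 2).alias("a_doubled"))

# After: with_columns accepts a single expression as well as a sequence of them.
df = df.with_columns((pl.col("a") * 2).alias("a_doubled"))
df = df.with_columns(
    [
        (pl.col("a") + 1).alias("a_plus_1"),
        (pl.col("a") ** 2).alias("a_squared"),
    ]
)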

Remove test script

Synchronize input arg parsing between DataFrame and LazyFrame with_columns

Update py-polars/polars/internals/dataframe/frame.py

Co-authored-by: Stijn de Gooijer <stijn@degooijer.io>

Update py-polars/polars/internals/lazyframe/frame.py

Co-authored-by: Stijn de Gooijer <stijn@degooijer.io>

Update py-polars/polars/internals/dataframe/frame.py

Co-authored-by: Stijn de Gooijer <stijn@degooijer.io>

Update docstrings

Fix doctests

Revert Cargo.lock

Specify v0.17 in deprecation warnings

Fix faulty merge
zundertj committed Jan 22, 2023
1 parent 2382a32 commit e58febb
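
For code bases that need time to migrate, the DeprecationWarning introduced here can be silenced with the standard-library warnings module. This is a generic sketch, not part of the commit; the frame and column names are hypothetical:

import warnings

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore",
        message="`with_column` has been deprecated",
        category=DeprecationWarning,
    )
    # The deprecated call keeps working, without a visible warning, until 0.17.0.
    out = df.with_column((pl.col("a") * 2).alias("a_doubled"))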
Showing 47 changed files with 356 additions and 313 deletions.
85 changes: 48 additions & 37 deletions py-polars/polars/internals/dataframe/frame.py
@@ -3302,7 +3302,7 @@ def pipe(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
Examples
--------
>>> def cast_str_to_int(data, col_name):
... return data.with_column(pl.col(col_name).cast(pl.Int64))
... return data.with_columns(pl.col(col_name).cast(pl.Int64))
...
>>> df = pl.DataFrame({"a": [1, 2, 3, 4], "b": ["10", "20", "30", "40"]})
>>> df.pipe(cast_str_to_int, col_name="b")
@@ -3525,7 +3525,7 @@ def groupby_rolling(
... "2020-01-03 19:45:32",
... "2020-01-08 23:16:43",
... ]
>>> df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).with_column(
>>> df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).with_columns(
... pl.col("dt").str.strptime(pl.Datetime)
... )
>>> out = df.groupby_rolling(index_column="dt", period="2d").agg(
@@ -4308,44 +4308,23 @@ def with_column(self, column: pli.Series | pli.Expr) -> DataFrame:
Creating a new DataFrame using this method does not create a new copy of
existing data.
.. deprecated:: 0.15.14
`with_column` will be removed in favor of the more generic `with_columns`
in version 0.17.0.
Parameters
----------
column
Series, where the name of the Series refers to the column in the DataFrame.
Examples
--------
>>> df = pl.DataFrame(
... {
... "a": [1, 3, 5],
... "b": [2, 4, 6],
... }
... )
>>> df.with_column((pl.col("b") ** 2).alias("b_squared")) # added
shape: (3, 3)
┌─────┬─────┬───────────┐
│ a ┆ b ┆ b_squared │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ f64 │
╞═════╪═════╪═══════════╡
│ 1 ┆ 2 ┆ 4.0 │
│ 3 ┆ 4 ┆ 16.0 │
│ 5 ┆ 6 ┆ 36.0 │
└─────┴─────┴───────────┘
>>> df.with_column(pl.col("a") ** 2) # replaced
shape: (3, 2)
┌──────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ i64 │
╞══════╪═════╡
│ 1.0 ┆ 2 │
│ 9.0 ┆ 4 │
│ 25.0 ┆ 6 │
└──────┴─────┘
"""
return self.lazy().with_column(column).collect(no_optimization=True)
warnings.warn(
"`with_column` has been deprecated in favor of `with_columns`."
" This method will be removed in version 0.17.0",
category=DeprecationWarning,
stacklevel=2,
)
return self.lazy().with_columns(column).collect(no_optimization=True)

def hstack(
self: DF,
@@ -5166,7 +5145,7 @@ def unstack(

if how == "horizontal":
df = (
df.with_column( # type: ignore[assignment]
df.with_columns( # type: ignore[assignment]
(pli.arange(0, n_cols * n_rows, eager=True) % n_cols).alias(
"__sort_order"
),
@@ -5621,6 +5600,40 @@ def with_columns(
... "c": [True, True, False, True],
... }
... )
Passing in a single expression, adding a new column since the alias does not match an existing name:
>>> df.with_columns((pl.col("a") ** 2).alias("a^2"))
shape: (4, 4)
┌─────┬──────┬───────┬──────┐
│ a ┆ b ┆ c ┆ a^2 │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ bool ┆ f64 │
╞═════╪══════╪═══════╪══════╡
│ 1 ┆ 0.5 ┆ true ┆ 1.0 │
│ 2 ┆ 4.0 ┆ true ┆ 4.0 │
│ 3 ┆ 10.0 ┆ false ┆ 9.0 │
│ 4 ┆ 13.0 ┆ true ┆ 16.0 │
└─────┴──────┴───────┴──────┘
We can also overwrite a column by giving the expression a name that already exists:
>>> df.with_columns((pl.col("a") ** 2).alias("c"))
shape: (4, 3)
┌─────┬──────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ f64 │
╞═════╪══════╪══════╡
│ 1 ┆ 0.5 ┆ 1.0 │
│ 2 ┆ 4.0 ┆ 4.0 │
│ 3 ┆ 10.0 ┆ 9.0 │
│ 4 ┆ 13.0 ┆ 16.0 │
└─────┴──────┴──────┘
Passing in multiple expressions as a list:
>>> df.with_columns(
... [
... (pl.col("a") ** 2).alias("a^2"),
@@ -5661,8 +5674,6 @@ def with_columns(
└─────┴──────┴───────┴──────┴───────┘
"""
if exprs is not None and not isinstance(exprs, Sequence):
exprs = [exprs]
return (
self.lazy().with_columns(exprs, **named_exprs).collect(no_optimization=True)
)
32 changes: 16 additions & 16 deletions py-polars/polars/internals/expr/expr.py
@@ -828,7 +828,7 @@ def is_null(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.all().is_null().suffix("_isnull")) # nan != null
>>> df.with_columns(pl.all().is_null().suffix("_isnull")) # nan != null
shape: (5, 4)
┌──────┬─────┬──────────┬──────────┐
│ a ┆ b ┆ a_isnull ┆ b_isnull │
@@ -857,7 +857,7 @@ def is_not_null(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.all().is_not_null().suffix("_not_null")) # nan != null
>>> df.with_columns(pl.all().is_not_null().suffix("_not_null")) # nan != null
shape: (5, 4)
┌──────┬─────┬────────────┬────────────┐
│ a ┆ b ┆ a_not_null ┆ b_not_null │
@@ -953,7 +953,7 @@ def is_nan(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.col(pl.Float64).is_nan().suffix("_isnan"))
>>> df.with_columns(pl.col(pl.Float64).is_nan().suffix("_isnan"))
shape: (5, 3)
┌──────┬─────┬─────────┐
│ a ┆ b ┆ b_isnan │
@@ -987,7 +987,7 @@ def is_not_nan(self) -> Expr:
... "b": [1.0, 2.0, float("nan"), 1.0, 5.0],
... }
... )
>>> df.with_column(pl.col(pl.Float64).is_not_nan().suffix("_is_not_nan"))
>>> df.with_columns(pl.col(pl.Float64).is_not_nan().suffix("_is_not_nan"))
shape: (5, 3)
┌──────┬─────┬──────────────┐
│ a ┆ b ┆ b_is_not_nan │
@@ -2693,7 +2693,7 @@ def over(self, expr: str | Expr | list[Expr | str]) -> Expr:
... }
... )
>>> (
... df.with_column(
... df.with_columns(
... pl.col("values").max().over("groups").alias("max_by_group")
... )
... )
@@ -2781,7 +2781,7 @@ def is_first(self) -> Expr:
... "num": [1, 2, 3, 1, 5],
... }
... )
>>> (df.with_column(pl.col("num").is_first().alias("is_first")))
>>> (df.with_columns(pl.col("num").is_first().alias("is_first")))
shape: (5, 2)
┌─────┬──────────┐
│ num ┆ is_first │
@@ -3076,7 +3076,7 @@ def apply(
In a selection context, the function is applied by row.
>>> (
... df.with_column(
... df.with_columns(
... pl.col("a").apply(lambda x: x * 2).alias("a_times_2"),
... )
... )
@@ -3095,7 +3095,7 @@ def apply(
It is better to implement this with an expression:
>>> (
... df.with_column(
... df.with_columns(
... (pl.col("a") * 2).alias("a_times_2"),
... )
... ) # doctest: +IGNORE_RESULT
@@ -3438,7 +3438,7 @@ def is_between(
Examples
--------
>>> df = pl.DataFrame({"num": [1, 2, 3, 4, 5]})
>>> df.with_column(pl.col("num").is_between(2, 4))
>>> df.with_columns(pl.col("num").is_between(2, 4))
shape: (5, 2)
┌─────┬────────────┐
│ num ┆ is_between │
@@ -3454,7 +3454,7 @@ def is_between(
Use the ``closed`` argument to include or exclude the values at the bounds.
>>> df.with_column(pl.col("num").is_between(2, 4, closed="left"))
>>> df.with_columns(pl.col("num").is_between(2, 4, closed="left"))
shape: (5, 2)
┌─────┬────────────┐
│ num ┆ is_between │
@@ -3545,7 +3545,7 @@ def hash(
... "b": ["x", None, "z"],
... }
... )
>>> df.with_column(pl.all().hash(10, 20, 30, 40)) # doctest: +IGNORE_RESULT
>>> df.with_columns(pl.all().hash(10, 20, 30, 40)) # doctest: +IGNORE_RESULT
shape: (3, 2)
┌──────────────────────┬──────────────────────┐
│ a ┆ b │
@@ -3675,7 +3675,7 @@ def interpolate(self, method: InterpolationMethod = "linear") -> Expr:
>>> (
... df_new_grid.join(
... df_original_grid, on="grid_points", how="left"
... ).with_column(pl.col("values").interpolate())
... ).with_columns(pl.col("values").interpolate())
... )
shape: (10, 2)
┌─────────────┬────────┐
@@ -4740,7 +4740,7 @@ def pct_change(self, n: int = 1) -> Expr:
... "a": [10, 11, 12, None, 12],
... }
... )
>>> df.with_column(pl.col("a").pct_change().alias("pct_change"))
>>> df.with_columns(pl.col("a").pct_change().alias("pct_change"))
shape: (5, 2)
┌──────┬────────────┐
│ a ┆ pct_change │
@@ -4866,7 +4866,7 @@ def clip(self, min_val: int | float, max_val: int | float) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"foo": [-50, 5, None, 50]})
>>> df.with_column(pl.col("foo").clip(1, 10).alias("foo_clipped"))
>>> df.with_columns(pl.col("foo").clip(1, 10).alias("foo_clipped"))
shape: (4, 2)
┌──────┬─────────────┐
│ foo ┆ foo_clipped │
@@ -4899,7 +4899,7 @@ def clip_min(self, min_val: int | float) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"foo": [-50, 5, None, 50]})
>>> df.with_column(pl.col("foo").clip_min(0).alias("foo_clipped"))
>>> df.with_columns(pl.col("foo").clip_min(0).alias("foo_clipped"))
shape: (4, 2)
┌──────┬─────────────┐
│ foo ┆ foo_clipped │
@@ -4932,7 +4932,7 @@ def clip_max(self, max_val: int | float) -> Expr:
Examples
--------
>>> df = pl.DataFrame({"foo": [-50, 5, None, 50]})
>>> df.with_column(pl.col("foo").clip_max(0).alias("foo_clipped"))
>>> df.with_columns(pl.col("foo").clip_max(0).alias("foo_clipped"))
shape: (4, 2)
┌──────┬─────────────┐
│ foo ┆ foo_clipped │
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/list.py
@@ -699,7 +699,7 @@ def eval(self, expr: pli.Expr, parallel: bool = False) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df.with_column(
>>> df.with_columns(
... pl.concat_list(["a", "b"]).arr.eval(pl.element().rank()).alias("rank")
... )
shape: (3, 3)
16 changes: 8 additions & 8 deletions py-polars/polars/internals/expr/string.py
@@ -76,7 +76,7 @@ def strptime(
... ],
... )
>>> (
... s.to_frame().with_column(
... s.to_frame().with_columns(
... pl.col("date")
... .str.strptime(pl.Date, "%F", strict=False)
... .fill_null(
@@ -401,7 +401,7 @@ def zfill(self, alignment: int) -> pli.Expr:
... "num": [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, None],
... }
... )
>>> df.with_column(pl.col("num").cast(str).str.zfill(5))
>>> df.with_columns(pl.col("num").cast(str).str.zfill(5))
shape: (11, 1)
┌─────────┐
│ num │
@@ -543,7 +543,7 @@ def ends_with(self, sub: str) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"fruits": ["apple", "mango", None]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("fruits").str.ends_with("go").alias("has_suffix"),
... )
shape: (3, 2)
@@ -589,7 +589,7 @@ def starts_with(self, sub: str | pli.Expr) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"fruits": ["apple", "mango", None]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("fruits").str.starts_with("app").alias("has_prefix"),
... )
shape: (3, 2)
@@ -1034,7 +1034,7 @@ def replace(
Examples
--------
>>> df = pl.DataFrame({"id": [1, 2], "text": ["123abc", "abc456"]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("text").str.replace(r"abc\b", "ABC")
... ) # doctest: +IGNORE_RESULT
shape: (2, 2)
@@ -1076,7 +1076,7 @@ def replace_all(
Examples
--------
>>> df = pl.DataFrame({"id": [1, 2], "text": ["abcabc", "123a123"]})
>>> df.with_column(pl.col("text").str.replace_all("a", "-"))
>>> df.with_columns(pl.col("text").str.replace_all("a", "-"))
shape: (2, 2)
┌─────┬─────────┐
│ id ┆ text │
@@ -1115,7 +1115,7 @@ def slice(self, offset: int, length: int | None = None) -> pli.Expr:
Examples
--------
>>> df = pl.DataFrame({"s": ["pear", None, "papaya", "dragonfruit"]})
>>> df.with_column(
>>> df.with_columns(
... pl.col("s").str.slice(-3).alias("s_sliced"),
... )
shape: (4, 2)
@@ -1132,7 +1132,7 @@ def slice(self, offset: int, length: int | None = None) -> pli.Expr:
Using the optional `length` parameter
>>> df.with_column(
>>> df.with_columns(
... pl.col("s").str.slice(4, length=3).alias("s_sliced"),
... )
shape: (4, 2)
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/struct.py
@@ -79,7 +79,7 @@ def rename_fields(self, names: list[str]) -> pli.Expr:
... .to_struct("my_struct")
... .to_frame()
... )
>>> df = df.with_column(
>>> df = df.with_columns(
... pl.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
... )
6 changes: 3 additions & 3 deletions py-polars/polars/internals/functions.py
@@ -551,17 +551,17 @@ def cut(
if labels:
if len(labels) != len(bins) + 1:
raise ValueError("expected more labels")
cuts_df = cuts_df.with_column(pli.Series(name=category_label, values=labels))
cuts_df = cuts_df.with_columns(pli.Series(name=category_label, values=labels))
else:
cuts_df = cuts_df.with_column(
cuts_df = cuts_df.with_columns(
pli.format(
"({}, {}]",
pli.col(break_point_label).shift_and_fill(1, float("-inf")),
pli.col(break_point_label),
).alias(category_label)
)

cuts_df = cuts_df.with_column(pli.col(category_label).cast(Categorical))
cuts_df = cuts_df.with_columns(pli.col(category_label).cast(Categorical))

result = (
s.cast(Float64)
[Diff for the remaining changed files not shown]
