From c538971d1709ac30e26c99942768e55483a604e2 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Wed, 23 Nov 2022 11:45:09 +0100
Subject: [PATCH] refactor(python)!: rollup breaking changes

- Remove deprecated function arguments.
- Deprecate 'allow_streaming' in favor of 'streaming'
- Make 'collect', 'fetch', and 'show_graph' kwargs only

---
 py-polars/polars/internals/dataframe/frame.py | 30 ++++------------
 .../polars/internals/dataframe/groupby.py     | 10 +++---
 py-polars/polars/internals/lazyframe/frame.py | 34 +++++++++----------
 py-polars/polars/internals/series/series.py   |  4 ---
 py-polars/tests/unit/test_df.py               |  4 ---
 py-polars/tests/unit/test_groupby.py          |  6 ++--
 py-polars/tests/unit/test_projections.py      |  2 +-
 7 files changed, 31 insertions(+), 59 deletions(-)

diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py
index eb548f4556d7..c86cd9304f77 100644
--- a/py-polars/polars/internals/dataframe/frame.py
+++ b/py-polars/polars/internals/dataframe/frame.py
@@ -65,7 +65,6 @@
     _prepare_row_count_args,
     _process_null_values,
     _timedelta_to_pl_duration,
-    deprecated_alias,
     format_path,
     handle_projection_columns,
     is_bool_sequence,
@@ -2465,7 +2464,7 @@ def filter(
         return (
             self.lazy()
             .filter(predicate)  # type: ignore[arg-type]
-            .collect(no_optimization=True, string_cache=False)
+            .collect(no_optimization=True)
         )
 
     def describe(self: DF) -> DF:
@@ -2668,11 +2667,7 @@ def sort(
 
         """
         if not isinstance(by, str) and isinstance(by, (Sequence, pli.Expr)):
-            df = (
-                self.lazy()
-                .sort(by, reverse, nulls_last)
-                .collect(no_optimization=True, string_cache=False)
-            )
+            df = self.lazy().sort(by, reverse, nulls_last).collect(no_optimization=True)
             return df
         return self._from_pydf(self._df.sort(by, reverse, nulls_last))
 
@@ -2782,7 +2777,6 @@ def slice(self: DF, offset: int, length: int | None = None) -> DF:
             length = self.height - offset + length
         return self._from_pydf(self._df.slice(offset, length))
 
-    @deprecated_alias(length="n")
     def limit(self: DF, n: int = 5) -> DF:
         """
         Get the first `n` rows.
@@ -2818,7 +2812,6 @@ def limit(self: DF, n: int = 5) -> DF:
         """
         return self.head(n)
 
-    @deprecated_alias(length="n")
     def head(self: DF, n: int = 5) -> DF:
         """
         Get the first `n` rows.
@@ -2854,7 +2847,6 @@ def head(self: DF, n: int = 5) -> DF:
         """
         return self._from_pydf(self._df.head(n))
 
-    @deprecated_alias(length="n")
     def tail(self: DF, n: int = 5) -> DF:
         """
         Get the last `n` rows.
@@ -4098,11 +4090,7 @@ def with_column(self, column: pli.Series | pli.Expr) -> DataFrame:
         └──────┴─────┘
 
         """
-        return (
-            self.lazy()
-            .with_column(column)
-            .collect(no_optimization=True, string_cache=False)
-        )
+        return self.lazy().with_column(column).collect(no_optimization=True)
 
     def hstack(
         self: DF,
@@ -4254,7 +4242,6 @@ def extend(self: DF, other: DF) -> DF:
         self._df.extend(other._df)
         return self
 
-    @deprecated_alias(name="columns")
     def drop(self: DF, columns: str | Sequence[str]) -> DF:
         """
         Remove column from DataFrame and return as new.
@@ -5234,7 +5221,7 @@ def shift_and_fill(self, periods: int, fill_value: int | str | float) -> DataFra
         return (
             self.lazy()
             .shift_and_fill(periods, fill_value)
-            .collect(no_optimization=True, string_cache=False)
+            .collect(no_optimization=True)
         )
 
     def is_duplicated(self) -> pli.Series:
@@ -5411,10 +5398,7 @@ def select(
 
         """
         return self._from_pydf(
-            self.lazy()
-            .select(exprs)
-            .collect(no_optimization=True, string_cache=False)
-            ._df
+            self.lazy().select(exprs).collect(no_optimization=True)._df
         )
 
     def with_columns(
@@ -5490,9 +5474,7 @@
         if exprs is not None and not isinstance(exprs, Sequence):
             exprs = [exprs]
         return (
-            self.lazy()
-            .with_columns(exprs, **named_exprs)
-            .collect(no_optimization=True, string_cache=False)
+            self.lazy().with_columns(exprs, **named_exprs).collect(no_optimization=True)
         )
 
     @overload
diff --git a/py-polars/polars/internals/dataframe/groupby.py b/py-polars/polars/internals/dataframe/groupby.py
index 1d234a317de3..035864cfca59 100644
--- a/py-polars/polars/internals/dataframe/groupby.py
+++ b/py-polars/polars/internals/dataframe/groupby.py
@@ -298,7 +298,7 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
             .lazy()
             .groupby(self.by, maintain_order=self.maintain_order)
             .agg(aggs)
-            .collect(no_optimization=True, string_cache=False)
+            .collect(no_optimization=True)
         )
         return self._dataframe_class._from_pydf(df._df)
 
@@ -362,7 +362,7 @@ def head(self, n: int = 5) -> DF:
             .lazy()
             .groupby(self.by, self.maintain_order)
             .head(n)
-            .collect(no_optimization=True, string_cache=False)
+            .collect(no_optimization=True)
         )
         return self._dataframe_class._from_pydf(df._df)
 
@@ -426,7 +426,7 @@ def tail(self, n: int = 5) -> DF:
             .lazy()
             .groupby(self.by, self.maintain_order)
             .tail(n)
-            .collect(no_optimization=True, string_cache=False)
+            .collect(no_optimization=True)
         )
         return self._dataframe_class._from_pydf(df._df)
 
@@ -858,7 +858,7 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
                 self.time_column, self.period, self.offset, self.closed, self.by
             )
             .agg(aggs)
-            .collect(no_optimization=True, string_cache=False)
+            .collect(no_optimization=True)
         )
 
 
@@ -911,7 +911,7 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
                 self.by,
             )
             .agg(aggs)
-            .collect(no_optimization=True, string_cache=False)
+            .collect(no_optimization=True)
         )
 
 
diff --git a/py-polars/polars/internals/lazyframe/frame.py b/py-polars/polars/internals/lazyframe/frame.py
index ff24431c58f7..211683acbfe3 100644
--- a/py-polars/polars/internals/lazyframe/frame.py
+++ b/py-polars/polars/internals/lazyframe/frame.py
@@ -43,6 +43,7 @@
     _prepare_row_count_args,
     _process_null_values,
     _timedelta_to_pl_duration,
+    deprecated_alias,
     format_path,
 )
 
@@ -596,6 +597,7 @@ def describe_plan(self) -> str:
         """Create a string representation of the unoptimized query plan."""
         return self._ldf.describe_plan()
 
+    @deprecated_alias(allow_streaming="streaming")
     def describe_optimized_plan(
         self,
         type_coercion: bool = True,
@@ -604,7 +606,7 @@ def describe_optimized_plan(
         simplify_expression: bool = True,
         slice_pushdown: bool = True,
         common_subplan_elimination: bool = True,
-        allow_streaming: bool = False,
+        streaming: bool = False,
     ) -> str:
         """Create a string representation of the optimized query plan."""
         ldf = self._ldf.optimization_toggle(
@@ -614,14 +616,16 @@ def describe_optimized_plan(
             simplify_expression,
             slice_pushdown,
             common_subplan_elimination,
-            allow_streaming,
+            streaming,
         )
         return ldf.describe_optimized_plan()
 
+    @deprecated_alias(allow_streaming="streaming")
     def show_graph(
         self,
         optimized: bool = True,
+        *,
         show: bool = True,
         output_path: str | None = None,
         raw_output: bool = False,
@@ -632,7 +636,7 @@ def show_graph(
         simplify_expression: bool = True,
         slice_pushdown: bool = True,
         common_subplan_elimination: bool = True,
-        allow_streaming: bool = False,
+        streaming: bool = False,
     ) -> str | None:
         """
         Show a plot of the query plan. Note that you should have graphviz installed.
@@ -663,7 +667,7 @@
             Will try to cache branching subplans that occur on self-joins or unions.
         common_subplan_elimination
             Will try to cache branching subplans that occur on self-joins or unions.
-        allow_streaming
+        streaming
             Run parts of the query in a streaming fashion (this is in an alpha state)
         """
         _ldf = self._ldf.optimization_toggle(
@@ -674,7 +678,7 @@
             simplify_expression,
             slice_pushdown,
             common_subplan_elimination,
-            allow_streaming,
+            streaming,
         )
         dot = _ldf.to_dot(optimized)
 
@@ -956,18 +960,18 @@ def profile(
 
         return df, timings
 
+    @deprecated_alias(allow_streaming="streaming")
     def collect(
         self,
+        *,
         type_coercion: bool = True,
         predicate_pushdown: bool = True,
         projection_pushdown: bool = True,
-        *,
         simplify_expression: bool = True,
-        string_cache: bool = False,
         no_optimization: bool = False,
         slice_pushdown: bool = True,
         common_subplan_elimination: bool = True,
-        allow_streaming: bool = False,
+        streaming: bool = False,
     ) -> pli.DataFrame:
         """
         Collect into a DataFrame.
@@ -985,16 +989,13 @@
             Do projection pushdown optimization.
         simplify_expression
             Run simplify expressions optimization.
-        string_cache
-            This argument is deprecated. Please set the string cache globally.
-            The argument will be ignored
         no_optimization
             Turn off (certain) optimizations.
         slice_pushdown
             Slice pushdown optimization.
         common_subplan_elimination
             Will try to cache branching subplans that occur on self-joins or unions.
-        allow_streaming
+        streaming
             Run parts of the query in a streaming fashion (this is in an alpha state)
 
         Returns
@@ -1031,7 +1032,7 @@
             slice_pushdown = False
             common_subplan_elimination = False
 
-        if allow_streaming:
+        if streaming:
             common_subplan_elimination = False
 
         ldf = self._ldf.optimization_toggle(
@@ -1041,18 +1042,18 @@
             simplify_expression,
             slice_pushdown,
             common_subplan_elimination,
-            allow_streaming,
+            streaming,
         )
         return pli.wrap_df(ldf.collect())
 
     def fetch(
         self,
         n_rows: int = 500,
+        *,
         type_coercion: bool = True,
         predicate_pushdown: bool = True,
         projection_pushdown: bool = True,
         simplify_expression: bool = True,
-        string_cache: bool = False,
         no_optimization: bool = False,
         slice_pushdown: bool = True,
         common_subplan_elimination: bool = True,
@@ -1081,9 +1082,6 @@
             Run projection pushdown optimization.
         simplify_expression
             Run simplify expressions optimization.
-        string_cache
-            This argument is deprecated. Please set the string cache globally.
-            The argument will be ignored
         no_optimization
             Turn off optimizations.
         slice_pushdown
diff --git a/py-polars/polars/internals/series/series.py b/py-polars/polars/internals/series/series.py
index b7ce6d2d6dc5..7bdacf86ab1b 100644
--- a/py-polars/polars/internals/series/series.py
+++ b/py-polars/polars/internals/series/series.py
@@ -76,7 +76,6 @@
     _datetime_to_pl_timestamp,
     _time_to_pl_time,
     accessor,
-    deprecated_alias,
     is_bool_sequence,
     is_int_sequence,
     range_to_slice,
@@ -1607,7 +1606,6 @@ def cumprod(self, reverse: bool = False) -> Series:
 
         """
 
-    @deprecated_alias(num_elements="n")
     def limit(self, n: int = 10) -> Series:
         """
         Get the first `n` rows.
@@ -1746,7 +1744,6 @@ def filter(self, predicate: Series | list[bool]) -> Series:
             predicate = Series("", predicate)
         return wrap_s(self._s.filter(predicate._s))
 
-    @deprecated_alias(length="n")
     def head(self, n: int = 10) -> Series:
         """
         Get the first `n` rows.
@@ -1770,7 +1767,6 @@ def head(self, n: int = 10) -> Series:
 
         """
         return self.to_frame().select(pli.col(self.name).head(n)).to_series()
 
-    @deprecated_alias(length="n")
     def tail(self, n: int = 10) -> Series:
         """
         Get the last `n` rows.
diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/test_df.py
index 00d421ad5ee9..a16337ffafbc 100644
--- a/py-polars/tests/unit/test_df.py
+++ b/py-polars/tests/unit/test_df.py
@@ -679,10 +679,6 @@ def test_extend() -> None:
 
 def test_drop() -> None:
     df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
-    with pytest.deprecated_call():
-        df = df.drop(name="a")  # type: ignore[call-arg]
-    assert df.shape == (3, 2)
-
-    df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
     df = df.drop(columns="a")
     assert df.shape == (3, 2)
     df = pl.DataFrame({"a": [2, 1, 3], "b": ["a", "b", "c"], "c": [1, 2, 3]})
diff --git a/py-polars/tests/unit/test_groupby.py b/py-polars/tests/unit/test_groupby.py
index 36a69324ea88..ea4442d3f2d4 100644
--- a/py-polars/tests/unit/test_groupby.py
+++ b/py-polars/tests/unit/test_groupby.py
@@ -227,11 +227,11 @@ def test_streaming_non_streaming_gb() -> None:
     n = 100
     df = pl.DataFrame({"a": np.random.randint(0, 20, n)})
     q = df.lazy().groupby("a").agg(pl.count()).sort("a")
-    assert q.collect(allow_streaming=True).frame_equal(q.collect())
+    assert q.collect(streaming=True).frame_equal(q.collect())
 
     q = df.lazy().with_column(pl.col("a").cast(pl.Utf8))
     q = q.groupby("a").agg(pl.count()).sort("a")
-    assert q.collect(allow_streaming=True).frame_equal(q.collect())
+    assert q.collect(streaming=True).frame_equal(q.collect())
     q = df.lazy().with_column(pl.col("a").alias("b"))
     q = q.groupby(["a", "b"]).agg(pl.count()).sort("a")
-    assert q.collect(allow_streaming=True).frame_equal(q.collect())
+    assert q.collect(streaming=True).frame_equal(q.collect())
diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py
index 79d990192930..1663141d5128 100644
--- a/py-polars/tests/unit/test_projections.py
+++ b/py-polars/tests/unit/test_projections.py
@@ -99,7 +99,7 @@ def test_unnest_columns_available() -> None:
 def test_streaming_duplicate_cols_5537() -> None:
     assert pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).lazy().with_columns(
         [(pl.col("a") * 2).alias("foo"), (pl.col("a") * 3)]
-    ).collect(allow_streaming=True).to_dict(False) == {
+    ).collect(streaming=True).to_dict(False) == {
         "a": [3, 6, 9],
         "b": [1, 2, 3],
         "foo": [2, 4, 6],