From e921030c0201d441453ea0fd7953386725a36cfa Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Mon, 10 Jun 2024 22:03:18 +0200
Subject: [PATCH] depr(python): Deprecate `LazyFrame.with_context`

---
 py-polars/polars/lazyframe/frame.py           |  10 +-
 .../tests/unit/lazyframe/test_with_context.py | 112 +++++++++++++++++-
 .../tests/unit/streaming/test_streaming.py    |  10 --
 py-polars/tests/unit/test_cse.py              |  43 -------
 py-polars/tests/unit/test_predicates.py       |  24 ----
 py-polars/tests/unit/test_schema.py           |  12 --
 6 files changed, 117 insertions(+), 94 deletions(-)

diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
index 49be56ea4423..9818301c0ee0 100644
--- a/py-polars/polars/lazyframe/frame.py
+++ b/py-polars/polars/lazyframe/frame.py
@@ -4221,10 +4221,16 @@ def with_columns_seq(
         )
         return self._from_pyldf(self._ldf.with_columns_seq(pyexprs))
 
+    @deprecate_function(
+        "Use `pl.concat(..., how='horizontal')` instead.", version="1.0.0"
+    )
     def with_context(self, other: Self | list[Self]) -> Self:
         """
         Add an external context to the computation graph.
 
+        .. deprecated:: 1.0.0
+            Use :func:`concat` with `how='horizontal'` instead.
+
         This allows expressions to also access columns from DataFrames
         that are not part of this one.
 
@@ -4237,7 +4243,7 @@ def with_context(self, other: Self | list[Self]) -> Self:
         --------
         >>> lf = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "c", None]})
         >>> lf_other = pl.LazyFrame({"c": ["foo", "ham"]})
-        >>> lf.with_context(lf_other).select(
+        >>> lf.with_context(lf_other).select(  # doctest: +SKIP
         ...     pl.col("b") + pl.col("c").first()
         ... ).collect()
         shape: (3, 1)
@@ -4259,7 +4265,7 @@ def with_context(self, other: Self | list[Self]) -> Self:
         >>> test_lf = pl.LazyFrame(
         ...     {"feature_0": [-1.0, None, 1], "feature_1": [-1.0, 0, 1]}
         ... )
-        >>> test_lf.with_context(
+        >>> test_lf.with_context(  # doctest: +SKIP
         ...     train_lf.select(pl.all().name.suffix("_train"))
         ... ).select(
         ...     pl.col("feature_0").fill_null(pl.col("feature_0_train").median())
diff --git a/py-polars/tests/unit/lazyframe/test_with_context.py b/py-polars/tests/unit/lazyframe/test_with_context.py
index 6983b7a5060f..43eee761d28f 100644
--- a/py-polars/tests/unit/lazyframe/test_with_context.py
+++ b/py-polars/tests/unit/lazyframe/test_with_context.py
@@ -1,15 +1,121 @@
+from datetime import datetime
+
+import pytest
+
 import polars as pl
 from polars.testing import assert_frame_equal
 
 
+def test_with_context() -> None:
+    df_a = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "c", None]}).lazy()
+    df_b = pl.DataFrame({"c": ["foo", "ham"]})
+
+    with pytest.deprecated_call():  # with_context must emit a deprecation warning
+        result = df_a.with_context(df_b.lazy()).select(
+            pl.col("b") + pl.col("c").first()  # "c" comes from the context frame
+        )
+    assert result.collect().to_dict(as_series=False) == {"b": ["afoo", "cfoo", None]}
+
+    with pytest.deprecated_call():
+        context = df_a.with_context(df_b.lazy())
+    with pytest.raises(pl.ComputeError):  # selecting "a" and "c" must raise
+        context.select("a", "c").collect()
+
 # https://github.com/pola-rs/polars/issues/5867
 def test_with_context_ignore_5867() -> None:
     outer = pl.LazyFrame({"OtherCol": [1, 2, 3, 4]})
-    lf = pl.LazyFrame({"Category": [1, 1, 2, 2], "Counts": [1, 2, 3, 4]}).with_context(
-        outer
-    )
+    with pytest.deprecated_call():
+        lf = pl.LazyFrame(
+            {"Category": [1, 1, 2, 2], "Counts": [1, 2, 3, 4]}
+        ).with_context(outer)
 
     result = lf.group_by("Category", maintain_order=True).agg(pl.col("Counts").sum())
 
     expected = pl.LazyFrame({"Category": [1, 2], "Counts": [3, 7]})
     assert_frame_equal(result, expected)
+
+
+def test_predicate_pushdown_with_context_11014() -> None:
+    df1 = pl.LazyFrame(
+        {
+            "df1_c1": [1, 2, 3],
+            "df1_c2": [2, 3, 4],
+        }
+    )
+
+    df2 = pl.LazyFrame(
+        {
+            "df2_c1": [2, 3, 4],
+            "df2_c2": [3, 4, 5],
+        }
+    )
+
+    with pytest.deprecated_call():  # with_context must emit a deprecation warning
+        out = (
+            df1.with_context(df2)
+            .filter(pl.col("df1_c1").is_in(pl.col("df2_c1")))  # uses context column
+            .collect(predicate_pushdown=True)
+        )
+
+    assert out.to_dict(as_series=False) == {"df1_c1": [2, 3], "df1_c2": [3, 4]}
+
+
+@pytest.mark.xdist_group("streaming")
+def test_streaming_11219() -> None:
+    # https://github.com/pola-rs/polars/issues/11219
+
+    lf = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "c", None]})
+    lf_other = pl.LazyFrame({"c": ["foo", "ham"]})
+    lf_other2 = pl.LazyFrame({"c": ["foo", "ham"]})
+
+    with pytest.deprecated_call():  # with_context must emit a deprecation warning
+        context = lf.with_context([lf_other, lf_other2])  # list of context frames
+
+    assert context.select(pl.col("b") + pl.col("c").first()).collect(
+        streaming=True  # collect with the streaming flag enabled
+    ).to_dict(as_series=False) == {"b": ["afoo", "cfoo", None]}
+
+
+def test_no_cse_in_with_context() -> None:
+    df1 = pl.DataFrame(
+        {
+            "timestamp": [
+                datetime(2023, 1, 1, 0, 0),
+                datetime(2023, 5, 1, 0, 0),
+                datetime(2023, 10, 1, 0, 0),
+            ],
+            "value": [2, 5, 9],
+        }
+    )
+    df2 = pl.DataFrame(
+        {
+            "date_start": [
+                datetime(2022, 12, 31, 0, 0),
+                datetime(2023, 1, 2, 0, 0),
+            ],
+            "date_end": [
+                datetime(2023, 4, 30, 0, 0),
+                datetime(2023, 5, 5, 0, 0),
+            ],
+            "label": [0, 1],
+        }
+    )
+
+    with pytest.deprecated_call():  # with_context must emit a deprecation warning
+        context = df1.lazy().with_context(df2.lazy())
+
+    assert (
+        context.select(
+            pl.col("date_start", "label").gather(  # columns from df2 (context)
+                pl.col("date_start").search_sorted(pl.col("timestamp")) - 1
+            ),
+        )
+    ).collect().to_dict(as_series=False) == {
+        "date_start": [
+            datetime(2022, 12, 31, 0, 0),
+            datetime(2023, 1, 2, 0, 0),
+            datetime(2023, 1, 2, 0, 0),
+        ],
+        "label": [0, 1, 1],
+    }
diff --git a/py-polars/tests/unit/streaming/test_streaming.py b/py-polars/tests/unit/streaming/test_streaming.py
index 46d968902a9c..b2fa4eacc9fe 100644
--- a/py-polars/tests/unit/streaming/test_streaming.py
+++ b/py-polars/tests/unit/streaming/test_streaming.py
@@ -284,16 +284,6 @@ def test_boolean_agg_schema() -> None:
         )
 
 
-def test_streaming_11219() -> None:
-    lf = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "c", None]})
-    lf_other = pl.LazyFrame({"c": ["foo", "ham"]})
-    lf_other2 = pl.LazyFrame({"c": ["foo", "ham"]})
-
-    assert lf.with_context([lf_other, lf_other2]).select(
-        pl.col("b") + pl.col("c").first()
-    ).collect(streaming=True).to_dict(as_series=False) == {"b": ["afoo", "cfoo", None]}
-
-
 @pytest.mark.write_disk()
 def test_streaming_csv_headers_but_no_data_13770(tmp_path: Path) -> None:
     with Path.open(tmp_path / "header_no_data.csv", "w") as f:
diff --git a/py-polars/tests/unit/test_cse.py b/py-polars/tests/unit/test_cse.py
index 0081bc15ca43..ca413318e3b3 100644
--- a/py-polars/tests/unit/test_cse.py
+++ b/py-polars/tests/unit/test_cse.py
@@ -489,49 +489,6 @@ def test_cse_count_in_group_by() -> None:
     }
 
 
-def test_no_cse_in_with_context() -> None:
-    df1 = pl.DataFrame(
-        {
-            "timestamp": [
-                datetime(2023, 1, 1, 0, 0),
-                datetime(2023, 5, 1, 0, 0),
-                datetime(2023, 10, 1, 0, 0),
-            ],
-            "value": [2, 5, 9],
-        }
-    )
-    df2 = pl.DataFrame(
-        {
-            "date_start": [
-                datetime(2022, 12, 31, 0, 0),
-                datetime(2023, 1, 2, 0, 0),
-            ],
-            "date_end": [
-                datetime(2023, 4, 30, 0, 0),
-                datetime(2023, 5, 5, 0, 0),
-            ],
-            "label": [0, 1],
-        }
-    )
-
-    assert (
-        df1.lazy()
-        .with_context(df2.lazy())
-        .select(
-            pl.col("date_start", "label").gather(
-                pl.col("date_start").search_sorted(pl.col("timestamp")) - 1
-            ),
-        )
-    ).collect().to_dict(as_series=False) == {
-        "date_start": [
-            datetime(2022, 12, 31, 0, 0),
-            datetime(2023, 1, 2, 0, 0),
-            datetime(2023, 1, 2, 0, 0),
-        ],
-        "label": [0, 1, 1],
-    }
-
-
 def test_cse_slice_11594() -> None:
     df = pl.LazyFrame({"a": [1, 2, 1, 2, 1, 2]})
 
diff --git a/py-polars/tests/unit/test_predicates.py b/py-polars/tests/unit/test_predicates.py
index bcf93fb654be..2c9a7ba3c842 100644
--- a/py-polars/tests/unit/test_predicates.py
+++ b/py-polars/tests/unit/test_predicates.py
@@ -132,30 +132,6 @@ def test_predicate_pushdown_block_8661() -> None:
     }
 
 
-def test_predicate_pushdown_with_context_11014() -> None:
-    df1 = pl.LazyFrame(
-        {
-            "df1_c1": [1, 2, 3],
-            "df1_c2": [2, 3, 4],
-        }
-    )
-
-    df2 = pl.LazyFrame(
-        {
-            "df2_c1": [2, 3, 4],
-            "df2_c2": [3, 4, 5],
-        }
-    )
-
-    out = (
-        df1.with_context(df2)
-        .filter(pl.col("df1_c1").is_in(pl.col("df2_c1")))
-        .collect(predicate_pushdown=True)
-    )
-
-    assert out.to_dict(as_series=False) == {"df1_c1": [2, 3], "df1_c2": [3, 4]}
-
-
 def test_predicate_pushdown_cumsum_9566() -> None:
     df = pl.DataFrame({"A": range(10), "B": ["b"] * 5 + ["a"] * 5})
 
diff --git a/py-polars/tests/unit/test_schema.py b/py-polars/tests/unit/test_schema.py
index d777af51a524..fd6b154f06cf 100644
--- a/py-polars/tests/unit/test_schema.py
+++ b/py-polars/tests/unit/test_schema.py
@@ -156,18 +156,6 @@ def test_bool_numeric_supertype() -> None:
         )
 
 
-def test_with_context() -> None:
-    df_a = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "c", None]}).lazy()
-    df_b = pl.DataFrame({"c": ["foo", "ham"]})
-
-    assert (
-        df_a.with_context(df_b.lazy()).select([pl.col("b") + pl.col("c").first()])
-    ).collect().to_dict(as_series=False) == {"b": ["afoo", "cfoo", None]}
-
-    with pytest.raises(pl.ComputeError):
-        (df_a.with_context(df_b.lazy()).select(["a", "c"])).collect()
-
-
 def test_from_dicts_nested_nulls() -> None:
     assert pl.from_dicts([{"a": [None, None]}, {"a": [1, 2]}]).to_dict(
         as_series=False