From 629c25bcdf0e3419ab4cb48ba376163a0deb56ff Mon Sep 17 00:00:00 2001
From: Weijie Guo
Date: Fri, 18 Aug 2023 02:40:38 +0800
Subject: [PATCH] fix(rust, python): take input_schema to create physical expr for selection (#10571)

---
 .../polars-lazy/src/physical_plan/planner/lp.rs | 10 ++++++++--
 py-polars/tests/unit/operations/test_filter.py  | 16 ++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/crates/polars-lazy/src/physical_plan/planner/lp.rs b/crates/polars-lazy/src/physical_plan/planner/lp.rs
index 9a6804bbe932..df764fd7d2a5 100644
--- a/crates/polars-lazy/src/physical_plan/planner/lp.rs
+++ b/crates/polars-lazy/src/physical_plan/planner/lp.rs
@@ -165,10 +165,16 @@ pub fn create_physical_plan(
             Ok(Box::new(executors::SliceExec { input, offset, len }))
         },
         Selection { input, predicate } => {
+            let input_schema = lp_arena.get(input).schema(lp_arena).into_owned();
             let input = create_physical_plan(input, lp_arena, expr_arena)?;
             let mut state = ExpressionConversionState::default();
-            let predicate =
-                create_physical_expr(predicate, Context::Default, expr_arena, None, &mut state)?;
+            let predicate = create_physical_expr(
+                predicate,
+                Context::Default,
+                expr_arena,
+                Some(&input_schema),
+                &mut state,
+            )?;
             Ok(Box::new(executors::FilterExec::new(
                 predicate,
                 input,
diff --git a/py-polars/tests/unit/operations/test_filter.py b/py-polars/tests/unit/operations/test_filter.py
index 163c970f7353..551138742e11 100644
--- a/py-polars/tests/unit/operations/test_filter.py
+++ b/py-polars/tests/unit/operations/test_filter.py
@@ -145,3 +145,19 @@ def test_clear_window_cache_after_filter_10499() -> None:
     assert df.lazy().filter((pl.col("a").null_count() < pl.count()).over("b")).filter(
         ((pl.col("a") == 0).sum() < pl.count()).over("b")
     ).collect().to_dict(False) == {"a": [3, None, 5, 0, 9, 10], "b": [2, 2, 3, 3, 5, 5]}
+
+
+def test_agg_function_of_filter_10565() -> None:
+    df_int = pl.DataFrame(data={"a": []}, schema={"a": pl.Int16})
+    assert df_int.filter(pl.col("a").n_unique().over("a") == 1).to_dict(False) == {
+        "a": []
+    }
+
+    df_str = pl.DataFrame(data={"a": []}, schema={"a": pl.Utf8})
+    assert df_str.filter(pl.col("a").n_unique().over("a") == 1).to_dict(False) == {
+        "a": []
+    }
+
+    assert df_str.lazy().filter(pl.col("a").n_unique().over("a") == 1).collect(
+        predicate_pushdown=False
+    ).to_dict(False) == {"a": []}