diff --git a/crates/polars-lazy/src/physical_plan/expressions/apply.rs b/crates/polars-lazy/src/physical_plan/expressions/apply.rs index 5bfd6c4a77e80..62db3fc6853ee 100644 --- a/crates/polars-lazy/src/physical_plan/expressions/apply.rs +++ b/crates/polars-lazy/src/physical_plan/expressions/apply.rs @@ -509,6 +509,12 @@ impl ApplyExpr { let min = st.to_min()?; let max = st.to_max()?; + if ChunkCompare::equal(max, min).ok()?.all() { + let one_equals = + |value: &Series| Some(ChunkCompare::equal(input, value).ok()?.any()); + return one_equals(min); + } + let all_smaller = || Some(ChunkCompare::lt(input, min).ok()?.all()); let all_bigger = || Some(ChunkCompare::gt(input, max).ok()?.all()); Some(!all_smaller()? && !all_bigger()?) diff --git a/py-polars/tests/unit/io/test_hive.py b/py-polars/tests/unit/io/test_hive.py index e476d61aae178..f495780752173 100644 --- a/py-polars/tests/unit/io/test_hive.py +++ b/py-polars/tests/unit/io/test_hive.py @@ -65,6 +65,26 @@ def test_hive_partitioned_predicate_pushdown( ) +@pytest.mark.write_disk() +def test_hive_partitioned_predicate_pushdown_skips_correct_number_of_files( + io_files_path: Path, tmp_path: Path, monkeypatch: Any, capfd: Any +) -> None: + monkeypatch.setenv("POLARS_VERBOSE", "1") + df = pl.DataFrame({"d": pl.arange(0, 10_000, eager=True)}).with_columns( + a=pl.col("d") % 100 + ) + root = tmp_path / "test_int_partitions" + df.write_parquet( + root, + use_pyarrow=True, + pyarrow_options={"partition_cols": ["a"]}, + ) + + q = pl.scan_parquet(root / "**/*.parquet", hive_partitioning=True) + assert q.filter(pl.col("a").is_in([10, 99])).collect().shape == (200, 2) + assert "hive partitioning: skipped 98 files" in capfd.readouterr().err + + @pytest.mark.write_disk() def test_hive_partitioned_slice_pushdown(io_files_path: Path, tmp_path: Path) -> None: df = pl.read_ipc(io_files_path / "*.ipc")