Skip to content

Commit

Permalink
feat: improve hive partition pruning (#13358) (#13426)
Browse files Browse the repository at this point in the history
Co-authored-by: Boruch Chalk <boruch.chalk@mobileye.com>
  • Loading branch information
bchalk101 and Boruch Chalk committed Jan 8, 2024
1 parent e015350 commit 14ed705
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
6 changes: 6 additions & 0 deletions crates/polars-lazy/src/physical_plan/expressions/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,12 @@ impl ApplyExpr {
let min = st.to_min()?;
let max = st.to_max()?;

if max.get(0).unwrap() == min.get(0).unwrap() {
let one_equals =
|value: &Series| Some(ChunkCompare::equal(input, value).ok()?.any());
return one_equals(min);
}

let all_smaller = || Some(ChunkCompare::lt(input, min).ok()?.all());
let all_bigger = || Some(ChunkCompare::gt(input, max).ok()?.all());
Some(!all_smaller()? && !all_bigger()?)
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/unit/io/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,26 @@ def test_hive_partitioned_predicate_pushdown(
)


@pytest.mark.write_disk()
def test_hive_partitioned_predicate_pushdown_skips_correct_number_of_files(
    io_files_path: Path, tmp_path: Path, monkeypatch: Any, capfd: Any
) -> None:
    """Check the skipped-file count reported for an ``is_in`` hive filter.

    A five-row frame (``d`` = 0..4, ``a`` = ``d % 5``) is written as a
    parquet dataset partitioned on ``a``. Filtering ``a`` to ``[1, 4]`` must
    return a ``(2, 2)`` result, and stderr must contain the message
    ``"hive partitioning: skipped 3 files"``.
    """
    # The skip message is looked up on stderr below; verbose mode is enabled
    # first so that it is emitted.
    monkeypatch.setenv("POLARS_VERBOSE", "1")

    values = pl.arange(0, 5, eager=True)
    frame = pl.DataFrame({"d": values}).with_columns(a=pl.col("d") % 5)

    # NOTE(review): presumably pyarrow writes one file per distinct ``a``
    # value (five in total), which is what makes "skipped 3" the expected
    # count for a two-value filter - confirm against the writer's behaviour.
    dataset_dir = tmp_path / "test_int_partitions"
    frame.write_parquet(
        dataset_dir,
        use_pyarrow=True,
        pyarrow_options={"partition_cols": ["a"]},
    )

    lazy_scan = pl.scan_parquet(
        dataset_dir / "**/*.parquet", hive_partitioning=True
    )
    selected = lazy_scan.filter(pl.col("a").is_in([1, 4])).collect()
    assert selected.shape == (2, 2)

    captured = capfd.readouterr()
    assert "hive partitioning: skipped 3 files" in captured.err


@pytest.mark.write_disk()
def test_hive_partitioned_slice_pushdown(io_files_path: Path, tmp_path: Path) -> None:
df = pl.read_ipc(io_files_path / "*.ipc")
Expand Down

0 comments on commit 14ed705

Please sign in to comment.