Skip to content

Commit

Permalink
fix(rust): Fix hive partitioned files not being skipped (pola-rs#13358)
Browse files Browse the repository at this point in the history
fix(rust): Fix hive partitioned files not being skipped (pola-rs#13358)
  • Loading branch information
Boruch Chalk committed Jan 4, 2024
1 parent e9687b1 commit 822f0ca
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
6 changes: 6 additions & 0 deletions crates/polars-lazy/src/physical_plan/expressions/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,12 @@ impl ApplyExpr {
let min = st.to_min()?;
let max = st.to_max()?;

if ChunkCompare::equal(max, min).ok()?.all() {
let one_equals =
|value: &Series| Some(ChunkCompare::equal(input, value).ok()?.any());
return one_equals(min);
}

let all_smaller = || Some(ChunkCompare::lt(input, min).ok()?.all());
let all_bigger = || Some(ChunkCompare::gt(input, max).ok()?.all());
Some(!all_smaller()? && !all_bigger()?)
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/unit/io/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,26 @@ def test_hive_partitioned_predicate_pushdown(
)


@pytest.mark.write_disk()
def test_hive_partitioned_predicate_pushdown_skips_correct_number_of_files(
    io_files_path: Path, tmp_path: Path, monkeypatch: Any, capfd: Any
) -> None:
    """Check how many hive-partitioned files an `is_in` filter skips.

    A 10_000-row frame is written with pyarrow, partitioned on ``a``
    (``d % 100``, i.e. 100 distinct values with 100 rows each). Filtering
    on two of those values must return 200 rows, and the captured stderr
    must report that 98 files were skipped.
    """
    # The skip message asserted at the end is looked up in stderr, so
    # POLARS_VERBOSE is set before any query runs.
    monkeypatch.setenv("POLARS_VERBOSE", "1")

    row_ids = pl.arange(0, 10_000, eager=True)
    frame = pl.DataFrame({"d": row_ids}).with_columns(a=pl.col("d") % 100)

    dataset_root = tmp_path / "test_int_partitions"
    frame.write_parquet(
        dataset_root,
        use_pyarrow=True,
        pyarrow_options={"partition_cols": ["a"]},
    )

    lazy_scan = pl.scan_parquet(
        dataset_root / "**/*.parquet", hive_partitioning=True
    )
    wanted_partitions = pl.col("a").is_in([10, 99])
    collected = lazy_scan.filter(wanted_partitions).collect()
    # 2 partitions x 100 rows each, with columns "d" and "a".
    assert collected.shape == (200, 2)

    # 98 = 100 partitions minus the 2 selected ones
    # (presumably one parquet file per partition -- confirm against the writer).
    captured = capfd.readouterr()
    assert "hive partitioning: skipped 98 files" in captured.err


@pytest.mark.write_disk()
def test_hive_partitioned_slice_pushdown(io_files_path: Path, tmp_path: Path) -> None:
df = pl.read_ipc(io_files_path / "*.ipc")
Expand Down

0 comments on commit 822f0ca

Please sign in to comment.