Skip to content

Commit

Permalink
chore: add merge pushdown isin test
Browse files Browse the repository at this point in the history
  • Loading branch information
ion-elgreco committed Aug 21, 2024
1 parent 9304326 commit 9531181
Showing 1 changed file with 51 additions and 0 deletions.
51 changes: 51 additions & 0 deletions python/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,3 +985,54 @@ def test_struct_casting(tmp_path: pathlib.Path):
.execute()
)
assert result is not None


def test_merge_isin_partition_pruning(
    tmp_path: pathlib.Path,
):
    """Check that an ``IN`` list on the partition column prunes files in a merge.

    A five-row table partitioned on ``partition`` is written, then two source
    rows (partitions 3 and 4) are merged in with a predicate that restricts
    the target to ``t.partition in (3,4)``. The test verifies the merged
    contents, that the last commit is a MERGE, and that the scan metrics
    report 2 files scanned and 3 skipped.
    """
    row_count = 5
    row_numbers = list(range(row_count))

    # Seed the target table; every row carries a distinct partition value.
    # NOTE(review): presumably this yields one data file per partition, which
    # is what the 2-scanned / 3-skipped expectation below relies on.
    target_data = pa.table(
        {
            "id": pa.array([str(n) for n in row_numbers]),
            "partition": pa.array(row_numbers, type=pa.int64()),
            "sold": pa.array(row_numbers, type=pa.int32()),
        }
    )
    write_deltalake(tmp_path, target_data, mode="append", partition_by="partition")
    dt = DeltaTable(tmp_path)

    # Source rows only touch partitions 3 and 4, with new "sold" values.
    updates = pa.table(
        {
            "id": pa.array(["3", "4"]),
            "partition": pa.array([3, 4], type=pa.int64()),
            "sold": pa.array([10, 20], type=pa.int32()),
        }
    )

    # The predicate combines the join key with an IN list on the partition
    # column; the IN list is the part under test.
    merger = dt.merge(
        source=updates,
        predicate="t.id = s.id and t.partition in (3,4)",
        source_alias="s",
        target_alias="t",
    )
    merger = merger.when_matched_update_all()
    metrics = merger.execute()

    # Rows 0-2 are unchanged; rows 3 and 4 pick up the source "sold" values.
    expected = pa.table(
        {
            "id": pa.array(["0", "1", "2", "3", "4"]),
            "partition": pa.array([0, 1, 2, 3, 4], type=pa.int64()),
            "sold": pa.array([0, 1, 2, 10, 20], type=pa.int32()),
        }
    )

    # Sort by id so the comparison does not depend on read order.
    merged = dt.to_pyarrow_table()
    result = merged.sort_by([("id", "ascending")])
    latest_commit = dt.history(1)[0]

    assert latest_commit["operation"] == "MERGE"
    assert result == expected
    # Only the two partitions named in the IN list should have been scanned.
    assert metrics["num_target_files_scanned"] == 2
    assert metrics["num_target_files_skipped_during_scan"] == 3

0 comments on commit 9531181

Please sign in to comment.