Skip to content

Commit

Permalink
fix(python): empty dataset fix for "pyarrow" engine (#2689)
Browse files Browse the repository at this point in the history
# Description
Part of #2686: fixes writing an empty Arrow dataset with the pyarrow engine.

# Related Issue(s)
<!---
For example:

- closes #106
--->
part of #2686 

# Documentation

<!---
Share links to useful documentation
--->
  • Loading branch information
sherlockbeard authored Jul 21, 2024
1 parent d3642a6 commit 4de2da3
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
4 changes: 3 additions & 1 deletion python/deltalake/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ def _cast_schema_to_recordbatchreader(
) -> Generator[pa.RecordBatch, None, None]:
"""Creates recordbatch generator."""
for batch in reader:
yield pa.Table.from_batches([batch]).cast(schema).to_batches()[0]
batchs = pa.Table.from_batches([batch]).cast(schema).to_batches()
if len(batchs) > 0:
yield batchs[0]


def convert_pyarrow_recordbatchreader(
Expand Down
6 changes: 6 additions & 0 deletions python/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,3 +1838,9 @@ def test_roundtrip_cdc_evolution(tmp_path: pathlib.Path):
print(os.listdir(tmp_path))
# This is kind of a weak test to verify that CDFs were written
assert os.path.isdir(os.path.join(tmp_path, "_change_data"))


def test_empty_dataset_write(tmp_path: pathlib.Path, sample_data: pa.Table):
    """Regression test: writing a pyarrow dataset backed by an empty table
    (schema only, zero rows) must complete without raising."""
    schema_only_table = sample_data.schema.empty_table()
    write_deltalake(tmp_path, dataset(schema_only_table), mode="append")

0 comments on commit 4de2da3

Please sign in to comment.