diff --git a/src/datasets/arrow_writer.py b/src/datasets/arrow_writer.py index 95b1e34e4da..becb28c66ea 100644 --- a/src/datasets/arrow_writer.py +++ b/src/datasets/arrow_writer.py @@ -450,7 +450,7 @@ def write_rows_on_file(self): """Write stored rows from the write-pool of rows. It concatenates the single-row tables and it writes the resulting table.""" if not self.current_rows: return - table = pa.concat_tables(self.current_rows).combine_chunks() + table = pa.concat_tables(self.current_rows) self.write_table(table) self.current_rows = [] @@ -564,6 +564,7 @@ def write_table(self, pa_table: pa.Table, writer_batch_size: Optional[int] = Non writer_batch_size = self.writer_batch_size if self.pa_writer is None: self._build_writer(inferred_schema=pa_table.schema) + pa_table = pa_table.combine_chunks() pa_table = table_cast(pa_table, self._schema) if self.embed_local_files: pa_table = embed_table_storage(pa_table)