From 3b1e0689001037f2faf0bf575a96d7bc652c1cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 28 Dec 2020 19:05:06 -0500 Subject: [PATCH] Backport PR #38728: REGR: to_csv created corrupt ZIP files when chunksize None: + # write to actual handle and close write buffer + if self.multiple_write_buffer is None or self.multiple_write_buffer.closed: + return + # ZipFile needs a non-empty string archive_name = self.archive_name or self.filename or "zip" - super().writestr(archive_name, data) + with self.multiple_write_buffer: + super().writestr(archive_name, self.multiple_write_buffer.getvalue()) + + def close(self): + self.flush() + super().close() @property def closed(self): diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index a9673ded7c377..6416cb93c7ff5 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -640,3 +640,25 @@ def test_to_csv_encoding_binary_handle(self, mode): handle.seek(0) assert handle.read().startswith(b'\xef\xbb\xbf""') + + +def test_to_csv_iterative_compression_name(compression): + # GH 38714 + df = tm.makeDataFrame() + with tm.ensure_clean() as path: + df.to_csv(path, compression=compression, chunksize=1) + tm.assert_frame_equal( + pd.read_csv(path, compression=compression, index_col=0), df + ) + + +def test_to_csv_iterative_compression_buffer(compression): + # GH 38714 + df = tm.makeDataFrame() + with io.BytesIO() as buffer: + df.to_csv(buffer, compression=compression, chunksize=1) + buffer.seek(0) + tm.assert_frame_equal( + pd.read_csv(buffer, compression=compression, index_col=0), df + ) + assert not buffer.closed