From 0fa95802328aaa8466534308bd648e770bcf93d6 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Mon, 11 Mar 2019 09:10:58 -0700 Subject: [PATCH] Backport PR #25625: BUG: to_csv line endings with compression (#25663) --- doc/source/whatsnew/v0.24.2.rst | 1 + pandas/io/common.py | 2 +- pandas/tests/frame/test_to_csv.py | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 2c6d1e01ed89b..0f603515c61cc 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -32,6 +32,7 @@ Fixed Regressions - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) +- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compression (:issue:`25311`) .. 
_whatsnew_0242.enhancements: diff --git a/pandas/io/common.py b/pandas/io/common.py index ad054d77b3bc8..c1cacf39c5b08 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -434,7 +434,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, if (compat.PY3 and is_text and (compression or isinstance(f, need_text_wrapping))): from io import TextIOWrapper - f = TextIOWrapper(f, encoding=encoding) + f = TextIOWrapper(f, encoding=encoding, newline='') handles.append(f) if memory_map and hasattr(f, 'fileno'): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 61eefccede5dd..42bfa75a01063 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1220,3 +1220,15 @@ def test_multi_index_header(self): '1,5,6,7,8'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected + + def test_gz_lineend(self): + # GH 25311 + df = pd.DataFrame({'a': [1, 2]}) + expected_rows = ['a', '1', '2'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + with ensure_clean('__test_gz_lineend.csv.gz') as path: + df.to_csv(path, index=False) + with tm.decompress_file(path, compression='gzip') as f: + result = f.read().decode('utf-8') + + assert result == expected