From 8689167c00338eec2ec6fdac36746113a8bc8dd6 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sat, 21 Jul 2018 11:13:17 -0400 Subject: [PATCH 01/39] Default to_csv & to_json to compression='infer' --- pandas/core/frame.py | 4 ++-- pandas/io/json/json.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4578d2ac08199..e5a29e768a1e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1713,7 +1713,7 @@ def to_panel(self): def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, - mode='w', encoding=None, compression=None, quoting=None, + mode='w', encoding=None, compression='infer', quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.'): @@ -1748,7 +1748,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, encoding : string, optional A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default infer If 'infer' and `path_or_buf` is path-like, then detect compression from the following extensions: '.gz', '.bz2' or '.xz' (otherwise no compression). diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 3ec5e8d9be955..760d40286c66f 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -28,7 +28,7 @@ # interface to/from def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', - default_handler=None, lines=False, compression=None, + default_handler=None, lines=False, compression='infer', index=True): if not index and orient not in ['split', 'table']: From 3ccfb00e4e566a1ab0d24c07533c8021246e0fc2 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sat, 21 Jul 2018 14:43:07 -0400 Subject: [PATCH 02/39] to_json compression=infer in pandas/core/generic.py --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 38f334762fa88..3d76b5f550246 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1899,7 +1899,7 @@ def _repr_latex_(self): def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', - default_handler=None, lines=False, compression=None, + default_handler=None, lines=False, compression='infer', index=True): """ Convert the object to a JSON string. From 648bf4d1810a2c2b9cbff1d4b941ab7cb7bc0b35 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sat, 21 Jul 2018 19:34:43 -0400 Subject: [PATCH 03/39] Simplify CSVFormatter.save --- pandas/io/formats/csvs.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 0796888554a46..15c6bd7163169 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -125,13 +125,9 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', def save(self): # create the writer & save - if self.encoding is None: - if compat.PY2: - encoding = 'ascii' - else: - encoding = 'utf-8' - else: - encoding = self.encoding + encoding = self.encoding + if encoding is None: + encoding = 'ascii' if compat.compat.PY2 else 'utf-8' # GH 21227 internal compression is not used when file-like passed. if self.compression and hasattr(self.path_or_buf, 'write'): From be724fa7edcb80056cf0bdbc94351ca39d8f3d23 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sat, 21 Jul 2018 19:59:17 -0400 Subject: [PATCH 04/39] Exploratory commit of what CSVFormatter.save should look like --- pandas/io/formats/csvs.py | 51 +++++++-------------------------------- 1 file changed, 9 insertions(+), 42 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 15c6bd7163169..aec3b20d35dea 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -30,7 +30,7 @@ class CSVFormatter(object): def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, - compression=None, quoting=None, line_terminator='\n', + compression='infer', quoting=None, line_terminator='\n', chunksize=None, tupleize_cols=False, quotechar='"', date_format=None, doublequote=True, escapechar=None, decimal='.'): @@ -129,32 +129,12 @@ def save(self): if encoding is None: encoding = 'ascii' if compat.compat.PY2 else 'utf-8' - # GH 21227 internal compression is not used when file-like passed. - if self.compression and hasattr(self.path_or_buf, 'write'): - msg = ("compression has no effect when passing file-like " - "object as input.") - warnings.warn(msg, RuntimeWarning, stacklevel=2) - - # when zip compression is called. - is_zip = isinstance(self.path_or_buf, ZipFile) or ( - not hasattr(self.path_or_buf, 'write') - and self.compression == 'zip') - - if is_zip: - # zipfile doesn't support writing string to archive. uses string - # buffer to receive csv writing and dump into zip compression - # file handle. GH 21241, 21118 - f = StringIO() - close = False - elif hasattr(self.path_or_buf, 'write'): - f = self.path_or_buf - close = False - else: - f, handles = _get_handle(self.path_or_buf, self.mode, - encoding=encoding, - compression=self.compression) - close = True - + f, handles = _get_handle( + path_or_buf=self.path_or_buf, + mode=self.mode, + encoding=encoding, + compression=self.compression, + ) try: writer_kwargs = dict(lineterminator=self.line_terminator, delimiter=self.sep, quoting=self.quoting, @@ -170,21 +150,8 @@ def save(self): self._save() finally: - if is_zip: - # GH 17778 handles zip compression separately. - buf = f.getvalue() - if hasattr(self.path_or_buf, 'write'): - self.path_or_buf.write(buf) - else: - f, handles = _get_handle(self.path_or_buf, self.mode, - encoding=encoding, - compression=self.compression) - f.write(buf) - close = True - if close: - f.close() - for _fh in handles: - _fh.close() + for handle in handles: + handle.close() def _save_header(self): From 9fe27c9083be85086183a667aff815e064a73c13 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 23 Jul 2018 14:52:29 -0400 Subject: [PATCH 05/39] fixup! Simplify CSVFormatter.save --- pandas/io/formats/csvs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index aec3b20d35dea..64ed8c25f736c 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -127,7 +127,7 @@ def save(self): # create the writer & save encoding = self.encoding if encoding is None: - encoding = 'ascii' if compat.compat.PY2 else 'utf-8' + encoding = 'ascii' if compat.PY2 else 'utf-8' f, handles = _get_handle( path_or_buf=self.path_or_buf, From 65f0689b0eefab64737da41010d2263c5923f8db Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 23 Jul 2018 14:59:23 -0400 Subject: [PATCH 06/39] "Revert changes not related to compression default --- pandas/io/formats/csvs.py | 61 +++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 64ed8c25f736c..abfd6096bc5a2 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -125,16 +125,40 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', def save(self): # create the writer & save - encoding = self.encoding - if encoding is None: - encoding = 'ascii' if compat.PY2 else 'utf-8' - - f, handles = _get_handle( - path_or_buf=self.path_or_buf, - mode=self.mode, - encoding=encoding, - compression=self.compression, - ) + if self.encoding is None: + if compat.PY2: + encoding = 'ascii' + else: + encoding = 'utf-8' + else: + encoding = self.encoding + + # GH 21227 internal compression is not used when file-like passed. + if self.compression and hasattr(self.path_or_buf, 'write'): + msg = ("compression has no effect when passing file-like " + "object as input.") + warnings.warn(msg, RuntimeWarning, stacklevel=2) + + # when zip compression is called. + is_zip = isinstance(self.path_or_buf, ZipFile) or ( + not hasattr(self.path_or_buf, 'write') + and self.compression == 'zip') + + if is_zip: + # zipfile doesn't support writing string to archive. uses string + # buffer to receive csv writing and dump into zip compression + # file handle. GH 21241, 21118 + f = StringIO() + close = False + elif hasattr(self.path_or_buf, 'write'): + f = self.path_or_buf + close = False + else: + f, handles = _get_handle(self.path_or_buf, self.mode, + encoding=encoding, + compression=self.compression) + close = True + try: writer_kwargs = dict(lineterminator=self.line_terminator, delimiter=self.sep, quoting=self.quoting, @@ -150,8 +174,21 @@ def save(self): self._save() finally: - for handle in handles: - handle.close() + if is_zip: + # GH 17778 handles zip compression separately. + buf = f.getvalue() + if hasattr(self.path_or_buf, 'write'): + self.path_or_buf.write(buf) + else: + f, handles = _get_handle(self.path_or_buf, self.mode, + encoding=encoding, + compression=self.compression) + f.write(buf) + close = True + if close: + f.close() + for _fh in handles: + _fh.close() def _save_header(self): From 868e671efc06ca66bb8d9fa7597c9468630e3ed9 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 23 Jul 2018 15:17:45 -0400 Subject: [PATCH 07/39] TST: test to_csv infers compression by default --- pandas/tests/io/formats/test_to_csv.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 5fb356e48289f..658b2a09993da 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import gzip import sys import pytest @@ -351,3 +352,15 @@ def test_to_csv_compression(self, compression_only, result = pd.read_csv(path, index_col=0, compression=read_compression) tm.assert_frame_equal(result, df) + + def test_compression_defaults_to_infer(tmpdir): + """ + Test that to_csv defaults to inferring compression from paths. + https://github.com/pandas-dev/pandas/pull/22011 + """ + df = DataFrame({"A": [1]}) + with tm.ensure_clean('compressed.csv.gz') as path: + df.to_csv(path, index=False) + with gzip.open(path, 'rt') as read_file: + lines = read_file.read().splitlines() + assert lines == ['A', '1'] From c3b76ee9fdc73da9519c62a3d78e3f033708a532 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 23 Jul 2018 16:06:14 -0400 Subject: [PATCH 08/39] Debugging print statements Attempt to diagnose testing failure of Python 2 test_compression_warning https://travis-ci.org/pandas-dev/pandas/jobs/407300547#L3853 --- pandas/io/formats/csvs.py | 3 +++ pandas/tests/test_common.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index abfd6096bc5a2..e4228c572dd5d 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -134,6 +134,9 @@ def save(self): encoding = self.encoding # GH 21227 internal compression is not used when file-like passed. + print('debug_3', self.compression) + print('debug_4', self.path_or_buf) + print('debug_5', hasattr(self.path_or_buf, 'write')) if self.compression and hasattr(self.path_or_buf, 'write'): msg = ("compression has no effect when passing file-like " "object as input.") diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 61f838eeeeb30..238b780bb584d 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -257,8 +257,10 @@ def test_compression_warning(compression_only): [12.32112, 123123.2, 321321.2]], columns=['X', 'Y', 'Z']) with tm.ensure_clean() as filename: + print('debug_1', compression_only) f, _handles = _get_handle(filename, 'w', compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): with f: + print('debug_2', compression_only) df.to_csv(f, compression=compression_only) From cebc0d98bd4ec7f68d3521f2814c0f41de818312 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 23 Jul 2018 17:08:10 -0400 Subject: [PATCH 09/39] Debugging: use logging rather than print --- pandas/io/formats/csvs.py | 9 ++++++--- pandas/tests/test_common.py | 5 +++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index e4228c572dd5d..5502d14bc05d2 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -134,10 +134,13 @@ def save(self): encoding = self.encoding # GH 21227 internal compression is not used when file-like passed. - print('debug_3', self.compression) - print('debug_4', self.path_or_buf) - print('debug_5', hasattr(self.path_or_buf, 'write')) + import logging + logging.warning('debug_3: {}'.format(self.compression)) + logging.warning('debug_4: {}'.format(self.path_or_buf)) + logging.warning( + 'debug_5: {}'.format(hasattr(self.path_or_buf, 'write'))) if self.compression and hasattr(self.path_or_buf, 'write'): + logging.warning('debug_6: in loop, should RuntimeWarn') msg = ("compression has no effect when passing file-like " "object as input.") warnings.warn(msg, RuntimeWarning, stacklevel=2) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 238b780bb584d..1177243df174c 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -257,10 +257,11 @@ def test_compression_warning(compression_only): [12.32112, 123123.2, 321321.2]], columns=['X', 'Y', 'Z']) with tm.ensure_clean() as filename: - print('debug_1', compression_only) + import logging + logging.warning('debug_1: {}'.format(compression_only)) f, _handles = _get_handle(filename, 'w', compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): with f: - print('debug_2', compression_only) + logging.warning('debug_2: {}'.format(compression_only)) df.to_csv(f, compression=compression_only) From 8411eb21df5f5e2a2ca1731797f81f5c138e9051 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 07:25:37 -0400 Subject: [PATCH 10/39] _infer_compression in CSVFormatter Prevent CSVFormatter from raising a RuntimeWarning when compression='infer' and a file-like object is passed to path_or_buf. --- pandas/io/formats/csvs.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 5502d14bc05d2..11ab368b5d677 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -21,8 +21,13 @@ from pandas.core.dtypes.generic import ( ABCMultiIndex, ABCPeriodIndex, ABCDatetimeIndex, ABCIndexClass) -from pandas.io.common import (_get_handle, UnicodeWriter, _expand_user, - _stringify_path) +from pandas.io.common import ( + _expand_user, + _get_handle, + _infer_compression, + _stringify_path, + UnicodeWriter, +) class CSVFormatter(object): @@ -51,7 +56,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.index_label = index_label self.mode = mode self.encoding = encoding - self.compression = compression + self.compression = _infer_compression(self.path_or_buf, compression) if quoting is None: quoting = csvlib.QUOTE_MINIMAL From c098c8fdc8a53c5fb263fc8cee0a1a71681b87cb Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 07:29:32 -0400 Subject: [PATCH 11/39] CSVFormatter: process encoding in init for consistency --- pandas/io/formats/csvs.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 11ab368b5d677..cd48b04fa6fab 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -55,6 +55,8 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.index = index self.index_label = index_label self.mode = mode + if encoding is None: + encoding = 'ascii' if compat.PY2 else 'utf-8' self.encoding = encoding self.compression = _infer_compression(self.path_or_buf, compression) @@ -129,14 +131,9 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.nlevels = 0 def save(self): - # create the writer & save - if self.encoding is None: - if compat.PY2: - encoding = 'ascii' - else: - encoding = 'utf-8' - else: - encoding = self.encoding + """ + Create the writer & save + """ # GH 21227 internal compression is not used when file-like passed. import logging From 2f6601d278e4d741e95a27af783aafd05bd4b045 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 07:40:26 -0400 Subject: [PATCH 12/39] TST + DOC: test_compression_warning docstring --- pandas/tests/test_common.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 1177243df174c..7c897373daf3e 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -251,8 +251,18 @@ def test_compression_size_fh(obj, method, compression_only): assert uncompressed > compressed -# GH 21227 def test_compression_warning(compression_only): + """ + Assert that passing a file object to to_csv while explicitly specifying a + compression protocol triggers a RuntimeWarning, as per + https://github.com/pandas-dev/pandas/issues/21227. + + Note that pytest has an issue that causes assert_produces_warning to fail + in Python 2 if the warning has occurred in previous tests + (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this test + fail in just Python 2 builds, it likely indicates that other tests are + producing RuntimeWarnings, thereby triggering the pytest bug. + """ df = DataFrame(100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], columns=['X', 'Y', 'Z']) From eb7f9b5491defe035231e87f7902f1ea8786e86d Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 08:07:27 -0400 Subject: [PATCH 13/39] fixup! CSVFormatter: process encoding in init for consistency --- pandas/io/formats/csvs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index cd48b04fa6fab..7dcae6198cbc5 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -163,7 +163,7 @@ def save(self): close = False else: f, handles = _get_handle(self.path_or_buf, self.mode, - encoding=encoding, + encoding=self.encoding, compression=self.compression) close = True @@ -173,10 +173,10 @@ def save(self): doublequote=self.doublequote, escapechar=self.escapechar, quotechar=self.quotechar) - if encoding == 'ascii': + if self.encoding == 'ascii': self.writer = csvlib.writer(f, **writer_kwargs) else: - writer_kwargs['encoding'] = encoding + writer_kwargs['encoding'] = self.encoding self.writer = UnicodeWriter(f, **writer_kwargs) self._save() @@ -189,7 +189,7 @@ def save(self): self.path_or_buf.write(buf) else: f, handles = _get_handle(self.path_or_buf, self.mode, - encoding=encoding, + encoding=self.encoding, compression=self.compression) f.write(buf) close = True From d4a5c90712ab840eff883923fe7e65e56994b3da Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 08:42:55 -0400 Subject: [PATCH 14/39] Tests passing: remove debugging Passed in https://travis-ci.org/pandas-dev/pandas/builds/408474706 --- pandas/io/formats/csvs.py | 7 ------- pandas/tests/test_common.py | 3 --- 2 files changed, 10 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 7dcae6198cbc5..3c236fa10a5d6 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -134,15 +134,8 @@ def save(self): """ Create the writer & save """ - # GH 21227 internal compression is not used when file-like passed. - import logging - logging.warning('debug_3: {}'.format(self.compression)) - logging.warning('debug_4: {}'.format(self.path_or_buf)) - logging.warning( - 'debug_5: {}'.format(hasattr(self.path_or_buf, 'write'))) if self.compression and hasattr(self.path_or_buf, 'write'): - logging.warning('debug_6: in loop, should RuntimeWarn') msg = ("compression has no effect when passing file-like " "object as input.") warnings.warn(msg, RuntimeWarning, stacklevel=2) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 7c897373daf3e..7209477a56019 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -267,11 +267,8 @@ def test_compression_warning(compression_only): [12.32112, 123123.2, 321321.2]], columns=['X', 'Y', 'Z']) with tm.ensure_clean() as filename: - import logging - logging.warning('debug_1: {}'.format(compression_only)) f, _handles = _get_handle(filename, 'w', compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): with f: - logging.warning('debug_2: {}'.format(compression_only)) df.to_csv(f, compression=compression_only) From abd19e3b02c77e1160d460ab50b34d4642d568c2 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 09:08:19 -0400 Subject: [PATCH 15/39] Parametrized test for compression='infer' is default --- pandas/tests/test_common.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 7209477a56019..1ad5aafb0112e 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -11,7 +11,10 @@ from pandas.compat import range, lmap import pandas.core.common as com from pandas.core import ops -from pandas.io.common import _get_handle +from pandas.io.common import ( + _compression_to_extension, + _get_handle, +) import pandas.util.testing as tm @@ -217,13 +220,23 @@ def test_standardize_mapping(): Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) @pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) def test_compression_size(obj, method, compression_only): - - with tm.ensure_clean() as filename: - getattr(obj, method)(filename, compression=compression_only) - compressed = os.path.getsize(filename) - getattr(obj, method)(filename, compression=None) - uncompressed = os.path.getsize(filename) - assert uncompressed > compressed + """ + Tests that compression is occurring by comparing to the bytes on disk of + the uncompressed file. + """ + extension = _compression_to_extension[compression_only] + to_method = getattr(obj, method) + with tm.ensure_clean('no-compression') as path: + to_method(path, compression=None) + no_compression_size = os.path.getsize(path) + with tm.ensure_clean('explicit-compression' + extension) as path: + to_method(path, compression=compression_only) + explicit_compression_size = os.path.getsize(path) + with tm.ensure_clean('inferred-compression' + extension) as path: + to_method(path) # assumes that compression='infer' is the default + inferred_compression_size = os.path.getsize(path) + assert (no_compression_size > explicit_compression_size == + inferred_compression_size) @pytest.mark.parametrize('obj', [ @@ -233,7 +246,6 @@ def test_compression_size(obj, method, compression_only): Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) @pytest.mark.parametrize('method', ['to_csv', 'to_json']) def test_compression_size_fh(obj, method, compression_only): - with tm.ensure_clean() as filename: f, _handles = _get_handle(filename, 'w', compression=compression_only) with f: From 2f670fedbe1694e64096c2a775d8e83799f04036 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 09:19:41 -0400 Subject: [PATCH 16/39] Default compression='infer' in series.to_csv --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3571e908fc6a7..f74c3abe19352 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3759,7 +3759,7 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, def to_csv(self, path=None, index=True, sep=",", na_rep='', float_format=None, header=False, index_label=None, - mode='w', encoding=None, compression=None, date_format=None, + mode='w', encoding=None, compression='infer', date_format=None, decimal='.'): """ Write Series to a comma-separated values (csv) file @@ -3790,7 +3790,7 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', compression : string, optional A string representing the compression to use in the output file. Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only - used when the first argument is a filename. + used when the first argument is a filename. Defaults to 'infer'. date_format: string, default None Format string for datetime objects. decimal: string, default '.' From aa9ce13dc24b03e34d0f410ff949e2ef4bd81e38 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 09:40:37 -0400 Subject: [PATCH 17/39] What's New Entry for v0.24.0 --- doc/source/whatsnew/v0.24.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 8fe3023e9537c..86bacc09e7860 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -176,7 +176,8 @@ Other Enhancements - :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`) - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`) - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) -- :func:`~DataFrame.to_csv` and :func:`~DataFrame.to_json` now support ``compression='infer'`` to infer compression based on filename (:issue:`15008`) +- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename (:issue:`15008`). + The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - From a6aabaddec7b8dcae67ead7b1102d8d35b7572aa Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 11:17:56 -0400 Subject: [PATCH 18/39] Remove unused tmpdir fixture argument --- pandas/tests/io/formats/test_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 658b2a09993da..f452ceee9c881 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -353,7 +353,7 @@ def test_to_csv_compression(self, compression_only, compression=read_compression) tm.assert_frame_equal(result, df) - def test_compression_defaults_to_infer(tmpdir): + def test_compression_defaults_to_infer(): """ Test that to_csv defaults to inferring compression from paths. https://github.com/pandas-dev/pandas/pull/22011 From 8a0c97e8246cff9a68b6553dbe046cffae5d26c8 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 11:44:30 -0400 Subject: [PATCH 19/39] Update to_json docstring --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3d76b5f550246..ce44168375132 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1966,7 +1966,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, .. versionadded:: 0.19.0 - compression : {'infer', 'gzip', 'bz2', 'xz', None}, default None + compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer' A string representing the compression to use in the output file, only used when the first argument is a filename. From 6be808d87f8470b733534969397613784eb35a1c Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 12:07:08 -0400 Subject: [PATCH 20/39] Change test docstrings to comments Refs https://github.com/pandas-dev/pandas/pull/22011/files#r205503220 --- pandas/tests/test_common.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 1ad5aafb0112e..a992e046c38c6 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -220,10 +220,8 @@ def test_standardize_mapping(): Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) @pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) def test_compression_size(obj, method, compression_only): - """ - Tests that compression is occurring by comparing to the bytes on disk of - the uncompressed file. - """ + # Tests that compression is occurring by comparing to the bytes on disk of + # the uncompressed file. extension = _compression_to_extension[compression_only] to_method = getattr(obj, method) with tm.ensure_clean('no-compression') as path: @@ -264,17 +262,14 @@ def test_compression_size_fh(obj, method, compression_only): def test_compression_warning(compression_only): - """ - Assert that passing a file object to to_csv while explicitly specifying a - compression protocol triggers a RuntimeWarning, as per - https://github.com/pandas-dev/pandas/issues/21227. - - Note that pytest has an issue that causes assert_produces_warning to fail - in Python 2 if the warning has occurred in previous tests - (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this test - fail in just Python 2 builds, it likely indicates that other tests are - producing RuntimeWarnings, thereby triggering the pytest bug. - """ + # Assert that passing a file object to to_csv while explicitly specifying a + # compression protocol triggers a RuntimeWarning, as per + # https://github.com/pandas-dev/pandas/issues/21227. + # Note that pytest has an issue that causes assert_produces_warning to fail + # in Python 2 if the warning has occurred in previous tests + # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this + # test fail in just Python 2 builds, it likely indicates that other tests + # are producing RuntimeWarnings, thereby triggering the pytest bug. df = DataFrame(100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], columns=['X', 'Y', 'Z']) From 63e6591b3b0327a67a220dde1a82b2f52d11189f Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 12:36:30 -0400 Subject: [PATCH 21/39] Consolidate testing to a single parametrized test --- pandas/tests/io/formats/test_to_csv.py | 13 -------- pandas/tests/test_common.py | 45 ++++++++++++++++++-------- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index f452ceee9c881..5fb356e48289f 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import gzip import sys import pytest @@ -352,15 +351,3 @@ def test_to_csv_compression(self, compression_only, result = pd.read_csv(path, index_col=0, compression=read_compression) tm.assert_frame_equal(result, df) - - def test_compression_defaults_to_infer(): - """ - Test that to_csv defaults to inferring compression from paths. - https://github.com/pandas-dev/pandas/pull/22011 - """ - df = DataFrame({"A": [1]}) - with tm.ensure_clean('compressed.csv.gz') as path: - df.to_csv(path, index=False) - with gzip.open(path, 'rt') as read_file: - lines = read_file.read().splitlines() - assert lines == ['A', '1'] diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index a992e046c38c6..967474b0fe7c9 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -7,6 +7,7 @@ import numpy as np +import pandas from pandas import Series, DataFrame, Timestamp from pandas.compat import range, lmap import pandas.core.common as com @@ -222,19 +223,12 @@ def test_standardize_mapping(): def test_compression_size(obj, method, compression_only): # Tests that compression is occurring by comparing to the bytes on disk of # the uncompressed file. - extension = _compression_to_extension[compression_only] - to_method = getattr(obj, method) - with tm.ensure_clean('no-compression') as path: - to_method(path, compression=None) - no_compression_size = os.path.getsize(path) - with tm.ensure_clean('explicit-compression' + extension) as path: - to_method(path, compression=compression_only) - explicit_compression_size = os.path.getsize(path) - with tm.ensure_clean('inferred-compression' + extension) as path: - to_method(path) # assumes that compression='infer' is the default - inferred_compression_size = os.path.getsize(path) - assert (no_compression_size > explicit_compression_size == - inferred_compression_size) + with tm.ensure_clean() as filename: + getattr(obj, method)(filename, compression=compression_only) + compressed = os.path.getsize(filename) + getattr(obj, method)(filename, compression=None) + uncompressed = os.path.getsize(filename) + assert uncompressed > compressed @pytest.mark.parametrize('obj', [ @@ -244,6 +238,7 @@ def test_compression_size(obj, method, compression_only): Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) @pytest.mark.parametrize('method', ['to_csv', 'to_json']) def test_compression_size_fh(obj, method, compression_only): + with tm.ensure_clean() as filename: f, _handles = _get_handle(filename, 'w', compression=compression_only) with f: @@ -261,6 +256,30 @@ def test_compression_size_fh(obj, method, compression_only): assert uncompressed > compressed +@pytest.mark.parametrize('input', [ + DataFrame([[1.0, 0, -4.4], + [3.4, 5, 2.4]], columns=['X', 'Y', 'Z']), + Series([0, 1, 2, 4], name='X'), +]) +@pytest.mark.parametrize('methods', [ + ('to_csv', pandas.read_csv), + ('to_json', pandas.read_json), + ('to_pickle', pandas.read_pickle), +]) +def test_compression_defaults_to_infer(input, methods, compression_only): + # Test that to_* methods default to inferring compression from paths. + # https://github.com/pandas-dev/pandas/pull/22011 + write_method, read_method = methods + extension = _compression_to_extension[compression_only] + with tm.ensure_clean('compressed' + extension) as path: + # assumes that compression='infer' is the default + getattr(input, write_method)(path) + output = read_method(path, compression=compression_only) + assert_equals = (tm.assert_frame_equal if isinstance(input, DataFrame) + else tm.assert_series_equal) + assert_equals(output, input) + + def test_compression_warning(compression_only): # Assert that passing a file object to to_csv while explicitly specifying a # compression protocol triggers a RuntimeWarning, as per From fadb943851e395cfd6e7dbe05ba86d15ea2ee6f2 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 14:04:31 -0400 Subject: [PATCH 22/39] Split test_compression_defaults_to_infer into Series & DataFrame tests --- pandas/tests/test_common.py | 41 +++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 967474b0fe7c9..580535f4e4827 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -256,28 +256,43 @@ def test_compression_size_fh(obj, method, compression_only): assert uncompressed > compressed -@pytest.mark.parametrize('input', [ - DataFrame([[1.0, 0, -4.4], - [3.4, 5, 2.4]], columns=['X', 'Y', 'Z']), - Series([0, 1, 2, 4], name='X'), -]) -@pytest.mark.parametrize('methods', [ +@pytest.mark.parametrize('write_method, read_method', [ ('to_csv', pandas.read_csv), ('to_json', pandas.read_json), ('to_pickle', pandas.read_pickle), ]) -def test_compression_defaults_to_infer(input, methods, compression_only): - # Test that to_* methods default to inferring compression from paths. - # https://github.com/pandas-dev/pandas/pull/22011 - write_method, read_method = methods +def test_dataframe_compression_defaults_to_infer( + write_method, read_method, compression_only): + # Test that DataFrame.to_* methods default to inferring compression from + # paths. https://github.com/pandas-dev/pandas/pull/22011 + input = DataFrame([[1.0, 0, -4.4], [3.4, 5, 2.4]], columns=['X', 'Y', 'Z']) + extension = _compression_to_extension[compression_only] + kwargs = {} + if write_method == 'to_csv': + kwargs['index'] = False + with tm.ensure_clean('compressed' + extension) as path: + # assumes that compression='infer' is the default + getattr(input, write_method)(path, **kwargs) + output = read_method(path, compression=compression_only) + tm.assert_frame_equal(output, input) + + +@pytest.mark.parametrize('write_method, read_method', [ + ('to_csv', pandas.Series.from_csv), + ('to_json', pandas.read_json), + ('to_pickle', pandas.read_pickle), +]) +def test_series_compression_defaults_to_infer( + write_method, read_method, compression_only): + # Test that Series.to_* methods default to inferring compression from + # paths. https://github.com/pandas-dev/pandas/pull/22011 + input = Series(100 * [0, 5, -2, 10]) extension = _compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: # assumes that compression='infer' is the default getattr(input, write_method)(path) output = read_method(path, compression=compression_only) - assert_equals = (tm.assert_frame_equal if isinstance(input, DataFrame) - else tm.assert_series_equal) - assert_equals(output, input) + tm.assert_series_equal(output, input) def test_compression_warning(compression_only): From 0edffc7e74a4ad45158d7f4b279a73ea7d534af2 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 14:12:21 -0400 Subject: [PATCH 23/39] Parametrize write_kwargs --- pandas/tests/test_common.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 580535f4e4827..b60bc244143df 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -256,41 +256,38 @@ def test_compression_size_fh(obj, method, compression_only): assert uncompressed > compressed -@pytest.mark.parametrize('write_method, read_method', [ - ('to_csv', pandas.read_csv), - ('to_json', pandas.read_json), - ('to_pickle', pandas.read_pickle), +@pytest.mark.parametrize('write_method, write_kwargs, read_method', [ + ('to_csv', {'index': False}, pandas.read_csv), + ('to_json', {}, pandas.read_json), + ('to_pickle', {}, pandas.read_pickle), ]) def test_dataframe_compression_defaults_to_infer( - write_method, read_method, compression_only): + write_method, write_kwargs, read_method, compression_only): # Test that DataFrame.to_* methods default to inferring compression from # paths. https://github.com/pandas-dev/pandas/pull/22011 input = DataFrame([[1.0, 0, -4.4], [3.4, 5, 2.4]], columns=['X', 'Y', 'Z']) extension = _compression_to_extension[compression_only] - kwargs = {} - if write_method == 'to_csv': - kwargs['index'] = False with tm.ensure_clean('compressed' + extension) as path: # assumes that compression='infer' is the default - getattr(input, write_method)(path, **kwargs) + getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only) tm.assert_frame_equal(output, input) -@pytest.mark.parametrize('write_method, read_method', [ - ('to_csv', pandas.Series.from_csv), - ('to_json', pandas.read_json), - ('to_pickle', pandas.read_pickle), +@pytest.mark.parametrize('write_method, write_kwargs, read_method', [ + ('to_csv', {}, pandas.Series.from_csv), + ('to_json', {'typ': 'series'}, pandas.read_json), + ('to_pickle', {}, pandas.read_pickle), ]) def test_series_compression_defaults_to_infer( - write_method, read_method, compression_only): + write_method, write_kwargs, read_method, compression_only): # Test that Series.to_* methods default to inferring compression from # paths. https://github.com/pandas-dev/pandas/pull/22011 input = Series(100 * [0, 5, -2, 10]) extension = _compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: # assumes that compression='infer' is the default - getattr(input, write_method)(path) + getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only) tm.assert_series_equal(output, input) From 97f5de5560890b493b6dbbaa8022fba86766403b Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 15:13:56 -0400 Subject: [PATCH 24/39] Fix kwargs in test_series_compression_defaults_to_infer --- pandas/tests/test_common.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index b60bc244143df..573915847e4cc 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -274,21 +274,21 @@ def test_dataframe_compression_defaults_to_infer( tm.assert_frame_equal(output, input) -@pytest.mark.parametrize('write_method, write_kwargs, read_method', [ - ('to_csv', {}, pandas.Series.from_csv), - ('to_json', {'typ': 'series'}, pandas.read_json), - ('to_pickle', {}, pandas.read_pickle), +@pytest.mark.parametrize('write_method, read_method, read_kwargs', [ + ('to_csv', pandas.Series.from_csv, {}), + ('to_json', pandas.read_json, {'typ': 'series'}), + ('to_pickle', pandas.read_pickle, {}), ]) def test_series_compression_defaults_to_infer( - write_method, write_kwargs, read_method, compression_only): + write_method, read_method, read_kwargs, compression_only): # Test that Series.to_* methods default to inferring compression from # paths. https://github.com/pandas-dev/pandas/pull/22011 input = Series(100 * [0, 5, -2, 10]) extension = _compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: # assumes that compression='infer' is the default - getattr(input, write_method)(path, **write_kwargs) - output = read_method(path, compression=compression_only) + getattr(input, write_method)(path) + output = read_method(path, compression=compression_only, **read_kwargs) tm.assert_series_equal(output, input) From 83bc0a8a6f71d24d7d69fa77f5ba90feac3f4a94 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 15:58:40 -0400 Subject: [PATCH 25/39] Attempt to fix CSV series roundtrip --- pandas/tests/test_common.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 573915847e4cc..6697b8ffba9e3 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -274,20 +274,20 @@ def test_dataframe_compression_defaults_to_infer( tm.assert_frame_equal(output, input) -@pytest.mark.parametrize('write_method, read_method, read_kwargs', [ - ('to_csv', pandas.Series.from_csv, {}), - ('to_json', pandas.read_json, {'typ': 'series'}), - ('to_pickle', pandas.read_pickle, {}), +@pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [ + ('to_csv', {'index': False, 'header': True}, pandas.read_csv, {'squeeze': True}), + ('to_json', {}, pandas.read_json, {'typ': 'series'}), + ('to_pickle', {}, pandas.read_pickle, {}), ]) def test_series_compression_defaults_to_infer( - write_method, read_method, read_kwargs, compression_only): + write_method, write_kwargs, read_method, read_kwargs, compression_only): # Test that Series.to_* methods default to inferring compression from # paths. https://github.com/pandas-dev/pandas/pull/22011 - input = Series(100 * [0, 5, -2, 10]) + input = Series([0, 5, -2, 10], name='X') extension = _compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: # assumes that compression='infer' is the default - getattr(input, write_method)(path) + getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only, **read_kwargs) tm.assert_series_equal(output, input) From 874a4bfa799acaa51235d0b0ae046c702c523e14 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 16:08:30 -0400 Subject: [PATCH 26/39] Fix test failure Not sure why the series name did not round-trip. Could be a pandas bug, but one not related to compression inferrence. --- pandas/tests/test_common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 6697b8ffba9e3..7479852d8d3ec 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -275,12 +275,14 @@ def test_dataframe_compression_defaults_to_infer( @pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [ - ('to_csv', {'index': False, 'header': True}, pandas.read_csv, {'squeeze': True}), + ('to_csv', {'index': False, 'header': True}, + pandas.read_csv, {'squeeze': True}), ('to_json', {}, pandas.read_json, {'typ': 'series'}), ('to_pickle', {}, pandas.read_pickle, {}), ]) def test_series_compression_defaults_to_infer( - write_method, write_kwargs, read_method, read_kwargs, compression_only): + write_method, write_kwargs, read_method, read_kwargs, compression_only + ): # Test that Series.to_* methods default to inferring compression from # paths. https://github.com/pandas-dev/pandas/pull/22011 input = Series([0, 5, -2, 10], name='X') @@ -289,7 +291,7 @@ def test_series_compression_defaults_to_infer( # assumes that compression='infer' is the default getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only, **read_kwargs) - tm.assert_series_equal(output, input) + tm.assert_series_equal(output, input, check_names=False) def test_compression_warning(compression_only): From 14c3945410288a8edd3ee989d5582b1e5df0e992 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Thu, 26 Jul 2018 17:55:11 -0400 Subject: [PATCH 27/39] Python 2 flake8 error 'https://travis-ci.org/pandas-dev/pandas/jobs/408671449#L3204' --- pandas/tests/test_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 7479852d8d3ec..31ea436ab158e 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -281,8 +281,8 @@ def test_dataframe_compression_defaults_to_infer( ('to_pickle', {}, pandas.read_pickle, {}), ]) def test_series_compression_defaults_to_infer( - write_method, write_kwargs, read_method, read_kwargs, compression_only - ): + write_method, write_kwargs, read_method, read_kwargs, + compression_only): # Test that Series.to_* methods default to inferring compression from # paths. https://github.com/pandas-dev/pandas/pull/22011 input = Series([0, 5, -2, 10], name='X') From 9a4dc4180335e447132d43584f5dbdf36f6e95d5 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Fri, 27 Jul 2018 10:38:38 -0400 Subject: [PATCH 28/39] Reduce / remove comments --- pandas/tests/test_common.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 31ea436ab158e..5816410a1e68d 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -221,8 +221,7 @@ def test_standardize_mapping(): Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) @pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) def test_compression_size(obj, method, compression_only): - # Tests that compression is occurring by comparing to the bytes on disk of - # the uncompressed file. + with tm.ensure_clean() as filename: getattr(obj, method)(filename, compression=compression_only) compressed = os.path.getsize(filename) @@ -264,11 +263,10 @@ def test_compression_size_fh(obj, method, compression_only): def test_dataframe_compression_defaults_to_infer( write_method, write_kwargs, read_method, compression_only): # Test that DataFrame.to_* methods default to inferring compression from - # paths. https://github.com/pandas-dev/pandas/pull/22011 + # paths. GH 22004 input = DataFrame([[1.0, 0, -4.4], [3.4, 5, 2.4]], columns=['X', 'Y', 'Z']) extension = _compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: - # assumes that compression='infer' is the default getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only) tm.assert_frame_equal(output, input) @@ -284,11 +282,10 @@ def test_series_compression_defaults_to_infer( write_method, write_kwargs, read_method, read_kwargs, compression_only): # Test that Series.to_* methods default to inferring compression from - # paths. https://github.com/pandas-dev/pandas/pull/22011 + # paths. GH 22004 input = Series([0, 5, -2, 10], name='X') extension = _compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: - # assumes that compression='infer' is the default getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only, **read_kwargs) tm.assert_series_equal(output, input, check_names=False) @@ -296,8 +293,7 @@ def test_series_compression_defaults_to_infer( def test_compression_warning(compression_only): # Assert that passing a file object to to_csv while explicitly specifying a - # compression protocol triggers a RuntimeWarning, as per - # https://github.com/pandas-dev/pandas/issues/21227. + # compression protocol triggers a RuntimeWarning, as per GH 21227. # Note that pytest has an issue that causes assert_produces_warning to fail # in Python 2 if the warning has occurred in previous tests # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this From 1ba8f3a281a71db33d8baa6a9a4c9fd7e40c625a Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 30 Jul 2018 11:45:42 -0400 Subject: [PATCH 29/39] DOC: versionchanged & tweaks --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/frame.py | 2 ++ pandas/core/generic.py | 5 ++++- pandas/core/series.py | 8 +++++--- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9aabe28bfe10b..213a4e91176c5 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -177,7 +177,7 @@ Other Enhancements - :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`) - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`) - :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) -- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename (:issue:`15008`). +- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). - :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 56c5430ffccce..1ec203272bee4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1755,6 +1755,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, If 'infer' and `path_or_buf` is path-like, then detect compression from the following extensions: '.gz', '.bz2', '.zip' or '.xz' (otherwise no compression). + .. versionchanged:: 0.24.0 + 'infer' option added and set to default line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 91a007b5778e1..5f41dc8eef789 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2000,11 +2000,14 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, .. versionadded:: 0.19.0 - compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, + default 'infer' A string representing the compression to use in the output file, only used when the first argument is a filename. .. versionadded:: 0.21.0 + .. versionchanged:: 0.24.0 + 'infer' option added and set to default index : boolean, default True Whether to include the index values in the JSON string. Not diff --git a/pandas/core/series.py b/pandas/core/series.py index 74941874712f8..5ecacb744edcb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3795,10 +3795,12 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', encoding : string, optional a string representing the encoding to use if the contents are non-ascii, for python versions prior to 3 - compression : string, optional + compression : None or string, default 'infer' A string representing the compression to use in the output file. - Allowed values are 'gzip', 'bz2', 'zip', 'xz'. This input is only - used when the first argument is a filename. Defaults to 'infer'. + Allowed values are None, 'gzip', 'bz2', 'zip', 'xz', and 'infer'. + This input is only used when the first argument is a filename. + .. versionchanged:: 0.24.0 + 'infer' option added and set to default date_format: string, default None Format string for datetime objects. decimal: string, default '.' From 24e051e4d147d341de86e1919c74960a1b50253f Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 30 Jul 2018 11:58:21 -0400 Subject: [PATCH 30/39] Update doc/source/io.rst as needed --- doc/source/io.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/io.rst b/doc/source/io.rst index 9fe578524c8e0..d90df18f763b6 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -298,6 +298,7 @@ compression : {``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``None`` Set to ``None`` for no decompression. .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression. + .. versionchanged:: 0.24.0 'infer' option added and set to default. thousands : str, default ``None`` Thousands separator. From 387d1d29f2833e236e0d8c3e3167c94614676973 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 30 Jul 2018 12:22:16 -0400 Subject: [PATCH 31/39] Move tests from tests/test_common.py to tests/io/test_common.py --- pandas/tests/io/test_common.py | 154 ++++++++++++++++++++++++++------- pandas/tests/test_common.py | 105 +--------------------- 2 files changed, 126 insertions(+), 133 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 5c9739be73393..5568f008b180a 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -2,18 +2,18 @@ Tests for the pandas.io.common functionalities """ import mmap -import pytest import os -from os.path import isabs +import pytest import pandas as pd -import pandas.util.testing as tm +import pandas.io.common as cmn import pandas.util._test_decorators as td - -from pandas.io import common -from pandas.compat import is_platform_windows, StringIO, FileNotFoundError - -from pandas import read_csv, concat +import pandas.util.testing as tm +from pandas.compat import ( + is_platform_windows, + StringIO, + FileNotFoundError, +) class CustomFSPath(object): @@ -55,24 +55,24 @@ class TestCommonIOCapabilities(object): def test_expand_user(self): filename = '~/sometest' - expanded_name = common._expand_user(filename) + expanded_name = cmn._expand_user(filename) assert expanded_name != filename - assert isabs(expanded_name) + assert os.path.isabs(expanded_name) assert os.path.expanduser(filename) == expanded_name def test_expand_user_normal_path(self): filename = '/somefolder/sometest' - expanded_name = common._expand_user(filename) + expanded_name = cmn._expand_user(filename) assert expanded_name == filename assert os.path.expanduser(filename) == expanded_name @td.skip_if_no('pathlib') def test_stringify_path_pathlib(self): - rel_path = common._stringify_path(Path('.')) + rel_path = cmn._stringify_path(Path('.')) assert rel_path == '.' - redundant_path = common._stringify_path(Path('foo//bar')) + redundant_path = cmn._stringify_path(Path('foo//bar')) assert redundant_path == os.path.join('foo', 'bar') @td.skip_if_no('py.path') @@ -80,11 +80,11 @@ def test_stringify_path_localpath(self): path = os.path.join('foo', 'bar') abs_path = os.path.abspath(path) lpath = LocalPath(path) - assert common._stringify_path(lpath) == abs_path + assert cmn._stringify_path(lpath) == abs_path def test_stringify_path_fspath(self): p = CustomFSPath('foo/bar.csv') - result = common._stringify_path(p) + result = cmn._stringify_path(p) assert result == 'foo/bar.csv' @pytest.mark.parametrize('extension,expected', [ @@ -97,36 +97,36 @@ def test_stringify_path_fspath(self): @pytest.mark.parametrize('path_type', path_types) def test_infer_compression_from_path(self, extension, expected, path_type): path = path_type('foo/bar.csv' + extension) - compression = common._infer_compression(path, compression='infer') + compression = cmn._infer_compression(path, compression='infer') assert compression == expected def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' - filepath_or_buffer, _, _, should_close = common.get_filepath_or_buffer( + filepath_or_buffer, _, _, should_close = cmn.get_filepath_or_buffer( filename) assert filepath_or_buffer != filename - assert isabs(filepath_or_buffer) + assert os.path.isabs(filepath_or_buffer) assert os.path.expanduser(filename) == filepath_or_buffer assert not should_close def test_get_filepath_or_buffer_with_buffer(self): input_buffer = StringIO() - filepath_or_buffer, _, _, should_close = common.get_filepath_or_buffer( + filepath_or_buffer, _, _, should_close = cmn.get_filepath_or_buffer( input_buffer) assert filepath_or_buffer == input_buffer assert not should_close def test_iterator(self): - reader = read_csv(StringIO(self.data1), chunksize=1) - result = concat(reader, ignore_index=True) - expected = read_csv(StringIO(self.data1)) + reader = pd.read_csv(StringIO(self.data1), chunksize=1) + result = pd.concat(reader, ignore_index=True) + expected = pd.read_csv(StringIO(self.data1)) tm.assert_frame_equal(result, expected) # GH12153 - it = read_csv(StringIO(self.data1), chunksize=1) + it = pd.read_csv(StringIO(self.data1), chunksize=1) first = next(it) tm.assert_frame_equal(first, expected.iloc[[0]]) - tm.assert_frame_equal(concat(it), expected.iloc[1:]) + tm.assert_frame_equal(pd.concat(it), expected.iloc[1:]) @pytest.mark.parametrize('reader, module, error_class, fn_ext', [ (pd.read_csv, 'os', FileNotFoundError, 'csv'), @@ -246,18 +246,18 @@ def test_constructor_bad_file(self, mmap_file): msg = "[Errno 22]" err = mmap.error - tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file) + tm.assert_raises_regex(err, msg, cmn.MMapWrapper, non_file) target = open(mmap_file, 'r') target.close() msg = "I/O operation on closed file" tm.assert_raises_regex( - ValueError, msg, common.MMapWrapper, target) + ValueError, msg, cmn.MMapWrapper, target) def test_get_attr(self, mmap_file): with open(mmap_file, 'r') as target: - wrapper = common.MMapWrapper(target) + wrapper = cmn.MMapWrapper(target) attrs = dir(wrapper.mmap) attrs = [attr for attr in attrs @@ -271,7 +271,7 @@ def test_get_attr(self, mmap_file): def test_next(self, mmap_file): with open(mmap_file, 'r') as target: - wrapper = common.MMapWrapper(target) + wrapper = cmn.MMapWrapper(target) lines = target.readlines() for line in lines: @@ -285,4 +285,100 @@ def test_unknown_engine(self): df = tm.makeDataFrame() df.to_csv(path) with tm.assert_raises_regex(ValueError, 'Unknown engine'): - read_csv(path, engine='pyt') + pd.read_csv(path, engine='pyt') + + +@pytest.mark.parametrize('obj', [ + pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) +@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) +def test_compression_size(obj, method, compression_only): + + with tm.ensure_clean() as path: + getattr(obj, method)(path, compression=compression_only) + compressed = os.path.getsize(path) + getattr(obj, method)(path, compression=None) + uncompressed = os.path.getsize(path) + assert uncompressed > compressed + + +@pytest.mark.parametrize('obj', [ + pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) +@pytest.mark.parametrize('method', ['to_csv', 'to_json']) +def test_compression_size_fh(obj, method, compression_only): + + with tm.ensure_clean() as path: + f, handles = cmn._get_handle(path, 'w', compression=compression_only) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed + compressed = os.path.getsize(path) + with tm.ensure_clean() as path: + f, handles = cmn._get_handle(path, 'w', compression=None) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed + uncompressed = os.path.getsize(path) + assert uncompressed > compressed + + +@pytest.mark.parametrize('write_method, write_kwargs, read_method', [ + ('to_csv', {'index': False}, pd.read_csv), + ('to_json', {}, pd.read_json), + ('to_pickle', {}, pd.read_pickle), +]) +def test_dataframe_compression_defaults_to_infer( + write_method, write_kwargs, read_method, compression_only): + # Test that DataFrame.to_* methods default to inferring compression from + # paths. GH 22004 + input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=['X', 'Y', 'Z']) + extension = cmn._compression_to_extension[compression_only] + with tm.ensure_clean('compressed' + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + output = read_method(path, compression=compression_only) + tm.assert_frame_equal(output, input) + + +@pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [ + ('to_csv', {'index': False, 'header': True}, + pd.read_csv, {'squeeze': True}), + ('to_json', {}, pd.read_json, {'typ': 'series'}), + ('to_pickle', {}, pd.read_pickle, {}), +]) +def test_series_compression_defaults_to_infer( + write_method, write_kwargs, read_method, read_kwargs, + compression_only): + # Test that Series.to_* methods default to inferring compression from + # paths. GH 22004 + input = pd.Series([0, 5, -2, 10], name='X') + extension = cmn._compression_to_extension[compression_only] + with tm.ensure_clean('compressed' + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + output = read_method(path, compression=compression_only, **read_kwargs) + tm.assert_series_equal(output, input, check_names=False) + + +def test_compression_warning(compression_only): + # Assert that passing a file object to to_csv while explicitly specifying a + # compression protocol triggers a RuntimeWarning, as per GH 21227. + # Note that pytest has an issue that causes assert_produces_warning to fail + # in Python 2 if the warning has occurred in previous tests + # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this + # test fail in just Python 2 builds, it likely indicates that other tests + # are producing RuntimeWarnings, thereby triggering the pytest bug. + df = pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']) + with tm.ensure_clean() as path: + f, handles = cmn._get_handle(path, 'w', compression=compression_only) + with tm.assert_produces_warning(RuntimeWarning, + check_stacklevel=False): + with f: + df.to_csv(f, compression=compression_only) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 96c61ee1a7d80..5aaa2aed30dbc 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,21 +1,14 @@ # -*- coding: utf-8 -*- import pytest -import os import collections from functools import partial import numpy as np -import pandas -from pandas import Series, DataFrame, Timestamp +from pandas import Series, Timestamp import pandas.core.common as com from pandas.core import ops -from pandas.io.common import ( - _compression_to_extension, - _get_handle, -) -import pandas.util.testing as tm def test_get_callable_name(): @@ -115,99 +108,3 @@ def test_standardize_mapping(): dd = collections.defaultdict(list) assert isinstance(com.standardize_mapping(dd), partial) - - -@pytest.mark.parametrize('obj', [ - DataFrame(100 * [[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - columns=['X', 'Y', 'Z']), - Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) -@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) -def test_compression_size(obj, method, compression_only): - - with tm.ensure_clean() as filename: - getattr(obj, method)(filename, compression=compression_only) - compressed = os.path.getsize(filename) - getattr(obj, method)(filename, compression=None) - uncompressed = os.path.getsize(filename) - assert uncompressed > compressed - - -@pytest.mark.parametrize('obj', [ - DataFrame(100 * [[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - columns=['X', 'Y', 'Z']), - Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) -@pytest.mark.parametrize('method', ['to_csv', 'to_json']) -def test_compression_size_fh(obj, method, compression_only): - - with tm.ensure_clean() as filename: - f, _handles = _get_handle(filename, 'w', compression=compression_only) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed - compressed = os.path.getsize(filename) - with tm.ensure_clean() as filename: - f, _handles = _get_handle(filename, 'w', compression=None) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed - uncompressed = os.path.getsize(filename) - assert uncompressed > compressed - - -@pytest.mark.parametrize('write_method, write_kwargs, read_method', [ - ('to_csv', {'index': False}, pandas.read_csv), - ('to_json', {}, pandas.read_json), - ('to_pickle', {}, pandas.read_pickle), -]) -def test_dataframe_compression_defaults_to_infer( - write_method, write_kwargs, read_method, compression_only): - # Test that DataFrame.to_* methods default to inferring compression from - # paths. GH 22004 - input = DataFrame([[1.0, 0, -4.4], [3.4, 5, 2.4]], columns=['X', 'Y', 'Z']) - extension = _compression_to_extension[compression_only] - with tm.ensure_clean('compressed' + extension) as path: - getattr(input, write_method)(path, **write_kwargs) - output = read_method(path, compression=compression_only) - tm.assert_frame_equal(output, input) - - -@pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [ - ('to_csv', {'index': False, 'header': True}, - pandas.read_csv, {'squeeze': True}), - ('to_json', {}, pandas.read_json, {'typ': 'series'}), - ('to_pickle', {}, pandas.read_pickle, {}), -]) -def test_series_compression_defaults_to_infer( - write_method, write_kwargs, read_method, read_kwargs, - compression_only): - # Test that Series.to_* methods default to inferring compression from - # paths. GH 22004 - input = Series([0, 5, -2, 10], name='X') - extension = _compression_to_extension[compression_only] - with tm.ensure_clean('compressed' + extension) as path: - getattr(input, write_method)(path, **write_kwargs) - output = read_method(path, compression=compression_only, **read_kwargs) - tm.assert_series_equal(output, input, check_names=False) - - -def test_compression_warning(compression_only): - # Assert that passing a file object to to_csv while explicitly specifying a - # compression protocol triggers a RuntimeWarning, as per GH 21227. - # Note that pytest has an issue that causes assert_produces_warning to fail - # in Python 2 if the warning has occurred in previous tests - # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this - # test fail in just Python 2 builds, it likely indicates that other tests - # are producing RuntimeWarnings, thereby triggering the pytest bug. - df = DataFrame(100 * [[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - columns=['X', 'Y', 'Z']) - with tm.ensure_clean() as filename: - f, _handles = _get_handle(filename, 'w', compression=compression_only) - with tm.assert_produces_warning(RuntimeWarning, - check_stacklevel=False): - with f: - df.to_csv(f, compression=compression_only) From 12f14e286179ce7262f0ea2d09a5b345a46b35e8 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 30 Jul 2018 13:13:09 -0400 Subject: [PATCH 32/39] Organize / simplify pandas/tests/test_common.py imports --- pandas/tests/test_common.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 5aaa2aed30dbc..1267868b5ad03 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,14 +1,16 @@ # -*- coding: utf-8 -*- -import pytest import collections from functools import partial import numpy as np +import pytest from pandas import Series, Timestamp -import pandas.core.common as com -from pandas.core import ops +from pandas.core import ( + _maybe_match_name, + common as com, +) def test_get_callable_name(): @@ -74,7 +76,7 @@ def test_random_state(): (Series([1], name='x'), [2], 'x'), ([1], Series([2], name='y'), 'y')]) def test_maybe_match_name(left, right, expected): - assert ops._maybe_match_name(left, right) == expected + assert _maybe_match_name(left, right) == expected def test_dict_compat(): From 6db23d9b832b2e4b38acd890366f22e6ab50a81e Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 30 Jul 2018 13:14:37 -0400 Subject: [PATCH 33/39] Ignore flake error needed for test --- pandas/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 1267868b5ad03..890e622b6450f 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -19,7 +19,7 @@ def test_get_callable_name(): def fn(x): return x - lambda_ = lambda x: x + lambda_ = lambda x: x # noqa: E731 part1 = partial(fn) part2 = partial(part1) From e3a0f56ccbf9cb513b613de8d1d9c51793b066ff Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Mon, 30 Jul 2018 14:13:14 -0400 Subject: [PATCH 34/39] fixup! Organize / simplify pandas/tests/test_common.py imports --- pandas/tests/test_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 890e622b6450f..868525e818b62 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -8,8 +8,8 @@ from pandas import Series, Timestamp from pandas.core import ( - _maybe_match_name, common as com, + ops, ) @@ -76,7 +76,7 @@ def test_random_state(): (Series([1], name='x'), [2], 'x'), ([1], Series([2], name='y'), 'y')]) def test_maybe_match_name(left, right, expected): - assert _maybe_match_name(left, right) == expected + assert ops._maybe_match_name(left, right) == expected def test_dict_compat(): From af8c137f2d9fd643b4581e2c470d4388843bae13 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Tue, 31 Jul 2018 10:03:19 -0400 Subject: [PATCH 35/39] change import: cmn to icom --- pandas/tests/io/test_common.py | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 5568f008b180a..dc4dd3e1031b8 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -6,7 +6,7 @@ import pytest import pandas as pd -import pandas.io.common as cmn +import pandas.io.common as icom import pandas.util._test_decorators as td import pandas.util.testing as tm from pandas.compat import ( @@ -55,7 +55,7 @@ class TestCommonIOCapabilities(object): def test_expand_user(self): filename = '~/sometest' - expanded_name = cmn._expand_user(filename) + expanded_name = icom._expand_user(filename) assert expanded_name != filename assert os.path.isabs(expanded_name) @@ -63,16 +63,16 @@ def test_expand_user(self): def test_expand_user_normal_path(self): filename = '/somefolder/sometest' - expanded_name = cmn._expand_user(filename) + expanded_name = icom._expand_user(filename) assert expanded_name == filename assert os.path.expanduser(filename) == expanded_name @td.skip_if_no('pathlib') def test_stringify_path_pathlib(self): - rel_path = cmn._stringify_path(Path('.')) + rel_path = icom._stringify_path(Path('.')) assert rel_path == '.' - redundant_path = cmn._stringify_path(Path('foo//bar')) + redundant_path = icom._stringify_path(Path('foo//bar')) assert redundant_path == os.path.join('foo', 'bar') @td.skip_if_no('py.path') @@ -80,11 +80,11 @@ def test_stringify_path_localpath(self): path = os.path.join('foo', 'bar') abs_path = os.path.abspath(path) lpath = LocalPath(path) - assert cmn._stringify_path(lpath) == abs_path + assert icom._stringify_path(lpath) == abs_path def test_stringify_path_fspath(self): p = CustomFSPath('foo/bar.csv') - result = cmn._stringify_path(p) + result = icom._stringify_path(p) assert result == 'foo/bar.csv' @pytest.mark.parametrize('extension,expected', [ @@ -97,12 +97,12 @@ def test_stringify_path_fspath(self): @pytest.mark.parametrize('path_type', path_types) def test_infer_compression_from_path(self, extension, expected, path_type): path = path_type('foo/bar.csv' + extension) - compression = cmn._infer_compression(path, compression='infer') + compression = icom._infer_compression(path, compression='infer') assert compression == expected def test_get_filepath_or_buffer_with_path(self): filename = '~/sometest' - filepath_or_buffer, _, _, should_close = cmn.get_filepath_or_buffer( + filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer( filename) assert filepath_or_buffer != filename assert os.path.isabs(filepath_or_buffer) @@ -111,7 +111,7 @@ def test_get_filepath_or_buffer_with_path(self): def test_get_filepath_or_buffer_with_buffer(self): input_buffer = StringIO() - filepath_or_buffer, _, _, should_close = cmn.get_filepath_or_buffer( + filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer( input_buffer) assert filepath_or_buffer == input_buffer assert not should_close @@ -246,18 +246,18 @@ def test_constructor_bad_file(self, mmap_file): msg = "[Errno 22]" err = mmap.error - tm.assert_raises_regex(err, msg, cmn.MMapWrapper, non_file) + tm.assert_raises_regex(err, msg, icom.MMapWrapper, non_file) target = open(mmap_file, 'r') target.close() msg = "I/O operation on closed file" tm.assert_raises_regex( - ValueError, msg, cmn.MMapWrapper, target) + ValueError, msg, icom.MMapWrapper, target) def test_get_attr(self, mmap_file): with open(mmap_file, 'r') as target: - wrapper = cmn.MMapWrapper(target) + wrapper = icom.MMapWrapper(target) attrs = dir(wrapper.mmap) attrs = [attr for attr in attrs @@ -271,7 +271,7 @@ def test_get_attr(self, mmap_file): def test_next(self, mmap_file): with open(mmap_file, 'r') as target: - wrapper = cmn.MMapWrapper(target) + wrapper = icom.MMapWrapper(target) lines = target.readlines() for line in lines: @@ -313,14 +313,14 @@ def test_compression_size(obj, method, compression_only): def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as path: - f, handles = cmn._get_handle(path, 'w', compression=compression_only) + f, handles = icom._get_handle(path, 'w', compression=compression_only) with f: getattr(obj, method)(f) assert not f.closed assert f.closed compressed = os.path.getsize(path) with tm.ensure_clean() as path: - f, handles = cmn._get_handle(path, 'w', compression=None) + f, handles = icom._get_handle(path, 'w', compression=None) with f: getattr(obj, method)(f) assert not f.closed @@ -339,7 +339,7 @@ def test_dataframe_compression_defaults_to_infer( # Test that DataFrame.to_* methods default to inferring compression from # paths. GH 22004 input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=['X', 'Y', 'Z']) - extension = cmn._compression_to_extension[compression_only] + extension = icom._compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only) @@ -358,7 +358,7 @@ def test_series_compression_defaults_to_infer( # Test that Series.to_* methods default to inferring compression from # paths. GH 22004 input = pd.Series([0, 5, -2, 10], name='X') - extension = cmn._compression_to_extension[compression_only] + extension = icom._compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: getattr(input, write_method)(path, **write_kwargs) output = read_method(path, compression=compression_only, **read_kwargs) @@ -377,7 +377,7 @@ def test_compression_warning(compression_only): [12.32112, 123123.2, 321321.2]], columns=['X', 'Y', 'Z']) with tm.ensure_clean() as path: - f, handles = cmn._get_handle(path, 'w', compression=compression_only) + f, handles = icom._get_handle(path, 'w', compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): with f: From f8829a60a24a17451134014da27bd5f1f93a3fea Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Tue, 31 Jul 2018 10:07:51 -0400 Subject: [PATCH 36/39] Blank lines after versionchanged --- pandas/core/frame.py | 1 + pandas/core/series.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1ec203272bee4..9a8ad7176aecd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1757,6 +1757,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, (otherwise no compression). .. versionchanged:: 0.24.0 'infer' option added and set to default + line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file diff --git a/pandas/core/series.py b/pandas/core/series.py index 5ecacb744edcb..bc00a1717d931 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3801,6 +3801,7 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', This input is only used when the first argument is a filename. .. versionchanged:: 0.24.0 'infer' option added and set to default + date_format: string, default None Format string for datetime objects. decimal: string, default '.' From 918c0f8003151033a1a0b174e8ab59e0cf6611a4 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Tue, 31 Jul 2018 10:22:29 -0400 Subject: [PATCH 37/39] Move compression tests to new file tests/io/test_compression.py --- pandas/tests/io/test_common.py | 99 +-------------------------- pandas/tests/io/test_compression.py | 101 ++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 97 deletions(-) create mode 100644 pandas/tests/io/test_compression.py diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index dc4dd3e1031b8..ceaac9818354a 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -1,8 +1,9 @@ """ - Tests for the pandas.io.common functionalities +Tests for the pandas.io.common functionalities """ import mmap import os + import pytest import pandas as pd @@ -286,99 +287,3 @@ def test_unknown_engine(self): df.to_csv(path) with tm.assert_raises_regex(ValueError, 'Unknown engine'): pd.read_csv(path, engine='pyt') - - -@pytest.mark.parametrize('obj', [ - pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - columns=['X', 'Y', 'Z']), - pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) -@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) -def test_compression_size(obj, method, compression_only): - - with tm.ensure_clean() as path: - getattr(obj, method)(path, compression=compression_only) - compressed = os.path.getsize(path) - getattr(obj, method)(path, compression=None) - uncompressed = os.path.getsize(path) - assert uncompressed > compressed - - -@pytest.mark.parametrize('obj', [ - pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - columns=['X', 'Y', 'Z']), - pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) -@pytest.mark.parametrize('method', ['to_csv', 'to_json']) -def test_compression_size_fh(obj, method, compression_only): - - with tm.ensure_clean() as path: - f, handles = icom._get_handle(path, 'w', compression=compression_only) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed - compressed = os.path.getsize(path) - with tm.ensure_clean() as path: - f, handles = icom._get_handle(path, 'w', compression=None) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed - uncompressed = os.path.getsize(path) - assert uncompressed > compressed - - -@pytest.mark.parametrize('write_method, write_kwargs, read_method', [ - ('to_csv', {'index': False}, pd.read_csv), - ('to_json', {}, pd.read_json), - ('to_pickle', {}, pd.read_pickle), -]) -def test_dataframe_compression_defaults_to_infer( - write_method, write_kwargs, read_method, compression_only): - # Test that DataFrame.to_* methods default to inferring compression from - # paths. GH 22004 - input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=['X', 'Y', 'Z']) - extension = icom._compression_to_extension[compression_only] - with tm.ensure_clean('compressed' + extension) as path: - getattr(input, write_method)(path, **write_kwargs) - output = read_method(path, compression=compression_only) - tm.assert_frame_equal(output, input) - - -@pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [ - ('to_csv', {'index': False, 'header': True}, - pd.read_csv, {'squeeze': True}), - ('to_json', {}, pd.read_json, {'typ': 'series'}), - ('to_pickle', {}, pd.read_pickle, {}), -]) -def test_series_compression_defaults_to_infer( - write_method, write_kwargs, read_method, read_kwargs, - compression_only): - # Test that Series.to_* methods default to inferring compression from - # paths. GH 22004 - input = pd.Series([0, 5, -2, 10], name='X') - extension = icom._compression_to_extension[compression_only] - with tm.ensure_clean('compressed' + extension) as path: - getattr(input, write_method)(path, **write_kwargs) - output = read_method(path, compression=compression_only, **read_kwargs) - tm.assert_series_equal(output, input, check_names=False) - - -def test_compression_warning(compression_only): - # Assert that passing a file object to to_csv while explicitly specifying a - # compression protocol triggers a RuntimeWarning, as per GH 21227. - # Note that pytest has an issue that causes assert_produces_warning to fail - # in Python 2 if the warning has occurred in previous tests - # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this - # test fail in just Python 2 builds, it likely indicates that other tests - # are producing RuntimeWarnings, thereby triggering the pytest bug. - df = pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - columns=['X', 'Y', 'Z']) - with tm.ensure_clean() as path: - f, handles = icom._get_handle(path, 'w', compression=compression_only) - with tm.assert_produces_warning(RuntimeWarning, - check_stacklevel=False): - with f: - df.to_csv(f, compression=compression_only) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py new file mode 100644 index 0000000000000..a3639b3e236df --- /dev/null +++ b/pandas/tests/io/test_compression.py @@ -0,0 +1,101 @@ +import os + +import pytest + +import pandas as pd +import pandas.io.common as icom +import pandas.util.testing as tm + + +@pytest.mark.parametrize('obj', [ + pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) +@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv']) +def test_compression_size(obj, method, compression_only): + with tm.ensure_clean() as path: + getattr(obj, method)(path, compression=compression_only) + compressed_size = os.path.getsize(path) + getattr(obj, method)(path, compression=None) + uncompressed_size = os.path.getsize(path) + assert uncompressed_size > compressed_size + + +@pytest.mark.parametrize('obj', [ + pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name='X')]) +@pytest.mark.parametrize('method', ['to_csv', 'to_json']) +def test_compression_size_fh(obj, method, compression_only): + with tm.ensure_clean() as path: + f, handles = icom._get_handle(path, 'w', compression=compression_only) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed + compressed_size = os.path.getsize(path) + with tm.ensure_clean() as path: + f, handles = icom._get_handle(path, 'w', compression=None) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed + uncompressed_size = os.path.getsize(path) + assert uncompressed_size > compressed_size + + +@pytest.mark.parametrize('write_method, write_kwargs, read_method', [ + ('to_csv', {'index': False}, pd.read_csv), + ('to_json', {}, pd.read_json), + ('to_pickle', {}, pd.read_pickle), +]) +def test_dataframe_compression_defaults_to_infer( + write_method, write_kwargs, read_method, compression_only): + # Test that DataFrame.to_* methods default to inferring compression from + # paths. GH 22004 + input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=['X', 'Y', 'Z']) + extension = icom._compression_to_extension[compression_only] + with tm.ensure_clean('compressed' + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + output = read_method(path, compression=compression_only) + tm.assert_frame_equal(output, input) + + +@pytest.mark.parametrize('write_method,write_kwargs,read_method,read_kwargs', [ + ('to_csv', {'index': False, 'header': True}, + pd.read_csv, {'squeeze': True}), + ('to_json', {}, pd.read_json, {'typ': 'series'}), + ('to_pickle', {}, pd.read_pickle, {}), +]) +def test_series_compression_defaults_to_infer( + write_method, write_kwargs, read_method, read_kwargs, + compression_only): + # Test that Series.to_* methods default to inferring compression from + # paths. GH 22004 + input = pd.Series([0, 5, -2, 10], name='X') + extension = icom._compression_to_extension[compression_only] + with tm.ensure_clean('compressed' + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + output = read_method(path, compression=compression_only, **read_kwargs) + tm.assert_series_equal(output, input, check_names=False) + + +def test_compression_warning(compression_only): + # Assert that passing a file object to to_csv while explicitly specifying a + # compression protocol triggers a RuntimeWarning, as per GH 21227. + # Note that pytest has an issue that causes assert_produces_warning to fail + # in Python 2 if the warning has occurred in previous tests + # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this + # test fail in just Python 2 builds, it likely indicates that other tests + # are producing RuntimeWarnings, thereby triggering the pytest bug. + df = pd.DataFrame(100 * [[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + columns=['X', 'Y', 'Z']) + with tm.ensure_clean() as path: + f, handles = icom._get_handle(path, 'w', compression=compression_only) + with tm.assert_produces_warning(RuntimeWarning, + check_stacklevel=False): + with f: + df.to_csv(f, compression=compression_only) From eadf68e4e23c4d6715347d3096e4b50e99baaa0e Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Tue, 31 Jul 2018 19:38:36 -0400 Subject: [PATCH 38/39] blank lines before .. versionchanged Refs https://github.com/pandas-dev/pandas/pull/22011#discussion_r206693049 Blanks are needed before but not after or in between. --- doc/source/io.rst | 1 - pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 -- pandas/core/series.py | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index d90df18f763b6..c2c8c1c17700f 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -299,7 +299,6 @@ compression : {``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``None`` .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression. .. versionchanged:: 0.24.0 'infer' option added and set to default. - thousands : str, default ``None`` Thousands separator. decimal : str, default ``'.'`` diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a8ad7176aecd..ebd35cb1a6a1a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1755,9 +1755,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, If 'infer' and `path_or_buf` is path-like, then detect compression from the following extensions: '.gz', '.bz2', '.zip' or '.xz' (otherwise no compression). + .. versionchanged:: 0.24.0 'infer' option added and set to default - line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5f41dc8eef789..f62605c342702 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1999,7 +1999,6 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, like. .. versionadded:: 0.19.0 - compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' A string representing the compression to use in the output file, @@ -2008,7 +2007,6 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, .. versionadded:: 0.21.0 .. versionchanged:: 0.24.0 'infer' option added and set to default - index : boolean, default True Whether to include the index values in the JSON string. Not including the index (``index=False``) is only supported when diff --git a/pandas/core/series.py b/pandas/core/series.py index bc00a1717d931..21dea15772cc0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3799,9 +3799,9 @@ def to_csv(self, path=None, index=True, sep=",", na_rep='', A string representing the compression to use in the output file. Allowed values are None, 'gzip', 'bz2', 'zip', 'xz', and 'infer'. This input is only used when the first argument is a filename. + .. versionchanged:: 0.24.0 'infer' option added and set to default - date_format: string, default None Format string for datetime objects. decimal: string, default '.' From cf5b62e31bacbc4b27744f45a60a10c6a7902396 Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Wed, 1 Aug 2018 10:41:19 -0400 Subject: [PATCH 39/39] Remove comments and space after GH --- pandas/io/formats/csvs.py | 6 +++--- pandas/tests/io/test_compression.py | 8 +++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 3c236fa10a5d6..6fabd2573a7b4 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -134,7 +134,7 @@ def save(self): """ Create the writer & save """ - # GH 21227 internal compression is not used when file-like passed. + # GH21227 internal compression is not used when file-like passed. if self.compression and hasattr(self.path_or_buf, 'write'): msg = ("compression has no effect when passing file-like " "object as input.") @@ -148,7 +148,7 @@ def save(self): if is_zip: # zipfile doesn't support writing string to archive. uses string # buffer to receive csv writing and dump into zip compression - # file handle. GH 21241, 21118 + # file handle. GH21241, GH21118 f = StringIO() close = False elif hasattr(self.path_or_buf, 'write'): @@ -176,7 +176,7 @@ def save(self): finally: if is_zip: - # GH 17778 handles zip compression separately. + # GH17778 handles zip compression separately. buf = f.getvalue() if hasattr(self.path_or_buf, 'write'): self.path_or_buf.write(buf) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index a3639b3e236df..76788ced44e84 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -53,8 +53,7 @@ def test_compression_size_fh(obj, method, compression_only): ]) def test_dataframe_compression_defaults_to_infer( write_method, write_kwargs, read_method, compression_only): - # Test that DataFrame.to_* methods default to inferring compression from - # paths. GH 22004 + # GH22004 input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=['X', 'Y', 'Z']) extension = icom._compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: @@ -72,8 +71,7 @@ def test_dataframe_compression_defaults_to_infer( def test_series_compression_defaults_to_infer( write_method, write_kwargs, read_method, read_kwargs, compression_only): - # Test that Series.to_* methods default to inferring compression from - # paths. GH 22004 + # GH22004 input = pd.Series([0, 5, -2, 10], name='X') extension = icom._compression_to_extension[compression_only] with tm.ensure_clean('compressed' + extension) as path: @@ -84,7 +82,7 @@ def test_series_compression_defaults_to_infer( def test_compression_warning(compression_only): # Assert that passing a file object to to_csv while explicitly specifying a - # compression protocol triggers a RuntimeWarning, as per GH 21227. + # compression protocol triggers a RuntimeWarning, as per GH21227. # Note that pytest has an issue that causes assert_produces_warning to fail # in Python 2 if the warning has occurred in previous tests # (see https://git.io/fNEBm & https://git.io/fNEBC). Hence, should this