diff --git a/doc/source/io.rst b/doc/source/io.rst index 0aa4ea72e3b13..08d00138b7cd8 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -343,6 +343,10 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None`` override values, a ParserWarning will be issued. See :class:`python:csv.Dialect` documentation for more details. tupleize_cols : boolean, default ``False`` + .. deprecated:: 0.21.0 + + This argument will be removed; column tuples will always be converted to a MultiIndex + Leave a list of tuples on columns as is (default is to convert to a MultiIndex on the columns). diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 595fab9e18ea4..f86847d8b8274 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -717,6 +717,7 @@ Deprecations - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) +- :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`) - The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c8b2987d591ef..4b6c358ea7dcd 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -260,8 +260,11 @@ override values, a ParserWarning will be issued. See csv.Dialect documentation for more details. tupleize_cols : boolean, default False + ..
deprecated:: 0.21.0 + This argument will be removed; column tuples will always be converted to a MultiIndex + Leave a list of tuples on columns as is (default is to convert to - a Multi Index on the columns) + a MultiIndex on the columns) error_bad_lines : boolean, default True Lines with too many fields (e.g. a csv line with too many commas) will by default cause an exception to be raised, and no DataFrame will be returned. @@ -510,6 +513,7 @@ def _read(filepath_or_buffer, kwds): 'buffer_lines': None, 'error_bad_lines': True, 'warn_bad_lines': True, + 'tupleize_cols': False, 'float_precision': None } @@ -529,6 +533,7 @@ def _read(filepath_or_buffer, kwds): 'buffer_lines', 'compact_ints', 'use_unsigned', + 'tupleize_cols', } @@ -962,6 +967,9 @@ def _clean_options(self, options, engine): if arg == 'as_recarray': msg += ' Please call pd.to_csv(...).to_records() instead.' + elif arg == 'tupleize_cols': + msg += (' Column tuples will then ' + 'always be converted to MultiIndex') if result.get(arg, parser_default) != parser_default: depr_warning += msg + '\n\n' diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 6a4b1686a31e2..a61a157181253 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -555,8 +555,12 @@ def _make_frame(names=None): # tupleize_cols=True and index=False df = _make_frame(True) df.to_csv(path, tupleize_cols=True, index=False) - result = read_csv( - path, header=0, tupleize_cols=True, index_col=None) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = read_csv(path, header=0, + tupleize_cols=True, + index_col=None) result.columns = df.columns assert_frame_equal(df, result) @@ -576,8 +580,11 @@ def _make_frame(names=None): # column & index are multi-index (compatibility) df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) df.to_csv(path, tupleize_cols=True) - result = read_csv(path, header=0, index_col=[ - 0, 1], tupleize_cols=True) + + with
tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = read_csv(path, header=0, index_col=[0, 1], + tupleize_cols=True) result.columns = df.columns assert_frame_equal(df, result) diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 50ae4dae541ac..ff3beb70b774f 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -105,13 +105,13 @@ def test_header_multi_index(self): R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 """ - df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1]) tm.assert_frame_equal(df, expected) # skipping lines in the header - df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1]) tm.assert_frame_equal(df, expected) # INVALID OPTIONS @@ -121,25 +121,22 @@ def test_header_multi_index(self): FutureWarning, check_stacklevel=False): pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], as_recarray=True, - tupleize_cols=False) + index_col=[0, 1], as_recarray=True) # names pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], names=['foo', 'bar'], - tupleize_cols=False) + index_col=[0, 1], names=['foo', 'bar']) # usecols pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], usecols=['foo', 'bar'], - tupleize_cols=False) + index_col=[0, 1], usecols=['foo', 'bar']) # non-numeric index_col pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=['foo', 'bar'], tupleize_cols=False) + index_col=['foo', 'bar']) def test_header_multiindex_common_format(self): diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index 
c3dc91b3f188c..267b589ee91f4 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -232,9 +232,7 @@ def test_none_delimiter(self): result = self.read_csv(StringIO(data), header=0, sep=None, error_bad_lines=False, - warn_bad_lines=True, - engine='python', - tupleize_cols=True) + warn_bad_lines=True) tm.assert_frame_equal(result, expected) def test_skipfooter_bad_row(self): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 5d248f2fef59c..2e73ce6aa19b0 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -127,32 +127,25 @@ def read(self): class TestDeprecatedFeatures(object): - def test_deprecated_args(self): - data = '1,2,3' - - # deprecated arguments with non-default values - deprecated = { - 'as_recarray': True, - 'buffer_lines': True, - 'compact_ints': True, - 'use_unsigned': True, - 'skip_footer': 1, - } - - engines = 'c', 'python' - - for engine in engines: - for arg, non_default_val in deprecated.items(): - if engine == 'c' and arg == 'skip_footer': - # unsupported --> exception is raised - continue - - if engine == 'python' and arg == 'buffer_lines': - # unsupported --> exception is raised - continue - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - kwargs = {arg: non_default_val} - read_csv(StringIO(data), engine=engine, - **kwargs) + @pytest.mark.parametrize("engine", ["c", "python"]) + @pytest.mark.parametrize("kwargs", [{"as_recarray": True}, + {"buffer_lines": True}, + {"compact_ints": True}, + {"use_unsigned": True}, + {"tupleize_cols": True}, + {"skip_footer": 1}]) + def test_deprecated_args(self, engine, kwargs): + data = "1,2,3" + arg, _ = list(kwargs.items())[0] + + if engine == "c" and arg == "skip_footer": + # unsupported --> exception is raised + return + + if engine == "python" and arg == "buffer_lines": + # unsupported --> exception is raised + 
return + + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False): + read_csv(StringIO(data), engine=engine, **kwargs)