From 188c72cca7cd4c9679e42dba352dee8a0c329941 Mon Sep 17 00:00:00 2001 From: dahlbaek <30782351+dahlbaek@users.noreply.github.com> Date: Thu, 2 Aug 2018 12:49:06 +0200 Subject: [PATCH] DEPR: pd.read_table (#21954) * DEPR: pd.read_table - pd.read_table is deprecated and replaced by pd.read_csv. - add whatsnew note - change tests to test for warning messages - change DataFrame.from_csv to use pandas.read_csv instead of pandas.read_table - Change pandas.read_clipboard to use pandas.read_csv instead of pandas.read_table * Add sep note to whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/frame.py | 10 +++---- pandas/io/clipboards.py | 8 +++--- pandas/io/parsers.py | 31 ++++++++++++++++++---- pandas/tests/io/conftest.py | 4 +-- pandas/tests/io/formats/test_format.py | 6 ++--- pandas/tests/io/parser/test_network.py | 4 +-- pandas/tests/io/parser/test_parsers.py | 12 ++++++--- pandas/tests/io/parser/test_unsupported.py | 24 ++++++++--------- pandas/tests/io/test_common.py | 24 +++++++++++++++-- pandas/tests/test_multilevel.py | 8 +++--- 11 files changed, 89 insertions(+), 43 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3cff1522274ef4..8a92db4c66fb59 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -478,6 +478,7 @@ Deprecations - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) - :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) +- :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`) .. _whatsnew_0240.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ebd35cb1a6a1ae..bbe84110fd0190 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1594,11 +1594,11 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, "for from_csv when changing your function calls", FutureWarning, stacklevel=2) - from pandas.io.parsers import read_table - return read_table(path, header=header, sep=sep, - parse_dates=parse_dates, index_col=index_col, - encoding=encoding, tupleize_cols=tupleize_cols, - infer_datetime_format=infer_datetime_format) + from pandas.io.parsers import read_csv + return read_csv(path, header=header, sep=sep, + parse_dates=parse_dates, index_col=index_col, + encoding=encoding, tupleize_cols=tupleize_cols, + infer_datetime_format=infer_datetime_format) def to_sparse(self, fill_value=None, kind='block'): """ diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 141a5d2389db50..0d564069c681fa 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -9,7 +9,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover r""" - Read text from clipboard and pass to read_table. See read_table for the + Read text from clipboard and pass to read_csv. See read_csv for the full argument list Parameters @@ -31,7 +31,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover 'reading from clipboard only supports utf-8 encoding') from pandas.io.clipboard import clipboard_get - from pandas.io.parsers import read_table + from pandas.io.parsers import read_csv text = clipboard_get() # try to decode (if needed on PY3) @@ -51,7 +51,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover # that this came from excel and set 'sep' accordingly lines = text[:10000].split('\n')[:-1][:10] - # Need to remove leading white space, since read_table + # Need to remove leading white space, since read_csv # accepts: # a b # 0 1 2 @@ -80,7 +80,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover if kwargs.get('engine') == 'python' and PY2: text = text.encode('utf-8') - return read_table(StringIO(text), sep=sep, **kwargs) + return read_csv(StringIO(text), sep=sep, **kwargs) def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 88358ff392cb65..4b3fa08e5e4af8 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -331,6 +331,10 @@ """ % (_parser_params % (_sep_doc.format(default="','"), _engine_doc)) _read_table_doc = """ + +.. deprecated:: 0.24.0 + Use :func:`pandas.read_csv` instead, passing ``sep='\t'`` if necessary. + Read general delimited file into DataFrame %s @@ -540,9 +544,13 @@ def _read(filepath_or_buffer, kwds): } -def _make_parser_function(name, sep=','): +def _make_parser_function(name, default_sep=','): - default_sep = sep + # prepare read_table deprecation + if name == "read_table": + sep = False + else: + sep = default_sep def parser_f(filepath_or_buffer, sep=sep, @@ -611,11 +619,24 @@ def parser_f(filepath_or_buffer, memory_map=False, float_precision=None): + # deprecate read_table GH21948 + if name == "read_table": + if sep is False and delimiter is None: + warnings.warn("read_table is deprecated, use read_csv " + "instead, passing sep='\\t'.", + FutureWarning, stacklevel=2) + else: + warnings.warn("read_table is deprecated, use read_csv " + "instead.", + FutureWarning, stacklevel=2) + if sep is False: + sep = default_sep + # Alias sep -> delimiter. if delimiter is None: delimiter = sep - if delim_whitespace and delimiter is not default_sep: + if delim_whitespace and delimiter != default_sep: raise ValueError("Specified a delimiter with both sep and" " delim_whitespace=True; you can only" " specify one.") @@ -687,10 +708,10 @@ def parser_f(filepath_or_buffer, return parser_f -read_csv = _make_parser_function('read_csv', sep=',') +read_csv = _make_parser_function('read_csv', default_sep=',') read_csv = Appender(_read_csv_doc)(read_csv) -read_table = _make_parser_function('read_table', sep='\t') +read_table = _make_parser_function('read_table', default_sep='\t') read_table = Appender(_read_table_doc)(read_table) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 7623587803b418..b0cdbe2b5bedbc 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -1,5 +1,5 @@ import pytest -from pandas.io.parsers import read_table +from pandas.io.parsers import read_csv @pytest.fixture @@ -17,7 +17,7 @@ def jsonl_file(datapath): @pytest.fixture def salaries_table(datapath): """DataFrame with the salaries dataset""" - return read_table(datapath('io', 'parser', 'data', 'salaries.csv')) + return read_csv(datapath('io', 'parser', 'data', 'salaries.csv'), sep='\t') @pytest.fixture diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 191e3f37f1c37a..3218742aa76361 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -21,7 +21,7 @@ import numpy as np import pandas as pd from pandas import (DataFrame, Series, Index, Timestamp, MultiIndex, - date_range, NaT, read_table) + date_range, NaT, read_csv) from pandas.compat import (range, zip, lrange, StringIO, PY3, u, lzip, is_platform_windows, is_platform_32bit) @@ -1225,8 +1225,8 @@ def test_to_string(self): lines = result.split('\n') header = lines[0].strip().split() joined = '\n'.join(re.sub(r'\s+', ' ', x).strip() for x in lines[1:]) - recons = read_table(StringIO(joined), names=header, - header=None, sep=' ') + recons = read_csv(StringIO(joined), names=header, + header=None, sep=' ') tm.assert_series_equal(recons['B'], biggie['B']) assert recons['A'].count() == biggie['A'].count() assert (np.abs(recons['A'].dropna() - diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index f6a31008bca5c3..a7cc3ad989ea16 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -12,7 +12,7 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td from pandas import DataFrame -from pandas.io.parsers import read_csv, read_table +from pandas.io.parsers import read_csv from pandas.compat import BytesIO, StringIO @@ -44,7 +44,7 @@ def check_compressed_urls(salaries_table, compression, extension, mode, if mode != 'explicit': compression = mode - url_table = read_table(url, compression=compression, engine=engine) + url_table = read_csv(url, sep='\t', compression=compression, engine=engine) tm.assert_frame_equal(url_table, salaries_table) diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index b6f13039641a27..8535a51657abf5 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -70,7 +70,9 @@ def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine kwds['low_memory'] = self.low_memory - return read_table(*args, **kwds) + with tm.assert_produces_warning(FutureWarning): + df = read_table(*args, **kwds) + return df class TestCParserLowMemory(BaseParser, CParserTests): @@ -88,7 +90,9 @@ def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine kwds['low_memory'] = True - return read_table(*args, **kwds) + with tm.assert_produces_warning(FutureWarning): + df = read_table(*args, **kwds) + return df class TestPythonParser(BaseParser, PythonParserTests): @@ -103,7 +107,9 @@ def read_csv(self, *args, **kwds): def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine - return read_table(*args, **kwds) + with tm.assert_produces_warning(FutureWarning): + df = read_table(*args, **kwds) + return df class TestUnsortedUsecols(object): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 3117f6fae55da0..1c64c1516077d9 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -14,7 +14,7 @@ from pandas.compat import StringIO from pandas.errors import ParserError -from pandas.io.parsers import read_csv, read_table +from pandas.io.parsers import read_csv import pytest @@ -43,24 +43,24 @@ def test_c_engine(self): # specify C engine with unsupported options (raise) with tm.assert_raises_regex(ValueError, msg): - read_table(StringIO(data), engine='c', - sep=None, delim_whitespace=False) + read_csv(StringIO(data), engine='c', + sep=None, delim_whitespace=False) with tm.assert_raises_regex(ValueError, msg): - read_table(StringIO(data), engine='c', sep=r'\s') + read_csv(StringIO(data), engine='c', sep=r'\s') with tm.assert_raises_regex(ValueError, msg): - read_table(StringIO(data), engine='c', quotechar=chr(128)) + read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128)) with tm.assert_raises_regex(ValueError, msg): - read_table(StringIO(data), engine='c', skipfooter=1) + read_csv(StringIO(data), engine='c', skipfooter=1) # specify C-unsupported options without python-unsupported options with tm.assert_produces_warning(parsers.ParserWarning): - read_table(StringIO(data), sep=None, delim_whitespace=False) + read_csv(StringIO(data), sep=None, delim_whitespace=False) with tm.assert_produces_warning(parsers.ParserWarning): - read_table(StringIO(data), quotechar=chr(128)) + read_csv(StringIO(data), sep=r'\s') with tm.assert_produces_warning(parsers.ParserWarning): - read_table(StringIO(data), sep=r'\s') + read_csv(StringIO(data), sep='\t', quotechar=chr(128)) with tm.assert_produces_warning(parsers.ParserWarning): - read_table(StringIO(data), skipfooter=1) + read_csv(StringIO(data), skipfooter=1) text = """ A B C D E one two three four @@ -70,9 +70,9 @@ def test_c_engine(self): msg = 'Error tokenizing data' with tm.assert_raises_regex(ParserError, msg): - read_table(StringIO(text), sep='\\s+') + read_csv(StringIO(text), sep='\\s+') with tm.assert_raises_regex(ParserError, msg): - read_table(StringIO(text), engine='c', sep='\\s+') + read_csv(StringIO(text), engine='c', sep='\\s+') msg = "Only length-1 thousands markers supported" data = """A|B|C diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index ceaac9818354a9..991b8ee5087609 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -131,7 +131,6 @@ def test_iterator(self): @pytest.mark.parametrize('reader, module, error_class, fn_ext', [ (pd.read_csv, 'os', FileNotFoundError, 'csv'), - (pd.read_table, 'os', FileNotFoundError, 'csv'), (pd.read_fwf, 'os', FileNotFoundError, 'txt'), (pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'), (pd.read_feather, 'feather', Exception, 'feather'), @@ -149,9 +148,14 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): with pytest.raises(error_class): reader(path) + def test_read_non_existant_read_table(self): + path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv') + with pytest.raises(FileNotFoundError): + with tm.assert_produces_warning(FutureWarning): + pd.read_table(path) + @pytest.mark.parametrize('reader, module, path', [ (pd.read_csv, 'os', ('io', 'data', 'iris.csv')), - (pd.read_table, 'os', ('io', 'data', 'iris.csv')), (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')), (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')), (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')), @@ -170,6 +174,22 @@ def test_read_fspath_all(self, reader, module, path, datapath): mypath = CustomFSPath(path) result = reader(mypath) expected = reader(path) + + if path.endswith('.pickle'): + # categorical + tm.assert_categorical_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + def test_read_fspath_all_read_table(self, datapath): + path = datapath('io', 'data', 'iris.csv') + + mypath = CustomFSPath(path) + with tm.assert_produces_warning(FutureWarning): + result = pd.read_table(mypath) + with tm.assert_produces_warning(FutureWarning): + expected = pd.read_table(path) + if path.endswith('.pickle'): # categorical tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 3caee2b44c5798..dcfeab55f94fc6 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -10,7 +10,7 @@ import numpy as np from pandas.core.index import Index, MultiIndex -from pandas import Panel, DataFrame, Series, notna, isna, Timestamp +from pandas import Panel, DataFrame, Series, notna, isna, Timestamp, read_csv from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com @@ -512,14 +512,13 @@ def f(x): pytest.raises(com.SettingWithCopyError, f, result) def test_xs_level_multiple(self): - from pandas import read_table text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" - df = read_table(StringIO(text), sep=r'\s+', engine='python') + df = read_csv(StringIO(text), sep=r'\s+', engine='python') result = df.xs(('a', 4), level=['one', 'four']) expected = df.xs('a').xs(4, level='four') @@ -547,14 +546,13 @@ def f(x): tm.assert_frame_equal(rs, xp) def test_xs_level0(self): - from pandas import read_table text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" - df = read_table(StringIO(text), sep=r'\s+', engine='python') + df = read_csv(StringIO(text), sep=r'\s+', engine='python') result = df.xs('a', level=0) expected = df.xs('a')