From 043b60920ede5a2589ed29685ba7247fbbc4ea5f Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Fri, 29 May 2020 02:00:44 +0100 Subject: [PATCH] DEPR: deprecate non keyword arguments to read_excel (#34418) --- doc/source/whatsnew/v1.1.0.rst | 4 + pandas/io/excel/_base.py | 3 +- pandas/tests/io/excel/test_odf.py | 2 +- pandas/tests/io/excel/test_readers.py | 265 ++++++++++++++++---------- pandas/tests/io/excel/test_writers.py | 131 +++++++------ pandas/tests/io/excel/test_xlrd.py | 4 +- 6 files changed, 246 insertions(+), 163 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index bc0214a03ec55..88bf0e005a221 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -686,6 +686,10 @@ Deprecations version 1.1. All other arguments should be given as keyword arguments (:issue:`27573`). +- Passing any arguments but the first 2 to :func:`read_excel` as + positional arguments is deprecated since version 1.1. All other + arguments should be given as keyword arguments (:issue:`27573`). + - :func:`pandas.api.types.is_categorical` is deprecated and will be removed in a future version; use `:func:pandas.api.types.is_categorical_dtype` instead (:issue:`33385`) - :meth:`Index.get_value` is deprecated and will be removed in a future version (:issue:`19728`) - :meth:`Series.dt.week` and `Series.dt.weekofyear` are deprecated and will be removed in a future version, use :meth:`Series.dt.isocalendar().week` instead (:issue:`33595`) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d1139f640cef4..d55bdffe689f2 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -8,7 +8,7 @@ from pandas._libs.parsers import STR_NA_VALUES from pandas.errors import EmptyDataError -from pandas.util._decorators import Appender +from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments from pandas.core.dtypes.common import is_bool, is_float, is_integer, is_list_like @@ -266,6 +266,7 @@ ) +@deprecate_nonkeyword_arguments(allowed_args=2, version="2.0") @Appender(_read_excel_doc) def read_excel( io, diff --git a/pandas/tests/io/excel/test_odf.py b/pandas/tests/io/excel/test_odf.py index b9a3e8b59b133..d6c6399f082c6 100644 --- a/pandas/tests/io/excel/test_odf.py +++ b/pandas/tests/io/excel/test_odf.py @@ -33,7 +33,7 @@ def test_read_writer_table(): columns=["Column 1", "Unnamed: 2", "Column 3"], ) - result = pd.read_excel("writertable.odt", "Table1", index_col=0) + result = pd.read_excel("writertable.odt", sheet_name="Table1", index_col=0) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 99447c03e89af..fd1533dd65dc4 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1,4 +1,3 @@ -from collections import OrderedDict import contextlib from datetime import datetime, time from functools import partial @@ -136,13 +135,19 @@ def test_usecols_int(self, read_ext, df_ref): msg = "Passing an integer for `usecols`" with pytest.raises(ValueError, match=msg): with ignore_xlrd_time_clock_warning(): - pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols=3) + pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=3 + ) # usecols as int with pytest.raises(ValueError, match=msg): with ignore_xlrd_time_clock_warning(): pd.read_excel( - "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols=3 + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols=3, ) def test_usecols_list(self, read_ext, df_ref): @@ -151,10 +156,14 @@ def test_usecols_list(self, read_ext, df_ref): df_ref = df_ref.reindex(columns=["B", "C"]) df1 = pd.read_excel( - "test1" + read_ext, "Sheet1", index_col=0, usecols=[0, 2, 3] + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=[0, 2, 3] ) df2 = pd.read_excel( - "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols=[0, 2, 3] + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols=[0, 2, 3], ) # TODO add index to xls file) @@ -166,9 +175,15 @@ def test_usecols_str(self, read_ext, df_ref): pytest.xfail("Sheets containing datetimes not supported by pyxlsb") df1 = df_ref.reindex(columns=["A", "B", "C"]) - df2 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols="A:D") + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A:D" + ) df3 = pd.read_excel( - "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols="A:D" + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A:D", ) # TODO add index to xls, read xls ignores index name ? @@ -176,18 +191,30 @@ def test_usecols_str(self, read_ext, df_ref): tm.assert_frame_equal(df3, df1, check_names=False) df1 = df_ref.reindex(columns=["B", "C"]) - df2 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols="A,C,D") + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C,D" + ) df3 = pd.read_excel( - "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols="A,C,D" + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A,C,D", ) # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = df_ref.reindex(columns=["B", "C"]) - df2 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols="A,C:D") + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C:D" + ) df3 = pd.read_excel( - "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols="A,C:D" + "test1" + read_ext, + sheet_name="Sheet2", + skiprows=[1], + index_col=0, + usecols="A,C:D", ) tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -201,7 +228,7 @@ def test_usecols_diff_positional_int_columns_order(self, read_ext, usecols, df_r expected = df_ref[["A", "C"]] result = pd.read_excel( - "test1" + read_ext, "Sheet1", index_col=0, usecols=usecols + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=usecols ) tm.assert_frame_equal(result, expected, check_names=False) @@ -210,7 +237,7 @@ def test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_r expected = df_ref[["B", "D"]] expected.index = range(len(expected)) - result = pd.read_excel("test1" + read_ext, "Sheet1", usecols=usecols) + result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols=usecols) tm.assert_frame_equal(result, expected, check_names=False) def test_read_excel_without_slicing(self, read_ext, df_ref): @@ -218,7 +245,7 @@ def test_read_excel_without_slicing(self, read_ext, df_ref): pytest.xfail("Sheets containing datetimes not supported by pyxlsb") expected = df_ref - result = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0) + result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0) tm.assert_frame_equal(result, expected, check_names=False) def test_usecols_excel_range_str(self, read_ext, df_ref): @@ -227,7 +254,7 @@ def test_usecols_excel_range_str(self, read_ext, df_ref): expected = df_ref[["C", "D"]] result = pd.read_excel( - "test1" + read_ext, "Sheet1", index_col=0, usecols="A,D:E" + "test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,D:E" ) tm.assert_frame_equal(result, expected, check_names=False) @@ -235,19 +262,24 @@ def test_usecols_excel_range_str_invalid(self, read_ext): msg = "Invalid column name: E1" with pytest.raises(ValueError, match=msg): - pd.read_excel("test1" + read_ext, "Sheet1", usecols="D:E1") + pd.read_excel("test1" + read_ext, sheet_name="Sheet1", usecols="D:E1") def test_index_col_label_error(self, read_ext): msg = "list indices must be integers.*, not str" with pytest.raises(TypeError, match=msg): pd.read_excel( - "test1" + read_ext, "Sheet1", index_col=["A"], usecols=["A", "C"] + "test1" + read_ext, + sheet_name="Sheet1", + index_col=["A"], + usecols=["A", "C"], ) def test_index_col_empty(self, read_ext): # see gh-9208 - result = pd.read_excel("test1" + read_ext, "Sheet3", index_col=["A", "B", "C"]) + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet3", index_col=["A", "B", "C"] + ) expected = DataFrame( columns=["D", "E", "F"], index=MultiIndex(levels=[[]] * 3, codes=[[]] * 3, names=["A", "B", "C"]), @@ -257,7 +289,9 @@ def test_index_col_empty(self, read_ext): @pytest.mark.parametrize("index_col", [None, 2]) def test_index_col_with_unnamed(self, read_ext, index_col): # see gh-18792 - result = pd.read_excel("test1" + read_ext, "Sheet4", index_col=index_col) + result = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet4", index_col=index_col + ) expected = DataFrame( [["i1", "a", "x"], ["i2", "b", "y"]], columns=["Unnamed: 0", "col1", "col2"] ) @@ -286,7 +320,7 @@ def test_usecols_wrong_type(self, read_ext): def test_excel_stop_iterator(self, read_ext): - parsed = pd.read_excel("test2" + read_ext, "Sheet1") + parsed = pd.read_excel("test2" + read_ext, sheet_name="Sheet1") expected = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"]) tm.assert_frame_equal(parsed, expected) @@ -294,7 +328,7 @@ def test_excel_cell_error_na(self, read_ext): if pd.read_excel.keywords["engine"] == "pyxlsb": pytest.xfail("Sheets containing datetimes not supported by pyxlsb") - parsed = pd.read_excel("test3" + read_ext, "Sheet1") + parsed = pd.read_excel("test3" + read_ext, sheet_name="Sheet1") expected = DataFrame([[np.nan]], columns=["Test"]) tm.assert_frame_equal(parsed, expected) @@ -302,13 +336,17 @@ def test_excel_table(self, read_ext, df_ref): if pd.read_excel.keywords["engine"] == "pyxlsb": pytest.xfail("Sheets containing datetimes not supported by pyxlsb") - df1 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0) - df2 = pd.read_excel("test1" + read_ext, "Sheet2", skiprows=[1], index_col=0) + df1 = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0) + df2 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet2", skiprows=[1], index_col=0 + ) # TODO add index to file tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) - df3 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, skipfooter=1) + df3 = pd.read_excel( + "test1" + read_ext, sheet_name="Sheet1", index_col=0, skipfooter=1 + ) tm.assert_frame_equal(df3, df1.iloc[:-1]) def test_reader_special_dtypes(self, read_ext): @@ -316,50 +354,49 @@ def test_reader_special_dtypes(self, read_ext): pytest.xfail("Sheets containing datetimes not supported by pyxlsb") expected = DataFrame.from_dict( - OrderedDict( - [ - ("IntCol", [1, 2, -3, 4, 0]), - ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]), - ("BoolCol", [True, False, True, True, False]), - ("StrCol", [1, 2, 3, 4, 5]), - # GH5394 - this is why convert_float isn't vectorized - ("Str2Col", ["a", 3, "c", "d", "e"]), - ( - "DateCol", - [ - datetime(2013, 10, 30), - datetime(2013, 10, 31), - datetime(1905, 1, 1), - datetime(2013, 12, 14), - datetime(2015, 3, 14), - ], - ), - ] - ) + { + "IntCol": [1, 2, -3, 4, 0], + "FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005], + "BoolCol": [True, False, True, True, False], + "StrCol": [1, 2, 3, 4, 5], + # GH5394 - this is why convert_float isn't vectorized + "Str2Col": ["a", 3, "c", "d", "e"], + "DateCol": [ + datetime(2013, 10, 30), + datetime(2013, 10, 31), + datetime(1905, 1, 1), + datetime(2013, 12, 14), + datetime(2015, 3, 14), + ], + }, ) basename = "test_types" # should read in correctly and infer types - actual = pd.read_excel(basename + read_ext, "Sheet1") + actual = pd.read_excel(basename + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) # if not coercing number, then int comes in as float float_expected = expected.copy() float_expected["IntCol"] = float_expected["IntCol"].astype(float) float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 - actual = pd.read_excel(basename + read_ext, "Sheet1", convert_float=False) + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", convert_float=False + ) tm.assert_frame_equal(actual, float_expected) # check setting Index (assuming xls and xlsx are the same here) for icol, name in enumerate(expected.columns): - actual = pd.read_excel(basename + read_ext, "Sheet1", index_col=icol) + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", index_col=icol + ) exp = expected.set_index(name) tm.assert_frame_equal(actual, exp) # convert_float and converters should be different but both accepted expected["StrCol"] = expected["StrCol"].apply(str) actual = pd.read_excel( - basename + read_ext, "Sheet1", converters={"StrCol": str} + basename + read_ext, sheet_name="Sheet1", converters={"StrCol": str} ) tm.assert_frame_equal(actual, expected) @@ -367,7 +404,7 @@ def test_reader_special_dtypes(self, read_ext): no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) actual = pd.read_excel( basename + read_ext, - "Sheet1", + sheet_name="Sheet1", convert_float=False, converters={"StrCol": str}, ) @@ -379,14 +416,12 @@ def test_reader_converters(self, read_ext): basename = "test_converters" expected = DataFrame.from_dict( - OrderedDict( - [ - ("IntCol", [1, 2, -3, -1000, 0]), - ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]), - ("BoolCol", ["Found", "Found", "Found", "Not found", "Found"]), - ("StrCol", ["1", np.nan, "3", "4", "5"]), - ] - ) + { + "IntCol": [1, 2, -3, -1000, 0], + "FloatCol": [12.5, np.nan, 18.3, 19.2, 0.000000005], + "BoolCol": ["Found", "Found", "Found", "Not found", "Found"], + "StrCol": ["1", np.nan, "3", "4", "5"], + } ) converters = { @@ -398,7 +433,9 @@ def test_reader_converters(self, read_ext): # should read in correctly and set types of single cells (not array # dtypes) - actual = pd.read_excel(basename + read_ext, "Sheet1", converters=converters) + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", converters=converters + ) tm.assert_frame_equal(actual, expected) def test_reader_dtype(self, read_ext): @@ -483,7 +520,7 @@ def test_reader_spaces(self, read_ext): tm.assert_frame_equal(actual, expected) def test_reading_all_sheets(self, read_ext): - # Test reading all sheetnames by setting sheetname to None, + # Test reading all sheet names by setting sheet_name to None, # Ensure a dict is returned. # See PR #9450 basename = "test_multisheet" @@ -496,7 +533,7 @@ def test_reading_all_sheets(self, read_ext): assert expected_keys == list(dfs.keys()) def test_reading_multiple_specific_sheets(self, read_ext): - # Test reading specific sheetnames by specifying a mixed list + # Test reading specific sheet names by specifying a mixed list # of integers and strings, and confirm that duplicated sheet # references (positions/names) are removed properly. # Ensure a dict is returned @@ -510,7 +547,7 @@ def test_reading_multiple_specific_sheets(self, read_ext): assert len(expected_keys) == len(dfs.keys()) def test_reading_all_sheets_with_blank(self, read_ext): - # Test reading all sheetnames by setting sheetname to None, + # Test reading all sheet names by setting sheet_name to None, # In the case where some sheets are blank. # Issue #11711 basename = "blank_with_header" @@ -520,12 +557,12 @@ def test_reading_all_sheets_with_blank(self, read_ext): # GH6403 def test_read_excel_blank(self, read_ext): - actual = pd.read_excel("blank" + read_ext, "Sheet1") + actual = pd.read_excel("blank" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, DataFrame()) def test_read_excel_blank_with_header(self, read_ext): expected = DataFrame(columns=["col_1", "col_2"]) - actual = pd.read_excel("blank_with_header" + read_ext, "Sheet1") + actual = pd.read_excel("blank_with_header" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) def test_date_conversion_overflow(self, read_ext): @@ -569,9 +606,9 @@ def test_sheet_name(self, read_ext, df_ref): def test_excel_read_buffer(self, read_ext): pth = "test1" + read_ext - expected = pd.read_excel(pth, "Sheet1", index_col=0) + expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0) with open(pth, "rb") as f: - actual = pd.read_excel(f, "Sheet1", index_col=0) + actual = pd.read_excel(f, sheet_name="Sheet1", index_col=0) tm.assert_frame_equal(expected, actual) def test_bad_engine_raises(self, read_ext): @@ -626,10 +663,10 @@ def test_read_from_pathlib_path(self, read_ext): from pathlib import Path str_path = "test1" + read_ext - expected = pd.read_excel(str_path, "Sheet1", index_col=0) + expected = pd.read_excel(str_path, sheet_name="Sheet1", index_col=0) path_obj = Path("test1" + read_ext) - actual = pd.read_excel(path_obj, "Sheet1", index_col=0) + actual = pd.read_excel(path_obj, sheet_name="Sheet1", index_col=0) tm.assert_frame_equal(expected, actual) @@ -641,10 +678,10 @@ def test_read_from_py_localpath(self, read_ext): from py.path import local as LocalPath str_path = os.path.join("test1" + read_ext) - expected = pd.read_excel(str_path, "Sheet1", index_col=0) + expected = pd.read_excel(str_path, sheet_name="Sheet1", index_col=0) path_obj = LocalPath().join("test1" + read_ext) - actual = pd.read_excel(path_obj, "Sheet1", index_col=0) + actual = pd.read_excel(path_obj, sheet_name="Sheet1", index_col=0) tm.assert_frame_equal(expected, actual) @@ -654,7 +691,7 @@ def test_close_from_py_localpath(self, read_ext): # GH31467 str_path = os.path.join("test1" + read_ext) with open(str_path, "rb") as f: - x = pd.read_excel(f, "Sheet1", index_col=0) + x = pd.read_excel(f, sheet_name="Sheet1", index_col=0) del x # should not throw an exception because the passed file was closed f.read() @@ -682,10 +719,10 @@ def test_reader_seconds(self, read_ext): } ) - actual = pd.read_excel("times_1900" + read_ext, "Sheet1") + actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) - actual = pd.read_excel("times_1904" + read_ext, "Sheet1") + actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex(self, read_ext): @@ -707,52 +744,66 @@ def test_read_excel_multiindex(self, read_ext): columns=mi, ) - actual = pd.read_excel(mi_file, "mi_column", header=[0, 1], index_col=0) + actual = pd.read_excel( + mi_file, sheet_name="mi_column", header=[0, 1], index_col=0 + ) tm.assert_frame_equal(actual, expected) # "mi_index" sheet expected.index = mi expected.columns = ["a", "b", "c", "d"] - actual = pd.read_excel(mi_file, "mi_index", index_col=[0, 1]) + actual = pd.read_excel(mi_file, sheet_name="mi_index", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) # "both" sheet expected.columns = mi - actual = pd.read_excel(mi_file, "both", index_col=[0, 1], header=[0, 1]) + actual = pd.read_excel( + mi_file, sheet_name="both", index_col=[0, 1], header=[0, 1] + ) tm.assert_frame_equal(actual, expected, check_names=False) # "mi_index_name" sheet expected.columns = ["a", "b", "c", "d"] expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = pd.read_excel(mi_file, "mi_index_name", index_col=[0, 1]) + actual = pd.read_excel(mi_file, sheet_name="mi_index_name", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # "mi_column_name" sheet expected.index = list(range(4)) expected.columns = mi.set_names(["c1", "c2"]) - actual = pd.read_excel(mi_file, "mi_column_name", header=[0, 1], index_col=0) + actual = pd.read_excel( + mi_file, sheet_name="mi_column_name", header=[0, 1], index_col=0 + ) tm.assert_frame_equal(actual, expected) # see gh-11317 # "name_with_int" sheet expected.columns = mi.set_levels([1, 2], level=1).set_names(["c1", "c2"]) - actual = pd.read_excel(mi_file, "name_with_int", index_col=0, header=[0, 1]) + actual = pd.read_excel( + mi_file, sheet_name="name_with_int", index_col=0, header=[0, 1] + ) tm.assert_frame_equal(actual, expected) # "both_name" sheet expected.columns = mi.set_names(["c1", "c2"]) expected.index = mi.set_names(["ilvl1", "ilvl2"]) - actual = pd.read_excel(mi_file, "both_name", index_col=[0, 1], header=[0, 1]) + actual = pd.read_excel( + mi_file, sheet_name="both_name", index_col=[0, 1], header=[0, 1] + ) tm.assert_frame_equal(actual, expected) # "both_skiprows" sheet actual = pd.read_excel( - mi_file, "both_name_skiprows", index_col=[0, 1], header=[0, 1], skiprows=2 + mi_file, + sheet_name="both_name_skiprows", + index_col=[0, 1], + header=[0, 1], + skiprows=2, ) tm.assert_frame_equal(actual, expected) @@ -761,7 +812,7 @@ def test_read_excel_multiindex_header_only(self, read_ext): # # Don't try to parse a header name if there isn't one. mi_file = "testmultiindex" + read_ext - result = pd.read_excel(mi_file, "index_col_none", header=[0, 1]) + result = pd.read_excel(mi_file, sheet_name="index_col_none", header=[0, 1]) exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")]) expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns) @@ -799,12 +850,12 @@ def test_excel_old_index_format(self, read_ext): expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(filename, "single_names", index_col=0) + actual = pd.read_excel(filename, sheet_name="single_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(filename, "multi_names", index_col=[0, 1]) + actual = pd.read_excel(filename, sheet_name="multi_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected) # The analogous versions of the "names" version data @@ -831,12 +882,12 @@ def test_excel_old_index_format(self, read_ext): expected = pd.DataFrame(data, index=si, columns=columns) - actual = pd.read_excel(filename, "single_no_names", index_col=0) + actual = pd.read_excel(filename, sheet_name="single_no_names", index_col=0) tm.assert_frame_equal(actual, expected) expected.index = mi - actual = pd.read_excel(filename, "multi_no_names", index_col=[0, 1]) + actual = pd.read_excel(filename, sheet_name="multi_no_names", index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self, read_ext): @@ -858,7 +909,7 @@ def test_read_excel_skiprows_list(self, read_ext): pytest.xfail("Sheets containing datetimes not supported by pyxlsb") actual = pd.read_excel( - "testskiprows" + read_ext, "skiprows_list", skiprows=[0, 2] + "testskiprows" + read_ext, sheet_name="skiprows_list", skiprows=[0, 2] ) expected = DataFrame( [ @@ -872,7 +923,9 @@ def test_read_excel_skiprows_list(self, read_ext): tm.assert_frame_equal(actual, expected) actual = pd.read_excel( - "testskiprows" + read_ext, "skiprows_list", skiprows=np.array([0, 2]) + "testskiprows" + read_ext, + sheet_name="skiprows_list", + skiprows=np.array([0, 2]), ) tm.assert_frame_equal(actual, expected) @@ -902,19 +955,25 @@ def test_read_excel_squeeze(self, read_ext): # GH 12157 f = "test_squeeze" + read_ext - actual = pd.read_excel(f, "two_columns", index_col=0, squeeze=True) + actual = pd.read_excel(f, sheet_name="two_columns", index_col=0, squeeze=True) expected = pd.Series([2, 3, 4], [4, 5, 6], name="b") expected.index.name = "a" tm.assert_series_equal(actual, expected) - actual = pd.read_excel(f, "two_columns", squeeze=True) + actual = pd.read_excel(f, sheet_name="two_columns", squeeze=True) expected = pd.DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]}) tm.assert_frame_equal(actual, expected) - actual = pd.read_excel(f, "one_column", squeeze=True) + actual = pd.read_excel(f, sheet_name="one_column", squeeze=True) expected = pd.Series([1, 2, 3], name="a") tm.assert_series_equal(actual, expected) + def test_deprecated_kwargs(self, read_ext): + with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False): + pd.read_excel("test1" + read_ext, "Sheet1", 0) + + pd.read_excel("test1" + read_ext) + class TestExcelFileRead: @pytest.fixture(autouse=True) @@ -929,7 +988,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): def test_excel_passes_na(self, read_ext): with pd.ExcelFile("test4" + read_ext) as excel: parsed = pd.read_excel( - excel, "Sheet1", keep_default_na=False, na_values=["apple"] + excel, sheet_name="Sheet1", keep_default_na=False, na_values=["apple"] ) expected = DataFrame( [["NA"], [1], ["NA"], [np.nan], ["rabbit"]], columns=["Test"] @@ -938,7 +997,7 @@ def test_excel_passes_na(self, read_ext): with pd.ExcelFile("test4" + read_ext) as excel: parsed = pd.read_excel( - excel, "Sheet1", keep_default_na=True, na_values=["apple"] + excel, sheet_name="Sheet1", keep_default_na=True, na_values=["apple"] ) expected = DataFrame( [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] @@ -948,7 +1007,7 @@ def test_excel_passes_na(self, read_ext): # 13967 with pd.ExcelFile("test5" + read_ext) as excel: parsed = pd.read_excel( - excel, "Sheet1", keep_default_na=False, na_values=["apple"] + excel, sheet_name="Sheet1", keep_default_na=False, na_values=["apple"] ) expected = DataFrame( [["1.#QNAN"], [1], ["nan"], [np.nan], ["rabbit"]], columns=["Test"] @@ -957,7 +1016,7 @@ def test_excel_passes_na(self, read_ext): with pd.ExcelFile("test5" + read_ext) as excel: parsed = pd.read_excel( - excel, "Sheet1", keep_default_na=True, na_values=["apple"] + excel, sheet_name="Sheet1", keep_default_na=True, na_values=["apple"] ) expected = DataFrame( [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] @@ -974,7 +1033,11 @@ def test_excel_passes_na_filter(self, read_ext, na_filter): with pd.ExcelFile("test5" + read_ext) as excel: parsed = pd.read_excel( - excel, "Sheet1", keep_default_na=True, na_values=["apple"], **kwargs + excel, + sheet_name="Sheet1", + keep_default_na=True, + na_values=["apple"], + **kwargs, ) if na_filter is False: @@ -1003,8 +1066,8 @@ def test_excel_table_sheet_by_index(self, read_ext, df_ref): pytest.xfail("Sheets containing datetimes not supported by pyxlsb") with pd.ExcelFile("test1" + read_ext) as excel: - df1 = pd.read_excel(excel, 0, index_col=0) - df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) + df1 = pd.read_excel(excel, sheet_name=0, index_col=0) + df2 = pd.read_excel(excel, sheet_name=1, skiprows=[1], index_col=0) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) @@ -1015,7 +1078,7 @@ def test_excel_table_sheet_by_index(self, read_ext, df_ref): tm.assert_frame_equal(df2, df_ref, check_names=False) with pd.ExcelFile("test1" + read_ext) as excel: - df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) + df3 = pd.read_excel(excel, sheet_name=0, index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) with pd.ExcelFile("test1" + read_ext) as excel: @@ -1043,11 +1106,11 @@ def test_sheet_name(self, read_ext, df_ref): def test_excel_read_buffer(self, engine, read_ext): pth = "test1" + read_ext - expected = pd.read_excel(pth, "Sheet1", index_col=0, engine=engine) + expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0, engine=engine) with open(pth, "rb") as f: with pd.ExcelFile(f) as xls: - actual = pd.read_excel(xls, "Sheet1", index_col=0) + actual = pd.read_excel(xls, sheet_name="Sheet1", index_col=0) tm.assert_frame_equal(expected, actual) @@ -1055,7 +1118,7 @@ def test_reader_closes_file(self, engine, read_ext): with open("test1" + read_ext, "rb") as f: with pd.ExcelFile(f) as xlsx: # parses okay - pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) + pd.read_excel(xlsx, sheet_name="Sheet1", index_col=0, engine=engine) assert f.closed diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ff366036714e4..b909f1f3a958f 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -64,7 +64,9 @@ def test_read_one_empty_col_no_header(self, ext, header, expected): with tm.ensure_clean(ext) as path: df.to_excel(path, filename, index=False, header=False) - result = pd.read_excel(path, filename, usecols=[0], header=header) + result = pd.read_excel( + path, sheet_name=filename, usecols=[0], header=header + ) tm.assert_frame_equal(result, expected) @@ -80,7 +82,9 @@ def test_read_one_empty_col_with_header(self, ext, header, expected): with tm.ensure_clean(ext) as path: df.to_excel(path, "with_header", index=False, header=True) - result = pd.read_excel(path, filename, usecols=[0], header=header) + result = pd.read_excel( + path, sheet_name=filename, usecols=[0], header=header + ) tm.assert_frame_equal(result, expected) @@ -100,10 +104,13 @@ def test_set_column_names_in_parameter(self, ext): with ExcelFile(pth) as reader: xlsdf_no_head = pd.read_excel( - reader, "Data_no_head", header=None, names=["A", "B"] + reader, sheet_name="Data_no_head", header=None, names=["A", "B"] ) xlsdf_with_head = pd.read_excel( - reader, "Data_with_head", index_col=None, names=["A", "B"] + reader, + sheet_name="Data_with_head", + index_col=None, + names=["A", "B"], ) tm.assert_frame_equal(xlsdf_no_head, refdf) @@ -326,13 +333,13 @@ def test_excel_sheet_by_name_raise(self, path): gt.to_excel(path) xl = ExcelFile(path) - df = pd.read_excel(xl, 0, index_col=0) + df = pd.read_excel(xl, sheet_name=0, index_col=0) tm.assert_frame_equal(gt, df) msg = "No sheet named <'0'>" with pytest.raises(xlrd.XLRDError, match=msg): - pd.read_excel(xl, "0") + pd.read_excel(xl, sheet_name="0") def test_excel_writer_context_manager(self, frame, path): with ExcelWriter(path) as writer: @@ -342,8 +349,8 @@ def test_excel_writer_context_manager(self, frame, path): frame2.to_excel(writer, "Data2") with ExcelFile(path) as reader: - found_df = pd.read_excel(reader, "Data1", index_col=0) - found_df2 = pd.read_excel(reader, "Data2", index_col=0) + found_df = pd.read_excel(reader, sheet_name="Data1", index_col=0) + found_df2 = pd.read_excel(reader, sheet_name="Data2", index_col=0) tm.assert_frame_equal(found_df, frame) tm.assert_frame_equal(found_df2, frame2) @@ -359,25 +366,27 @@ def test_roundtrip(self, frame, path): # test roundtrip frame.to_excel(path, "test1") - recons = pd.read_excel(path, "test1", index_col=0) + recons = pd.read_excel(path, sheet_name="test1", index_col=0) tm.assert_frame_equal(frame, recons) frame.to_excel(path, "test1", index=False) - recons = pd.read_excel(path, "test1", index_col=None) + recons = pd.read_excel(path, sheet_name="test1", index_col=None) recons.index = frame.index tm.assert_frame_equal(frame, recons) frame.to_excel(path, "test1", na_rep="NA") - recons = pd.read_excel(path, "test1", index_col=0, na_values=["NA"]) + recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["NA"]) tm.assert_frame_equal(frame, recons) # GH 3611 frame.to_excel(path, "test1", na_rep="88") - recons = pd.read_excel(path, "test1", index_col=0, na_values=["88"]) + recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["88"]) tm.assert_frame_equal(frame, recons) frame.to_excel(path, "test1", na_rep="88") - recons = pd.read_excel(path, "test1", index_col=0, na_values=[88, 88.0]) + recons = pd.read_excel( + path, sheet_name="test1", index_col=0, na_values=[88, 88.0] + ) tm.assert_frame_equal(frame, recons) # GH 6573 @@ -401,7 +410,7 @@ def test_mixed(self, frame, path): mixed_frame.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(mixed_frame, recons) def test_ts_frame(self, tsframe, path): @@ -414,7 +423,7 @@ def test_ts_frame(self, tsframe, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(df, recons) def test_basics_with_nan(self, frame, path): @@ -433,17 +442,19 @@ def test_int_types(self, np_type, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) int_frame = df.astype(np.int64) tm.assert_frame_equal(int_frame, recons) - recons2 = pd.read_excel(path, "test1", index_col=0) + recons2 = pd.read_excel(path, sheet_name="test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. float_frame = df.astype(float) - recons = pd.read_excel(path, "test1", convert_float=False, index_col=0) + recons = pd.read_excel( + path, sheet_name="test1", convert_float=False, index_col=0 + ) tm.assert_frame_equal( recons, float_frame, check_index_type=False, check_column_type=False ) @@ -455,7 +466,7 @@ def test_float_types(self, np_type, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(np_type) tm.assert_frame_equal(df, recons) @@ -466,7 +477,7 @@ def test_bool_types(self, np_type, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(np_type) tm.assert_frame_equal(df, recons) @@ -475,7 +486,7 @@ def test_inf_roundtrip(self, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(df, recons) @@ -499,9 +510,9 @@ def test_sheets(self, frame, tsframe, path): tsframe.to_excel(writer, "test2") writer.save() reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(frame, recons) - recons = pd.read_excel(reader, "test2", index_col=0) + recons = pd.read_excel(reader, sheet_name="test2", index_col=0) tm.assert_frame_equal(tsframe, recons) assert 2 == len(reader.sheet_names) assert "test1" == reader.sheet_names[0] @@ -520,7 +531,7 @@ def test_colaliases(self, frame, path): col_aliases = Index(["AA", "X", "Y", "Z"]) frame.to_excel(path, "test1", header=col_aliases) reader = ExcelFile(path) - rs = pd.read_excel(reader, "test1", index_col=0) + rs = pd.read_excel(reader, sheet_name="test1", index_col=0) xp = frame.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs) @@ -538,7 +549,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df = DataFrame(np.random.randn(10, 2)) >= 0 df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(np.int64) df.index.names = ["test"] assert df.index.names == recons.index.names @@ -550,14 +561,14 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): merge_cells=merge_cells, ) reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(np.int64) df.index.names = ["test"] assert df.index.names == recons.index.names df = DataFrame(np.random.randn(10, 2)) >= 0 df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(np.int64) df.index.names = ["test"] tm.assert_frame_equal(df, recons.astype(bool)) @@ -573,7 +584,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df = df.set_index(["A", "B"]) reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=[0, 1]) + recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) tm.assert_frame_equal(df, recons) def test_excel_roundtrip_indexname(self, merge_cells, path): @@ -583,7 +594,7 @@ def test_excel_roundtrip_indexname(self, merge_cells, path): df.to_excel(path, merge_cells=merge_cells) xf = ExcelFile(path) - result = pd.read_excel(xf, xf.sheet_names[0], index_col=0) + result = pd.read_excel(xf, sheet_name=xf.sheet_names[0], index_col=0) tm.assert_frame_equal(result, df) assert result.index.name == "foo" @@ -601,7 +612,7 @@ def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path): tsf.to_excel(path, "test1", merge_cells=merge_cells) reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(tsframe, recons) @@ -643,8 +654,8 @@ def test_excel_date_datetime_format(self, engine, ext, path): reader1 = ExcelFile(path) reader2 = ExcelFile(filename2) - rs1 = pd.read_excel(reader1, "test1", index_col=0) - rs2 = pd.read_excel(reader2, "test1", index_col=0) + rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0) + rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0) tm.assert_frame_equal(rs1, rs2) @@ -665,7 +676,7 @@ def test_to_excel_interval_no_labels(self, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_interval_labels(self, path): @@ -683,7 +694,7 @@ def test_to_excel_interval_labels(self, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_timedelta(self, path): @@ -703,7 +714,7 @@ def test_to_excel_timedelta(self, path): df.to_excel(path, "test1") reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=0) + recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(expected, recons) def test_to_excel_periodindex(self, tsframe, path): @@ -712,7 +723,7 @@ def test_to_excel_periodindex(self, tsframe, path): xp.to_excel(path, "sht1") reader = ExcelFile(path) - rs = pd.read_excel(reader, "sht1", index_col=0) + rs = pd.read_excel(reader, sheet_name="sht1", index_col=0) tm.assert_frame_equal(xp, rs.to_period("M")) def test_to_excel_multiindex(self, merge_cells, frame, path): @@ -726,7 +737,7 @@ def test_to_excel_multiindex(self, merge_cells, frame, path): # round trip frame.to_excel(path, "test1", merge_cells=merge_cells) reader = ExcelFile(path) - df = pd.read_excel(reader, "test1", index_col=[0, 1]) + df = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 @@ -757,7 +768,7 @@ def test_to_excel_multiindex_cols(self, merge_cells, frame, path): # round trip frame.to_excel(path, "test1", merge_cells=merge_cells) reader = ExcelFile(path) - df = pd.read_excel(reader, "test1", header=header, index_col=[0, 1]) + df = pd.read_excel(reader, sheet_name="test1", header=header, index_col=[0, 1]) if not merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) frame.columns = [".".join(map(str, q)) for q in zip(*fm)] @@ -771,7 +782,7 @@ def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path): tsframe.index.names = ["time", "foo"] tsframe.to_excel(path, "test1", merge_cells=merge_cells) reader = ExcelFile(path) - recons = pd.read_excel(reader, "test1", index_col=[0, 1]) + recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1]) tm.assert_frame_equal(tsframe, recons) assert recons.index.names == ("time", "foo") @@ -792,7 +803,7 @@ def test_to_excel_multiindex_no_write_index(self, path): # Read it back in. reader = ExcelFile(path) - frame3 = pd.read_excel(reader, "test1") + frame3 = pd.read_excel(reader, sheet_name="test1") # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3) @@ -806,7 +817,7 @@ def test_to_excel_float_format(self, path): df.to_excel(path, "test1", float_format="%.2f") reader = ExcelFile(path) - result = pd.read_excel(reader, "test1", index_col=0) + result = pd.read_excel(reader, sheet_name="test1", index_col=0) expected = DataFrame( [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], @@ -825,7 +836,9 @@ def test_to_excel_output_encoding(self, ext): with tm.ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") - result = pd.read_excel(filename, "TestSheet", encoding="utf8", index_col=0) + result = pd.read_excel( + filename, sheet_name="TestSheet", encoding="utf8", index_col=0 + ) tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self, ext, path): @@ -845,7 +858,7 @@ def test_to_excel_unicode_filename(self, ext, path): df.to_excel(filename, "test1", float_format="%.2f") reader = ExcelFile(filename) - result = pd.read_excel(reader, "test1", index_col=0) + result = pd.read_excel(reader, sheet_name="test1", index_col=0) expected = DataFrame( [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], @@ -965,7 +978,7 @@ def roundtrip(data, header=True, parser_hdr=0, index=True): data.to_excel(path, header=header, merge_cells=merge_cells, index=index) xf = ExcelFile(path) - return pd.read_excel(xf, xf.sheet_names[0], header=parser_hdr) + return pd.read_excel(xf, sheet_name=xf.sheet_names[0], header=parser_hdr) # Basic test. parser_header = 0 if use_headers else None @@ -1017,18 +1030,20 @@ def test_duplicated_columns(self, path): ) # By default, we mangle. - result = pd.read_excel(path, "test1", index_col=0) + result = pd.read_excel(path, sheet_name="test1", index_col=0) tm.assert_frame_equal(result, expected) # Explicitly, we pass in the parameter. - result = pd.read_excel(path, "test1", index_col=0, mangle_dupe_cols=True) + result = pd.read_excel( + path, sheet_name="test1", index_col=0, mangle_dupe_cols=True + ) tm.assert_frame_equal(result, expected) # see gh-11007, gh-10970 df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"]) df.to_excel(path, "test1") - result = pd.read_excel(path, "test1", index_col=0) + result = pd.read_excel(path, sheet_name="test1", index_col=0) expected = DataFrame( [[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"] ) @@ -1036,21 +1051,21 @@ def test_duplicated_columns(self, path): # see gh-10982 df.to_excel(path, "test1", index=False, header=False) - result = pd.read_excel(path, "test1", header=None) + result = pd.read_excel(path, sheet_name="test1", header=None) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) tm.assert_frame_equal(result, expected) msg = "Setting mangle_dupe_cols=False is not supported yet" with pytest.raises(ValueError, match=msg): - pd.read_excel(path, "test1", header=None, mangle_dupe_cols=False) + pd.read_excel(path, sheet_name="test1", header=None, mangle_dupe_cols=False) def test_swapped_columns(self, path): # Test for issue #5427. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) write_frame.to_excel(path, "test1", columns=["B", "A"]) - read_frame = pd.read_excel(path, "test1", header=0) + read_frame = pd.read_excel(path, sheet_name="test1", header=0) tm.assert_series_equal(write_frame["A"], read_frame["A"]) tm.assert_series_equal(write_frame["B"], read_frame["B"]) @@ -1083,7 +1098,7 @@ def test_write_subset_columns(self, path, to_excel_index, read_excel_index_col): expected = write_frame[["A", "B"]] read_frame = pd.read_excel( - path, "col_subset_bug", index_col=read_excel_index_col + path, sheet_name="col_subset_bug", index_col=read_excel_index_col ) tm.assert_frame_equal(expected, read_frame) @@ -1098,13 +1113,13 @@ def test_comment_arg(self, path): df.to_excel(path, "test_c") # Read file without comment arg. - result1 = pd.read_excel(path, "test_c", index_col=0) + result1 = pd.read_excel(path, sheet_name="test_c", index_col=0) result1.iloc[1, 0] = None result1.iloc[1, 1] = None result1.iloc[2, 1] = None - result2 = pd.read_excel(path, "test_c", comment="#", index_col=0) + result2 = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0) tm.assert_frame_equal(result1, result2) def test_comment_default(self, path): @@ -1116,8 +1131,8 @@ def test_comment_default(self, path): df.to_excel(path, "test_c") # Read file with default and explicit comment=None - result1 = pd.read_excel(path, "test_c") - result2 = pd.read_excel(path, "test_c", comment=None) + result1 = pd.read_excel(path, sheet_name="test_c") + result2 = pd.read_excel(path, sheet_name="test_c", comment=None) tm.assert_frame_equal(result1, result2) def test_comment_used(self, path): @@ -1131,7 +1146,7 @@ def test_comment_used(self, path): # Test read_frame_comment against manually produced expected output. expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]}) - result = pd.read_excel(path, "test_c", comment="#", index_col=0) + result = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0) tm.assert_frame_equal(result, expected) def test_comment_empty_line(self, path): @@ -1165,7 +1180,7 @@ def test_datetimes(self, path): write_frame = DataFrame({"A": datetimes}) write_frame.to_excel(path, "Sheet1") - read_frame = pd.read_excel(path, "Sheet1", header=0) + read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0) tm.assert_series_equal(write_frame["A"], read_frame["A"]) @@ -1193,7 +1208,7 @@ def test_write_lists_dict(self, path): } ) df.to_excel(path, "Sheet1") - read = pd.read_excel(path, "Sheet1", header=0, index_col=0) + read = pd.read_excel(path, sheet_name="Sheet1", header=0, index_col=0) expected = df.copy() expected.mixed = expected.mixed.apply(str) diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index d456afe4ed351..1c9c514b20f46 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -28,7 +28,7 @@ def test_read_xlrd_book(read_ext, frame): book = xlrd.open_workbook(pth) with ExcelFile(book, engine=engine) as xl: - result = pd.read_excel(xl, sheet_name, index_col=0) + result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) tm.assert_frame_equal(df, result) result = pd.read_excel(book, sheet_name=sheet_name, engine=engine, index_col=0) @@ -40,4 +40,4 @@ def test_excel_table_sheet_by_index(datapath, read_ext): path = datapath("io", "data", "excel", f"test1{read_ext}") with pd.ExcelFile(path) as excel: with pytest.raises(xlrd.XLRDError): - pd.read_excel(excel, "asdf") + pd.read_excel(excel, sheet_name="asdf")