diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 6aff4f4bd41e2..b1257fe893804 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -8,6 +8,15 @@ including other versions of pandas.
{{ header }}
+.. warning::
+
+ Previously, the default argument ``engine=None`` to ``pd.read_excel``
+ would result in using the `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ engine in
+ many cases. The engine ``xlrd`` is no longer maintained, and is not supported with
+ Python >= 3.9. If `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
+ many of these cases will now default to using the ``openpyxl`` engine. See the
+ :func:`read_excel` documentation for more details.
+
.. ---------------------------------------------------------------------------
Enhancements
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index c519baa4c21da..0235d6a3f6384 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1,14 +1,17 @@
import abc
import datetime
+import inspect
from io import BufferedIOBase, BytesIO, RawIOBase
import os
from textwrap import fill
from typing import Any, Dict, Mapping, Union, cast
+import warnings
from pandas._config import config
from pandas._libs.parsers import STR_NA_VALUES
from pandas._typing import Buffer, FilePathOrBuffer, StorageOptions
+from pandas.compat._optional import import_optional_dependency
from pandas.errors import EmptyDataError
from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments
@@ -99,12 +102,32 @@
of dtype conversion.
engine : str, default None
If io is not a buffer or path, this must be set to identify io.
- Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd".
+ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
Engine compatibility :
+
- "xlrd" supports most old/new Excel file formats.
- "openpyxl" supports newer Excel file formats.
- "odf" supports OpenDocument file formats (.odf, .ods, .odt).
- "pyxlsb" supports Binary Excel files.
+
+ .. versionchanged:: 1.2.0
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
+ is no longer maintained, and is not supported with
+ Python >= 3.9. When ``engine=None``, the following logic will be
+ used to determine the engine.
+
+ - If ``io`` is an OpenDocument format (.odf, .ods, .odt),
+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
+ - Otherwise if ``io`` is a bytes stream, is a file with the
+ extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd`` will
+ be used.
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
+ then ``openpyxl`` will be used.
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
+
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
+ indefinite future.
+
converters : dict, default None
Dict of functions for converting values in certain columns. Keys can
either be integers or column labels, values are functions that take one
@@ -877,13 +900,32 @@ class ExcelFile:
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
engine : str, default None
If io is not a buffer or path, this must be set to identify io.
- Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
- default ``xlrd``.
+ Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``.
Engine compatibility :
+
- ``xlrd`` supports most old/new Excel file formats.
- ``openpyxl`` supports newer Excel file formats.
- ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
- ``pyxlsb`` supports Binary Excel files.
+
+ .. versionchanged:: 1.2.0
+
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
+ is no longer maintained, and is not supported with
+ Python >= 3.9. When ``engine=None``, the following logic will be
+ used to determine the engine.
+
+ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
+ - Otherwise if ``path_or_buffer`` is a bytes stream, is a file with the
+ extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd``
+ will be used.
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
+ then ``openpyxl`` will be used.
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
+
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
+ indefinite future.
"""
from pandas.io.excel._odfreader import ODFReader
@@ -902,14 +944,59 @@ def __init__(
self, path_or_buffer, engine=None, storage_options: StorageOptions = None
):
if engine is None:
- engine = "xlrd"
+ # Determine ext and use odf for ods stream/file
if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
+ ext = None
if _is_ods_stream(path_or_buffer):
engine = "odf"
else:
ext = os.path.splitext(str(path_or_buffer))[-1]
if ext == ".ods":
engine = "odf"
+
+ if (
+ import_optional_dependency(
+ "xlrd", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ ):
+ from xlrd import Book
+
+ if isinstance(path_or_buffer, Book):
+ engine = "xlrd"
+
+ # GH 35029 - Prefer openpyxl except for xls files
+ if engine is None:
+ if ext is None or isinstance(path_or_buffer, bytes) or ext == ".xls":
+ engine = "xlrd"
+ elif (
+ import_optional_dependency(
+ "openpyxl", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ ):
+ engine = "openpyxl"
+ else:
+ caller = inspect.stack()[1]
+ if (
+ caller.filename.endswith("pandas/io/excel/_base.py")
+ and caller.function == "read_excel"
+ ):
+ stacklevel = 4
+ else:
+ stacklevel = 2
+ warnings.warn(
+ "The xlrd engine is no longer maintained and is not "
+ "supported when using pandas with python >= 3.9. However, "
+ "the engine xlrd will continue to be allowed for the "
+ "indefinite future. Beginning with pandas 1.2.0, the "
+ "openpyxl engine will be used if it is installed and the "
+ "engine argument is not specified. Either install openpyxl "
+ "or specify engine='xlrd' to silence this warning.",
+ FutureWarning,
+ stacklevel=stacklevel,
+ )
+ engine = "xlrd"
if engine not in self._engines:
raise ValueError(f"Unknown engine: {engine}")
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index c582a0fa23577..98a55ae39bd77 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -577,6 +577,10 @@ def test_date_conversion_overflow(self, read_ext):
if pd.read_excel.keywords["engine"] == "openpyxl":
pytest.xfail("Maybe not supported by openpyxl")
+ if pd.read_excel.keywords["engine"] is None:
+ # GH 35029
+ pytest.xfail("Defaults to openpyxl, maybe not supported")
+
result = pd.read_excel("testdateoverflow" + read_ext)
tm.assert_frame_equal(result, expected)
@@ -1159,7 +1163,7 @@ def test_excel_high_surrogate(self, engine):
expected = DataFrame(["\udc88"], columns=["Column1"])
# should not produce a segmentation violation
- actual = pd.read_excel("high_surrogate.xlsx")
+ actual = pd.read_excel("high_surrogate.xlsx", engine="xlrd")
tm.assert_frame_equal(expected, actual)
@pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"])
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index 8da9c79160e91..0aaa8be616342 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -351,12 +351,15 @@ def test_excel_sheet_by_name_raise(self, path, engine):
msg = "sheet 0 not found"
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, "0")
- else:
+ elif engine == "xlwt":
import xlrd
msg = "No sheet named <'0'>"
with pytest.raises(xlrd.XLRDError, match=msg):
pd.read_excel(xl, sheet_name="0")
+ else:
+ with pytest.raises(KeyError, match="Worksheet 0 does not exist."):
+ pd.read_excel(xl, sheet_name="0")
def test_excel_writer_context_manager(self, frame, path):
with ExcelWriter(path) as writer:
@@ -1193,7 +1196,9 @@ def test_datetimes(self, path):
write_frame = DataFrame({"A": datetimes})
write_frame.to_excel(path, "Sheet1")
- read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0)
+ # GH 35029 - Default changed to openpyxl, but test is for odf/xlrd
+ engine = "odf" if path.endswith("ods") else "xlrd"
+ read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0, engine=engine)
tm.assert_series_equal(write_frame["A"], read_frame["A"])
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
index 26190edaa4960..f2fbcbc2e2f04 100644
--- a/pandas/tests/io/excel/test_xlrd.py
+++ b/pandas/tests/io/excel/test_xlrd.py
@@ -1,5 +1,7 @@
import pytest
+from pandas.compat._optional import import_optional_dependency
+
import pandas as pd
import pandas._testing as tm
@@ -38,6 +40,48 @@ def test_read_xlrd_book(read_ext, frame):
# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
path = datapath("io", "data", "excel", f"test1{read_ext}")
- with ExcelFile(path) as excel:
+ with ExcelFile(path, engine="xlrd") as excel:
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, sheet_name="asdf")
+
+
+def test_excel_file_warning_with_xlsx_file(datapath):
+ # GH 29375
+ path = datapath("io", "data", "excel", "test1.xlsx")
+ has_openpyxl = (
+ import_optional_dependency(
+ "openpyxl", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ )
+ if not has_openpyxl:
+ with tm.assert_produces_warning(
+ FutureWarning,
+ raise_on_extra_warnings=False,
+ match="The xlrd engine is no longer maintained",
+ ):
+ ExcelFile(path, engine=None)
+ else:
+ with tm.assert_produces_warning(None):
+ pd.read_excel(path, "Sheet1", engine=None)
+
+
+def test_read_excel_warning_with_xlsx_file(tmpdir, datapath):
+ # GH 29375
+ path = datapath("io", "data", "excel", "test1.xlsx")
+ has_openpyxl = (
+ import_optional_dependency(
+ "openpyxl", raise_on_missing=False, on_version="ignore"
+ )
+ is not None
+ )
+ if not has_openpyxl:
+ with tm.assert_produces_warning(
+ FutureWarning,
+ raise_on_extra_warnings=False,
+ match="The xlrd engine is no longer maintained",
+ ):
+ pd.read_excel(path, "Sheet1", engine=None)
+ else:
+ with tm.assert_produces_warning(None):
+ pd.read_excel(path, "Sheet1", engine=None)