pandas-dev · jreback · Dec 1, 2020 · Nov 2, 2019 · Aug 23, 2020 · Aug 26, 2020
diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst
@@ -156,7 +156,7 @@ The equivalent read function :meth:`~DataFrame.read_excel` will reload the data
 
 .. ipython:: python
 
-    titanic = pd.read_excel("titanic.xlsx", sheet_name="passengers")
+    titanic = pd.read_excel("titanic.xlsx", sheet_name="passengers", engine="openpyxl")
 
 .. ipython:: python
 

diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
@@ -808,7 +808,9 @@ Reading from an excel file.
 
 .. ipython:: python
 
-   pd.read_excel("foo.xlsx", "Sheet1", index_col=None, na_values=["NA"])
+   pd.read_excel(
+       "foo.xlsx", "Sheet1", index_col=None, na_values=["NA"], engine="openpyxl"
+   )
 
 .. ipython:: python
    :suppress:

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -2832,6 +2832,18 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
 Reading Excel files
 '''''''''''''''''''
 
+.. warning::
+
+   .. versionchanged:: 1.2.0
+
+   The default argument ``engine=None`` to ``pd.read_excel`` is
+   deprecated. Using ``None`` defaults to the xlrd engine, which is no
+   longer maintained and is not supported when using pandas with
+   Python >= 3.9. The default value will be ``'openpyxl'`` in a future
+   version of pandas, although xlrd will continue to be allowed for the
+   indefinite future. Either install openpyxl and specify it as the
+   engine, or specify ``'xlrd'`` to suppress the ``FutureWarning``.
+
 In the most basic use-case, ``read_excel`` takes a path to an Excel
 file, and the ``sheet_name`` indicating which sheet to parse.
 
@@ -2982,7 +2994,7 @@ For example, to read in a ``MultiIndex`` index without names:
        index=pd.MultiIndex.from_product([["a", "b"], ["c", "d"]]),
    )
    df.to_excel("path_to_file.xlsx")
-   df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1])
+   df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1], engine="openpyxl")
    df
 
 If the index has level names, they will parsed as well, using the same
@@ -2992,7 +3004,7 @@ parameters.
 
    df.index = df.index.set_names(["lvl1", "lvl2"])
    df.to_excel("path_to_file.xlsx")
-   df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1])
+   df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1], engine="openpyxl")
    df
 
 
@@ -3003,7 +3015,9 @@ should be passed to ``index_col`` and ``header``:
 
    df.columns = pd.MultiIndex.from_product([["a"], ["b", "d"]], names=["c1", "c2"])
    df.to_excel("path_to_file.xlsx")
-   df = pd.read_excel("path_to_file.xlsx", index_col=[0, 1], header=[0, 1])
+   df = pd.read_excel(
+       "path_to_file.xlsx", index_col=[0, 1], header=[0, 1], engine="openpyxl"
+   )
    df
 
 .. ipython:: python

diff --git a/doc/source/whatsnew/v0.17.0.rst b/doc/source/whatsnew/v0.17.0.rst
@@ -308,7 +308,7 @@ See the :ref:`documentation <io.excel>` for more details.
    df
    df.to_excel("test.xlsx")
 
-   df = pd.read_excel("test.xlsx", header=[0, 1], index_col=[0, 1])
+   df = pd.read_excel("test.xlsx", header=[0, 1], index_col=[0, 1], engine="openpyxl")
    df
 
 .. ipython:: python

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -8,6 +8,16 @@ including other versions of pandas.
 
 {{ header }}
 
+.. warning::
+
+   The default argument ``engine=None`` to ``pd.read_excel`` is
+   deprecated. Using ``None`` defaults to the xlrd engine, which is no
+   longer maintained and is not supported when using pandas with
+   Python >= 3.9. The default value will be ``'openpyxl'`` in a future
+   version of pandas, although xlrd will continue to be allowed for the
+   indefinite future. Either install openpyxl and specify it as the
+   engine, or specify ``'xlrd'`` to suppress the ``FutureWarning``.
+
 .. ---------------------------------------------------------------------------
 
 Enhancements
@@ -461,6 +471,7 @@ Other API changes
 
 Deprecations
 ~~~~~~~~~~~~
+
 - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
 - Deprecated parameter ``dtype`` of method :meth:`~Index.copy` for all :class:`Index` subclasses. Use the :meth:`~Index.astype` method instead for changing dtype (:issue:`35853`)
 - Deprecated parameters ``levels`` and ``codes`` in :meth:`MultiIndex.copy`. Use the :meth:`~MultiIndex.set_levels` and :meth:`~MultiIndex.set_codes` methods instead (:issue:`36685`)
@@ -487,6 +498,7 @@ Deprecations
 - Deprecated :meth:`Index.asi8` for :class:`Index` subclasses other than :class:`.DatetimeIndex`, :class:`.TimedeltaIndex`, and :class:`PeriodIndex` (:issue:`37877`)
 - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)
+- Deprecated the default argument ``engine=None`` of the function :func:`read_excel`, which uses the no-longer-maintained xlrd engine. Not specifying the engine will raise a ``FutureWarning``. This argument will default to ``"openpyxl"`` in a future version; openpyxl is now the recommended engine for xlsx and xlsm files (:issue:`28547`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -4,11 +4,13 @@
 import os
 from textwrap import fill
 from typing import Any, Dict, Mapping, Union, cast
+import warnings
 
 from pandas._config import config
 
 from pandas._libs.parsers import STR_NA_VALUES
 from pandas._typing import Buffer, FilePathOrBuffer, StorageOptions
+from pandas.compat._optional import import_optional_dependency
 from pandas.errors import EmptyDataError
 from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments
 
@@ -101,10 +103,17 @@
     If io is not a buffer or path, this must be set to identify io.
     Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd".
     Engine compatibility :
-    - "xlrd" supports most old/new Excel file formats.
+
+    - "xlrd" supports most old/new Excel file formats but is no longer maintained
+      and is not supported with Python >= 3.9.
     - "openpyxl" supports newer Excel file formats.
     - "odf" supports OpenDocument file formats (.odf, .ods, .odt).
     - "pyxlsb" supports Binary Excel files.
+
+    .. deprecated:: 1.2.0
+        The default value ``None`` is deprecated and will be changed to ``"openpyxl"``
+        in a future version. Not specifying an engine will raise a FutureWarning.
+
 converters : dict, default None
     Dict of functions for converting values in certain columns. Keys can
     either be integers or column labels, values are functions that take one
@@ -878,12 +887,19 @@ class ExcelFile:
     engine : str, default None
         If io is not a buffer or path, this must be set to identify io.
         Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
-        default ``xlrd``.
+        default ``xlrd`` for .xls* files, ``odf`` for .ods files.
         Engine compatibility :
-        - ``xlrd`` supports most old/new Excel file formats.
+
+        - ``xlrd`` supports most old/new Excel file formats but is no longer maintained
+          and is not supported with Python >= 3.9.
         - ``openpyxl`` supports newer Excel file formats.
         - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
         - ``pyxlsb`` supports Binary Excel files.
+
+        .. deprecated:: 1.2.0
+            The default value ``None`` is deprecated and will be changed to
+            ``"openpyxl"`` in a future version. Not specifying an engine will
+            raise a FutureWarning.
     """
 
     from pandas.io.excel._odfreader import ODFReader
@@ -903,13 +919,51 @@ def __init__(
     ):
         if engine is None:
             engine = "xlrd"
+
+            # Determine ext and use odf for ods stream/file
             if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
+                ext = None
                 if _is_ods_stream(path_or_buffer):
                     engine = "odf"
             else:
                 ext = os.path.splitext(str(path_or_buffer))[-1]
                 if ext == ".ods":
                     engine = "odf"
+
+            # GH 35029 - Default to openpyxl if xlrd is not installed for non-xls
+            if (
+                engine == "xlrd"
+                and ext != ".xls"
+                and import_optional_dependency(
+                    "xlrd", raise_on_missing=False, on_version="ignore"
+                )
+                is None
+            ):
+                engine = "openpyxl"
+
+            # GH 35029 - Don't warn with xls files as only xlrd can read them
+            if engine == "xlrd" and ext != ".xls":
+                import inspect
+
+                caller = inspect.stack()[1]
+                if (
+                    caller.filename.endswith("pandas/io/excel/_base.py")
+                    and caller.function == "read_excel"
+                ):
+                    stacklevel = 4
+                else:
+                    stacklevel = 2
+                warnings.warn(
+                    "The default argument engine=None is deprecated. Using None "
+                    "defaults to the xlrd engine which is no longer maintained, "
+                    "and is not supported when using pandas with python >= 3.9. "
+                    "The default value will be 'openpyxl' in a future version of "
+                    "pandas, although xlrd will continue to be allowed for the "
+                    "indefinite future. Either install openpyxl and specify it as "
+                    "the engine or specify 'xlrd' to suppress this warning.",
+                    FutureWarning,
+                    stacklevel=stacklevel,
+                )
         if engine not in self._engines:
             raise ValueError(f"Unknown engine: {engine}")
 

diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
@@ -38,6 +38,24 @@ def test_read_xlrd_book(read_ext, frame):
 # TODO: test for openpyxl as well
 def test_excel_table_sheet_by_index(datapath, read_ext):
     path = datapath("io", "data", "excel", f"test1{read_ext}")
-    with ExcelFile(path) as excel:
+    with ExcelFile(path, engine="xlrd") as excel:
         with pytest.raises(xlrd.XLRDError):
             pd.read_excel(excel, sheet_name="asdf")
+
+
+def test_excel_file_warning_with_xlsx_file(datapath):
+    # GH 29375
+    path = datapath("io", "data", "excel", "test1.xlsx")
+    # Extra warnings allowed due to DeprecationWarning: "This method will be removed
+    # in future versions. Use 'tree.iter()' or 'list(tree.iter())' instead."
+    with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False):
+        ExcelFile(path, engine=None)
+
+
+def test_read_excel_warning_with_xlsx_file(tmpdir, datapath):
+    # GH 29375
+    path = datapath("io", "data", "excel", "test1.xlsx")
+    # Extra warnings allowed due to DeprecationWarning: "This method will be removed
+    # in future versions. Use 'tree.iter()' or 'list(tree.iter())' instead."
+    with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False):
+        pd.read_excel(path, "Sheet1", engine=None)