From 81761f1eb33a27bc50ae322a21c30d170c83d18a Mon Sep 17 00:00:00 2001 From: Chris Withers Date: Wed, 23 Dec 2020 16:48:11 +0000 Subject: [PATCH] DOC: update wording about when xlrd engine can be used (#38456) Co-authored-by: Joris Van den Bossche --- doc/source/user_guide/io.rst | 31 ++++++++++++++++++++++++++++--- doc/source/whatsnew/v1.2.0.rst | 29 +++++++++++++++-------------- pandas/io/excel/_base.py | 24 +++++++++++++----------- 3 files changed, 56 insertions(+), 28 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index b04abf512fbeb..2326c79e4af06 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2834,15 +2834,40 @@ parse HTML tables in the top-level pandas io function ``read_html``. Excel files ----------- -The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``) -files using the ``xlrd`` Python module. Excel 2007+ (``.xlsx``) files -can be read using either ``xlrd`` or ``openpyxl``. Binary Excel (``.xlsb``) +The :func:`~pandas.read_excel` method can read Excel 2007+ (``.xlsx``) files +using the ``openpyxl`` Python module. Excel 2003 (``.xls``) files +can be read using ``xlrd``. Binary Excel (``.xlsb``) files can be read using ``pyxlsb``. The :meth:`~DataFrame.to_excel` instance method is used for saving a ``DataFrame`` to Excel. Generally the semantics are similar to working with :ref:`csv<io.read_csv_table>` data. See the :ref:`cookbook<cookbook.excel>` for some advanced strategies. +.. warning:: + + The `xlwt <https://xlwt.readthedocs.io/en/latest/>`__ package for writing old-style ``.xls`` + excel files is no longer maintained. + The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading + old-style ``.xls`` files. + + Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` + would result in using the ``xlrd`` engine in many cases, including new + Excel 2007+ (``.xlsx``) files. + If `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ is installed, + many of these cases will now default to using the ``openpyxl`` engine.
+ See the :func:`read_excel` documentation for more details. + + Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + (``.xlsx``) files. + **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.** + This is no longer supported, switch to using ``openpyxl`` instead. + + Attempting to use the ``xlwt`` engine will raise a ``FutureWarning`` + unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. + While this option is now deprecated and will also raise a ``FutureWarning``, + it can be globally set and the warning suppressed. Users are recommended to + write ``.xlsx`` files using the ``openpyxl`` engine instead. + .. _io.excel_reader: Reading Excel files diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 89b4240afe694..151b41705f9a5 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -10,21 +10,22 @@ including other versions of pandas. .. warning:: - The packages `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ for reading excel - files and `xlwt <https://xlwt.readthedocs.io/en/latest/>`_ for - writing excel files are no longer maintained. These are the only engines in pandas - that support the xls format. - - Previously, the default argument ``engine=None`` to ``pd.read_excel`` - would result in using the ``xlrd`` engine in many cases. If - `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`_ is installed, + The `xlwt <https://xlwt.readthedocs.io/en/latest/>`_ package for writing old-style ``.xls`` + excel files is no longer maintained. + The `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ package is now only for reading + old-style ``.xls`` files. + + Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel` + would result in using the ``xlrd`` engine in many cases, including new + Excel 2007+ (``.xlsx``) files. + If `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`_ is installed, many of these cases will now default to using the ``openpyxl`` engine. - See the :func:`read_excel` documentation for more details. Attempting to read - ``.xls`` files or specifying ``engine="xlrd"`` to ``pd.read_excel`` will not - raise a warning.
However users should be aware that ``xlrd`` is already - broken with certain package configurations, for example with Python 3.9 - when `defusedxml <https://github.com/tiran/defusedxml/>`_ is installed, and - is anticipated to be unusable in the future. + See the :func:`read_excel` documentation for more details. + + Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + (``.xlsx``) files. + **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.** + This is no longer supported, switch to using ``openpyxl`` instead. Attempting to use the the ``xlwt`` engine will raise a ``FutureWarning`` unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``. diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index bf1011176693f..c72f294bf6ac8 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -105,16 +105,16 @@ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb". Engine compatibility : - - "xlrd" supports most old/new Excel file formats. + - "xlrd" supports old-style Excel files (.xls). - "openpyxl" supports newer Excel file formats. - "odf" supports OpenDocument file formats (.odf, .ods, .odt). - "pyxlsb" supports Binary Excel files. .. versionchanged:: 1.2.0 The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ - is no longer maintained, and is not supported with - python >= 3.9. When ``engine=None``, the following logic will be - used to determine the engine. + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), then `odf <https://pypi.org/project/odfpy/>`_ will be used. @@ -920,7 +920,7 @@ class ExcelFile: """ Class for parsing tabular excel sheets into DataFrame objects. - Uses xlrd engine by default.
See read_excel for more documentation Parameters ---------- @@ -933,7 +933,7 @@ class ExcelFile: Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb`` Engine compatibility : - - ``xlrd`` supports most old/new Excel file formats. + - ``xlrd`` supports old-style Excel files (.xls). - ``openpyxl`` supports newer Excel file formats. - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). - ``pyxlsb`` supports Binary Excel files. @@ -941,9 +941,9 @@ class ExcelFile: .. versionchanged:: 1.2.0 The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ - is no longer maintained, and is not supported with - python >= 3.9. When ``engine=None``, the following logic will be - used to determine the engine. + now only supports old-style ``.xls`` files. + When ``engine=None``, the following logic will be + used to determine the engine: - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt), then `odf <https://pypi.org/project/odfpy/>`_ will be used. @@ -954,8 +954,10 @@ class ExcelFile: then ``openpyxl`` will be used. - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. - Specifying ``engine="xlrd"`` will continue to be allowed for the - indefinite future. + .. warning:: + + Please do not report issues when using ``xlrd`` to read ``.xlsx`` files. + This is not supported, switch to using ``openpyxl`` instead. """ from pandas.io.excel._odfreader import ODFReader