-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DEPR: Deprecate using `xlrd` engine for read_excel
#35029
Changes from 10 commits
3a76a36
101aa97
081ecf8
ada4354
3233381
0f4c8a1
499f9a0
825c61c
88093f6
44f157b
fffbacb
bb53725
d8dcb04
f9876dd
bc3ec47
fe10a89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,11 +4,13 @@ | |
import os | ||
from textwrap import fill | ||
from typing import Any, Dict, Mapping, Union, cast | ||
import warnings | ||
|
||
from pandas._config import config | ||
|
||
from pandas._libs.parsers import STR_NA_VALUES | ||
from pandas._typing import Buffer, FilePathOrBuffer, StorageOptions | ||
from pandas.compat._optional import import_optional_dependency | ||
from pandas.errors import EmptyDataError | ||
from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments | ||
|
||
|
@@ -101,10 +103,17 @@ | |
If io is not a buffer or path, this must be set to identify io. | ||
Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd". | ||
Engine compatibility : | ||
- "xlrd" supports most old/new Excel file formats. | ||
rhshadrach marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
- "xlrd" supports most old/new Excel file formats but is no longer maintained | ||
and not supported with Python >= 3.9. | ||
- "openpyxl" supports newer Excel file formats. | ||
- "odf" supports OpenDocument file formats (.odf, .ods, .odt). | ||
- "pyxlsb" supports Binary Excel files. | ||
|
||
.. deprecated:: 1.2.0 | ||
The default value ``None`` is deprecated and will be changed to ``"openpyxl"`` | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The default of None is only deprecated when you're reading an xls/xlsx file, not when reading any of the other formats like ods. |
||
in a future version. Not specifying an engine will raise a FutureWarning. | ||
|
||
converters : dict, default None | ||
Dict of functions for converting values in certain columns. Keys can | ||
either be integers or column labels, values are functions that take one | ||
|
@@ -878,12 +887,19 @@ class ExcelFile: | |
engine : str, default None | ||
If io is not a buffer or path, this must be set to identify io. | ||
Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``, | ||
default ``xlrd``. | ||
default ``xlrd`` for .xls* files, ``odf`` for .ods files. | ||
Engine compatibility : | ||
- ``xlrd`` supports most old/new Excel file formats. | ||
|
||
- ``xlrd`` supports most old/new Excel file formats but is no longer maintained | ||
and is not supported with Python >= 3.9. | ||
- ``openpyxl`` supports newer Excel file formats. | ||
- ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). | ||
- ``pyxlsb`` supports Binary Excel files. | ||
|
||
.. deprecated:: 1.2.0 | ||
The default value ``None`` is deprecated and will be changed to | ||
``"openpyxl"`` in a future version. Not specifying an engine will | ||
raise a FutureWarning. | ||
""" | ||
|
||
from pandas.io.excel._odfreader import ODFReader | ||
|
@@ -903,13 +919,51 @@ def __init__( | |
): | ||
if engine is None: | ||
engine = "xlrd" | ||
|
||
# Determine ext and use odf for ods stream/file | ||
if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)): | ||
ext = None | ||
if _is_ods_stream(path_or_buffer): | ||
engine = "odf" | ||
else: | ||
ext = os.path.splitext(str(path_or_buffer))[-1] | ||
if ext == ".ods": | ||
engine = "odf" | ||
|
||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# GH 35029 - Default to openpyxl if xlrd is not installed for non-xls | ||
if ( | ||
engine == "xlrd" | ||
and ext != ".xls" | ||
and import_optional_dependency( | ||
"xlrd", raise_on_missing=False, on_version="ignore" | ||
) | ||
is None | ||
): | ||
engine = "openpyxl" | ||
|
||
# GH 35029 - Don't warn with xls files as only xlrd can read them | ||
if engine == "xlrd" and ext != ".xls": | ||
import inspect | ||
|
||
caller = inspect.stack()[1] | ||
if ( | ||
caller.filename.endswith("pandas/io/excel/_base.py") | ||
and caller.function == "read_excel" | ||
): | ||
stacklevel = 4 | ||
else: | ||
stacklevel = 2 | ||
warnings.warn( | ||
"The default argument engine=None is deprecated. Using None " | ||
"defaults to the xlrd engine which is no longer maintained, " | ||
"and is not supported when using pandas with python >= 3.9. " | ||
"The default value will be 'openpyxl' in a future version of " | ||
"pandas, although xlrd will continue to be allowed for the " | ||
"indefinite future. Either install openpyxl and specify it as " | ||
"the engine or specify 'xlrd' to suppress this warning.", | ||
FutureWarning, | ||
stacklevel=stacklevel, | ||
) | ||
if engine not in self._engines: | ||
raise ValueError(f"Unknown engine: {engine}") | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
great, if we think it's overkill then we can remove it before the release
but can't hurt having it here