Skip to content

Commit

Permalink
Backport PR #32544: BUG: pd.ExcelFile closes stream on destruction (#…
Browse files Browse the repository at this point in the history
…32657)

Co-authored-by: Robert de Vries <rhdv@xs4all.nl>
  • Loading branch information
meeseeksmachine and roberthdevries authored Mar 12, 2020
1 parent b38357b commit 8b40ae9
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 12 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Fixed regressions
- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`)
- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`)
- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`)
- Fixed regression in :class:`ExcelFile` where the stream passed into the function was closed by the destructor (:issue:`31467`)
- Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`)
- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`)

Expand Down
12 changes: 4 additions & 8 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,9 @@ def _workbook_class(self):
def load_workbook(self, filepath_or_buffer):
    # Stub: the body visible here does nothing with filepath_or_buffer.
    # NOTE(review): any decorators sit above this diff hunk and are not
    # visible here -- presumably this is an abstract hook; confirm.
    pass

def close(self):
    """Release anything the reader holds open.

    The default implementation is a no-op; a reader that keeps a handle
    open (the openpyxl reader defines its own ``close``) supplies the
    real cleanup.
    """
    return None

@property
@abc.abstractmethod
def sheet_names(self):
Expand Down Expand Up @@ -895,14 +898,7 @@ def sheet_names(self):

def close(self):
    """Close the underlying reader.

    Cleanup is delegated to ``self._reader.close()`` so that
    engine-specific handling lives with the reader rather than behind an
    ``engine == "openpyxl"`` special case here.

    ``self.io`` is deliberately left untouched: it may be a stream that
    the caller opened and still owns, and closing it from here was the
    regression reported in GH31467.
    """
    self._reader.close()

def __enter__(self):
return self
Expand Down
5 changes: 5 additions & 0 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,11 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
)

def close(self):
    """Close the workbook held in ``self.book``."""
    # The workbook is loaded with read_only=True, and openpyxl does not
    # close it in that mode unless asked to explicitly; see
    # https://stackoverflow.com/questions/31416842/
    # openpyxl-does-not-close-excel-workbook-in-read-only-mode
    workbook = self.book
    workbook.close()

@property
def sheet_names(self) -> List[str]:
return self.book.sheetnames
Expand Down
19 changes: 15 additions & 4 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,17 @@ def test_read_from_py_localpath(self, read_ext):

tm.assert_frame_equal(expected, actual)

@td.check_file_leaks
def test_close_from_py_localpath(self, read_ext):
    """read_excel must leave a caller-supplied file handle open (GH31467)."""
    path = os.path.join("test1" + read_ext)
    with open(path, "rb") as handle:
        frame = pd.read_excel(handle, "Sheet1", index_col=0)
        del frame
        # Reading must still succeed at this point: read_excel is expected
        # not to have closed the handle it was given.
        handle.read()

def test_reader_seconds(self, read_ext):
if pd.read_excel.keywords["engine"] == "pyxlsb":
pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
Expand Down Expand Up @@ -1019,10 +1030,10 @@ def test_excel_read_buffer(self, engine, read_ext):
tm.assert_frame_equal(expected, actual)

def test_reader_closes_file(self, engine, read_ext):
    """Parse through ExcelFile on an open handle; the handle ends up closed.

    The file is opened with a context manager so the handle is always
    released by the test itself, instead of relying on a bare ``open()``
    whose handle nothing in the test closes.
    """
    with open("test1" + read_ext, "rb") as f:
        with pd.ExcelFile(f) as xlsx:
            # parses okay
            pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine)

    # Checked after the ``with open`` block has exited.
    assert f.closed

Expand Down

0 comments on commit 8b40ae9

Please sign in to comment.