From d8bccc4f982f6dc01fcf5254c37cd7e46c08a0d3 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 20 Jun 2017 01:36:44 +0200 Subject: [PATCH] Fix reading Series with read_hdf (#16610) * Added test to reproduce issue #16583 * Fix #16583 by adding an explicit `mode` argument to `read_hdf` kwargs which are meant for the opening of the HDFStore should be filtered out before passing the remaining kwargs to the `select` function to load the data. * Noted fix for #16583 in WhatsNew (cherry picked from commit 196eb8e5c05952574dcdd5d0fb4d0a73e4bd6e91) --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/io/pytables.py | 16 +++++++++------- pandas/tests/io/test_pytables.py | 17 +++++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index acd19a8b8da10..265f0c8d0cf59 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -56,6 +56,7 @@ I/O ^^^ -- Bug in ``pd.read_csv()`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue`16675`) +-- Bug in ``pd.read_hdf()`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 26c4a08bee59f..1b944936ec3e7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -282,7 +282,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, f(path_or_buf) -def read_hdf(path_or_buf, key=None, **kwargs): +def read_hdf(path_or_buf, key=None, mode='r', **kwargs): """ read from the store, close it if we opened it Retrieve pandas object stored in file, optionally based on where @@ -290,13 +290,16 @@ def read_hdf(path_or_buf, key=None, **kwargs): Parameters ---------- - path_or_buf : path (string), buffer, or path object (pathlib.Path or - py._path.local.LocalPath) to read from + path_or_buf : path (string), buffer or path object (pathlib.Path or + py._path.local.LocalPath) designating the file to open, or an + already opened pd.HDFStore object .. versionadded:: 0.19.0 support for pathlib, py.path. key : group identifier in the store. Can be omitted if the HDF file contains a single pandas object. + mode : string, {'r', 'r+', 'a'}, default 'r'. Mode to use when opening + the file. Ignored if path_or_buf is a pd.HDFStore. where : list of Term (or convertable) objects, optional start : optional, integer (defaults to None), row number to start selection @@ -313,10 +316,9 @@ def read_hdf(path_or_buf, key=None, **kwargs): """ - if kwargs.get('mode', 'a') not in ['r', 'r+', 'a']: + if mode not in ['r', 'r+', 'a']: raise ValueError('mode {0} is not allowed while performing a read. ' - 'Allowed modes are r, r+ and a.' - .format(kwargs.get('mode'))) + 'Allowed modes are r, r+ and a.'.format(mode)) # grab the scope if 'where' in kwargs: kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1) @@ -335,9 +337,9 @@ def read_hdf(path_or_buf, key=None, **kwargs): raise compat.FileNotFoundError( 'File %s does not exist' % path_or_buf) + store = HDFStore(path_or_buf, mode=mode, **kwargs) # can't auto open/close if we are using an iterator # so delegate to the iterator - store = HDFStore(path_or_buf, **kwargs) auto_close = True elif isinstance(path_or_buf, HDFStore): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 82784da094ed4..1cdb2c058c9be 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5190,6 +5190,23 @@ def test_query_compare_column_type(self): expected = df.loc[[], :] tm.assert_frame_equal(expected, result) + @pytest.mark.parametrize('format', ['fixed', 'table']) + def test_read_hdf_series_mode_r(self, format): + # GH 16583 + # Tests that reading a Series saved to an HDF file + # still works if a mode='r' argument is supplied + series = tm.makeFloatSeries() + with ensure_clean_path(self.path) as path: + series.to_hdf(path, key='data', format=format) + result = pd.read_hdf(path, key='data', mode='r') + tm.assert_series_equal(result, series) + + @pytest.mark.skipif(sys.version_info < (3, 6), reason="Need python 3.6") + def test_fspath(self): + with tm.ensure_clean('foo.h5') as path: + with pd.HDFStore(path) as store: + assert os.fspath(store) == str(path) + class TestHDFComplexValues(Base): # GH10447