From 62bed0e33397132bd4340c8da54c3feeb22e5083 Mon Sep 17 00:00:00 2001 From: John Evans Date: Mon, 16 May 2016 08:06:51 -0400 Subject: [PATCH] COMPAT: Add Pathlib, py.path support for read_hdf Closes #11773 Author: John Evans Closes #12930 from quintusdias/issue11773 and squashes the following commits: dcee282 [John Evans] COMPAT: Add Pathlib, py.path support for read_hdf, to_hdf --- doc/source/whatsnew/v0.18.2.txt | 1 + pandas/io/pytables.py | 11 +++++++++- pandas/io/tests/test_pytables.py | 36 ++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 3ac466158276f..459bdbf10a4f1 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -32,6 +32,7 @@ Other enhancements - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour remains to raising a ``NonExistentTimeError`` (:issue:`13057`) - ``Index`` now supports ``.str.extractall()`` which returns ``DataFrame``, see :ref:`Extract all matches in each subject (extractall) ` (:issue:`10008`, :issue:`13156`) +- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`) .. ipython:: python diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 318fd17b8f88e..d350358081aa7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -13,10 +13,12 @@ import os import numpy as np + import pandas as pd from pandas import (Series, DataFrame, Panel, Panel4D, Index, MultiIndex, Int64Index) from pandas.core import config +from pandas.io.common import _stringify_path from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex @@ -254,6 +256,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, else: f = lambda store: store.put(key, value, **kwargs) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, string_types): with HDFStore(path_or_buf, mode=mode, complevel=complevel, complib=complib) as store: @@ -270,7 +273,11 @@ def read_hdf(path_or_buf, key=None, **kwargs): Parameters ---------- - path_or_buf : path (string), or buffer to read from + path_or_buf : path (string), buffer, or path object (pathlib.Path or + py._path.local.LocalPath) to read from + + .. versionadded:: 0.18.2 support for pathlib, py.path. + key : group identifier in the store. Can be omitted a HDF file contains a single pandas object. where : list of Term (or convertable) objects, optional @@ -293,6 +300,7 @@ def read_hdf(path_or_buf, key=None, **kwargs): if 'where' in kwargs: kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1) + path_or_buf = _stringify_path(path_or_buf) if isinstance(path_or_buf, string_types): try: @@ -316,6 +324,7 @@ def read_hdf(path_or_buf, key=None, **kwargs): store = path_or_buf auto_close = False + else: raise NotImplementedError('Support for generic buffers has not been ' 'implemented.') diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index d21189fe91a2a..6bf0175526424 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4836,6 +4836,42 @@ def test_read_nokey(self): df.to_hdf(path, 'df2', mode='a') self.assertRaises(ValueError, read_hdf, path) + def test_read_from_pathlib_path(self): + + # GH11773 + tm._skip_if_no_pathlib() + + from pathlib import Path + + expected = DataFrame(np.random.rand(4, 5), + index=list('abcd'), + columns=list('ABCDE')) + with ensure_clean_path(self.path) as filename: + path_obj = Path(filename) + + expected.to_hdf(path_obj, 'df', mode='a') + actual = read_hdf(path_obj, 'df') + + tm.assert_frame_equal(expected, actual) + + def test_read_from_py_localpath(self): + + # GH11773 + tm._skip_if_no_localpath() + + from py.path import local as LocalPath + + expected = DataFrame(np.random.rand(4, 5), + index=list('abcd'), + columns=list('ABCDE')) + with ensure_clean_path(self.path) as filename: + path_obj = LocalPath(filename) + + expected.to_hdf(path_obj, 'df', mode='a') + actual = read_hdf(path_obj, 'df') + + tm.assert_frame_equal(expected, actual) + class TestHDFComplexValues(Base): # GH10447