Skip to content

Commit

Permalink
COMPAT: Add Pathlib, py.path support for read_hdf
Browse files Browse the repository at this point in the history
Closes #11773

Author: John Evans <john.g.evans.ne@gmail.com>

Closes #12930 from quintusdias/issue11773 and squashes the following commits:

dcee282 [John Evans] COMPAT: Add Pathlib, py.path support for read_hdf, to_hdf
  • Loading branch information
quintusdias authored and jreback committed May 16, 2016
1 parent f637aa3 commit 62bed0e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Other enhancements
- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour remains to raise a ``NonExistentTimeError`` (:issue:`13057`)

- ``Index`` now supports ``.str.extractall()`` which returns ``DataFrame``, see :ref:`Extract all matches in each subject (extractall) <text.extractall>` (:issue:`10008`, :issue:`13156`)
- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`)

.. ipython:: python

Expand Down
11 changes: 10 additions & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
import os

import numpy as np

import pandas as pd
from pandas import (Series, DataFrame, Panel, Panel4D, Index,
MultiIndex, Int64Index)
from pandas.core import config
from pandas.io.common import _stringify_path
from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
from pandas.sparse.array import BlockIndex, IntIndex
from pandas.tseries.api import PeriodIndex, DatetimeIndex
Expand Down Expand Up @@ -254,6 +256,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None,
else:
f = lambda store: store.put(key, value, **kwargs)

path_or_buf = _stringify_path(path_or_buf)
if isinstance(path_or_buf, string_types):
with HDFStore(path_or_buf, mode=mode, complevel=complevel,
complib=complib) as store:
Expand All @@ -270,7 +273,11 @@ def read_hdf(path_or_buf, key=None, **kwargs):
Parameters
----------
path_or_buf : path (string), or buffer to read from
path_or_buf : path (string), buffer, or path object (pathlib.Path or
py._path.local.LocalPath) to read from
.. versionadded:: 0.18.2 support for pathlib, py.path.
key : group identifier in the store. Can be omitted if the HDF file
contains a single pandas object.
where : list of Term (or convertible) objects, optional
Expand All @@ -293,6 +300,7 @@ def read_hdf(path_or_buf, key=None, **kwargs):
if 'where' in kwargs:
kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1)

path_or_buf = _stringify_path(path_or_buf)
if isinstance(path_or_buf, string_types):

try:
Expand All @@ -316,6 +324,7 @@ def read_hdf(path_or_buf, key=None, **kwargs):

store = path_or_buf
auto_close = False

else:
raise NotImplementedError('Support for generic buffers has not been '
'implemented.')
Expand Down
36 changes: 36 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4836,6 +4836,42 @@ def test_read_nokey(self):
df.to_hdf(path, 'df2', mode='a')
self.assertRaises(ValueError, read_hdf, path)

def test_read_from_pathlib_path(self):
    # GH11773: to_hdf / read_hdf should accept a pathlib.Path in place of
    # a plain string file name.
    tm._skip_if_no_pathlib()

    # Import follows the skip guard above.
    from pathlib import Path

    values = np.random.rand(4, 5)
    expected = DataFrame(values, index=list('abcd'), columns=list('ABCDE'))

    with ensure_clean_path(self.path) as filename:
        hdf_location = Path(filename)
        # Write and read back through the same path object.
        expected.to_hdf(hdf_location, 'df', mode='a')
        actual = read_hdf(hdf_location, 'df')

    tm.assert_frame_equal(expected, actual)

def test_read_from_py_localpath(self):
    # GH11773: to_hdf / read_hdf should accept a py.path.local object in
    # place of a plain string file name.
    tm._skip_if_no_localpath()

    # Import follows the skip guard above.
    from py.path import local as LocalPath

    values = np.random.rand(4, 5)
    expected = DataFrame(values, index=list('abcd'), columns=list('ABCDE'))

    with ensure_clean_path(self.path) as filename:
        hdf_location = LocalPath(filename)
        # Write and read back through the same path object.
        expected.to_hdf(hdf_location, 'df', mode='a')
        actual = read_hdf(hdf_location, 'df')

    tm.assert_frame_equal(expected, actual)


class TestHDFComplexValues(Base):
# GH10447
Expand Down

0 comments on commit 62bed0e

Please sign in to comment.