Skip to content

Commit

Permalink
BUG: Handle numpy strings in index names in HDF5 pandas-dev#13492
Browse files Browse the repository at this point in the history
  • Loading branch information
makmanalp committed Jun 1, 2017
1 parent fb47ee5 commit 90f63b0
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ I/O
- Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`)
- Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`)
- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`)
- Bug where ``pd.read_hdf()`` returned numpy strings instead of Python strings for index names (:issue:`13492`)

- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`)

Expand Down
2 changes: 2 additions & 0 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2568,6 +2568,8 @@ def read_index_node(self, node, start=None, stop=None):

if 'name' in node._v_attrs:
name = node._v_attrs.name
if isinstance(name, compat.string_types):
name = compat.text_type(name)

index_class = self._alias_to_class(getattr(node._v_attrs,
'index_class', ''))
Expand Down
23 changes: 22 additions & 1 deletion pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
date_range, timedelta_range, Index, DatetimeIndex,
isnull)

from pandas.compat import is_platform_windows, PY3, PY35, BytesIO
from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type
from pandas.io.formats.printing import pprint_thing

tables = pytest.importorskip('tables')
Expand Down Expand Up @@ -2920,6 +2920,27 @@ def test_store_index_name_with_tz(self):
recons = store['frame']
tm.assert_frame_equal(recons, df)

@pytest.mark.parametrize('table_format', ['table', 'fixed'])
def test_store_index_name_numpy_str(self, table_format):
    """Index/column names must come back from HDF5 as plain text strings.

    Regression test for GH #13492: a frame whose row and column indexes
    carry non-ASCII names is written with ``to_hdf`` in the parametrized
    format and read back with ``read_hdf``; the names on the result must
    be exactly ``text_type`` rather than a numpy string type.
    """
    # GH #13492
    column_dates = pd.to_datetime([datetime.date(2000, 1, 1),
                                   datetime.date(2000, 1, 2)])
    row_dates = pd.to_datetime([datetime.date(2010, 1, 1),
                                datetime.date(2010, 1, 2)])
    column_index = pd.Index(column_dates, name=u('cols\u05d2'))
    row_index = pd.Index(row_dates, name=u('rows\u05d0'))
    expected = pd.DataFrame(np.arange(4).reshape(2, 2),
                            columns=column_index, index=row_index)

    # This used to fail, returning numpy strings instead of python strings.
    with ensure_clean_path(self.path) as path:
        expected.to_hdf(path, 'df', format=table_format)
        result = read_hdf(path, 'df')

        assert_frame_equal(expected, result, check_names=True)

        # Compare the exact type on purpose: a numpy string scalar would
        # still satisfy an isinstance() check against the text type.
        for axis_name in (result.index.name, result.columns.name):
            assert type(axis_name) is text_type

def test_store_series_name(self):
df = tm.makeDataFrame()
series = df['A']
Expand Down

0 comments on commit 90f63b0

Please sign in to comment.