diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 379249b6e55d67..7bca4174da2978 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -70,6 +70,7 @@ I/O - Bug that raised IndexError HTML-rendering an empty DataFrame (:issue:`15953`) - Bug in ``pd.read_csv()`` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) - Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) +- Bug where ``pd.read_hdf()`` returned numpy strings for index names (:issue:`13492`) - Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b838260d1f73c8..9b2f7a677853a0 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2568,6 +2568,8 @@ def read_index_node(self, node, start=None, stop=None): if 'name' in node._v_attrs: name = node._v_attrs.name + if isinstance(name, compat.string_types): + name = compat.text_type(name) index_class = self._alias_to_class(getattr(node._v_attrs, 'index_class', '')) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ae14f74ece31c1..e79b79cfe7d9b4 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -16,7 +16,7 @@ date_range, timedelta_range, Index, DatetimeIndex, isnull) -from pandas.compat import is_platform_windows, PY3, PY35, BytesIO +from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type from pandas.io.formats.printing import pprint_thing tables = pytest.importorskip('tables') @@ -2920,6 +2920,29 @@ def test_store_index_name_with_tz(self): recons = store['frame'] tm.assert_frame_equal(recons, df) + @pytest.mark.parametrize('table_format', ['table', 'fixed']) + def test_store_index_name_numpy_str(self, table_format): + # GH #13492 + idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1), + 
datetime.date(2000, 1, 2)]), + name=u('cols\u05d2')) + idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1), + datetime.date(2010, 1, 2)]), + name=u('rows\u05d0')) + df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) + + # This used to fail, returning numpy strings instead of python strings. + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format=table_format) + df2 = read_hdf(path, 'df') + assert_frame_equal(df, df2) + + assert type(df2.index.name) == text_type + assert df2.index.name == u('rows\u05d0') + + assert type(df2.columns.name) == text_type + assert df2.columns.name == u('cols\u05d2') + def test_store_series_name(self): df = tm.makeDataFrame() series = df['A']