Skip to content

Commit

Permalink
BUG: convert numpy strings in index names in HDF pandas-dev#13492 (pa…
Browse files Browse the repository at this point in the history
…ndas-dev#16444)

* BUG: Handle numpy strings in index names in HDF5 pandas-dev#13492

* REF: refactor to _ensure_str
  • Loading branch information
makmanalp authored and Kiv committed Jun 11, 2017
1 parent b3769f1 commit a0174eb
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ I/O
- Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`)
- Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`)
- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`)
- Bug where ``pd.read_hdf()`` returned numpy strings for index names (:issue:`13492`)

- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`)

Expand Down
14 changes: 13 additions & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ def _ensure_encoding(encoding):
return encoding


def _ensure_str(name):
    """Normalize an index / column name read back from an HDF5 node.

    String-like names are converted to the native text type (``str`` on
    Python 3, ``unicode`` on Python 2) so that callers do not receive a
    numpy string scalar. Anything that is not string-like is returned
    unchanged.

    See https://github.com/pandas-dev/pandas/issues/13492
    """
    # Guard clause: non-string names are passed through unchanged.
    if not isinstance(name, compat.string_types):
        return name
    return compat.text_type(name)


Term = Expr


Expand Down Expand Up @@ -2574,7 +2586,7 @@ def read_index_node(self, node, start=None, stop=None):
name = None

if 'name' in node._v_attrs:
name = node._v_attrs.name
name = _ensure_str(node._v_attrs.name)

index_class = self._alias_to_class(getattr(node._v_attrs,
'index_class', ''))
Expand Down
23 changes: 22 additions & 1 deletion pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
date_range, timedelta_range, Index, DatetimeIndex,
isnull)

from pandas.compat import is_platform_windows, PY3, PY35, BytesIO
from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type
from pandas.io.formats.printing import pprint_thing

tables = pytest.importorskip('tables')
Expand Down Expand Up @@ -2922,6 +2922,27 @@ def test_store_index_name_with_tz(self):
recons = store['frame']
tm.assert_frame_equal(recons, df)

@pytest.mark.parametrize('table_format', ['table', 'fixed'])
def test_store_index_name_numpy_str(self, table_format):
    # GH #13492
    # Index / column names must round-trip through HDF as the native text
    # type, not as a numpy string scalar.
    column_dates = pd.to_datetime([datetime.date(2000, 1, 1),
                                   datetime.date(2000, 1, 2)])
    row_dates = pd.to_datetime([datetime.date(2010, 1, 1),
                                datetime.date(2010, 1, 2)])
    column_index = pd.Index(column_dates, name=u('cols\u05d2'))
    row_index = pd.Index(row_dates, name=u('rows\u05d0'))
    expected = pd.DataFrame(np.arange(4).reshape(2, 2),
                            columns=column_index, index=row_index)

    # Before the fix the names came back as numpy strings instead of
    # python strings.
    with ensure_clean_path(self.path) as path:
        expected.to_hdf(path, 'df', format=table_format)
        result = read_hdf(path, 'df')

        assert_frame_equal(expected, result, check_names=True)

        # Exact type comparison is intentional: numpy string scalars
        # subclass the builtin text type, so isinstance() would not
        # detect the regression.
        for axis_name in (result.index.name, result.columns.name):
            assert type(axis_name) == text_type

def test_store_series_name(self):
df = tm.makeDataFrame()
series = df['A']
Expand Down

0 comments on commit a0174eb

Please sign in to comment.