Skip to content

Commit

Permalink
BUG: try to convert non-unicode non-ascii characters in repr #1620
Browse files Browse the repository at this point in the history
  • Loading branch information
changhiskhan committed Jul 20, 2012
1 parent 8cc9826 commit 00b31f1
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 8 deletions.
8 changes: 6 additions & 2 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,11 +705,16 @@ def _index_labels_to_array(labels):

return labels

def _stringify(col):
def _stringify(col, encoding='UTF8'):
    """
    Convert ``col`` to a unicode string, trying progressively weaker options.

    Parameters
    ----------
    col : object
        Value to render.
    encoding : str, default 'UTF8'
        Codec used to decode ``col`` when it is a byte string (``str``) that
        ``unicode()`` cannot convert on its own.

    Returns
    -------
    The result of ``unicode(col)`` if that succeeds; otherwise
    ``col.decode(encoding)`` when ``col`` is a ``str`` and decodes cleanly;
    otherwise whatever ``console_encode(col)`` returns.
    """
    # First choice: the plain unicode() conversion.
    try:
        return unicode(col)
    except UnicodeError:
        pass

    # Second choice: a byte string that is not ASCII may still be valid in
    # the requested encoding.
    if isinstance(col, str):
        try:
            return col.decode(encoding)
        except UnicodeError:
            pass

    # Last resort: hand the value to console_encode.
    return console_encode(col)

def _stringify_seq(values):
Expand Down Expand Up @@ -930,4 +935,3 @@ def _concat_compat(to_concat):
return new_values.view(_NS_DTYPE)
else:
return np.concatenate(to_concat)

33 changes: 28 additions & 5 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from itertools import izip
import sys

try:
from StringIO import StringIO
Expand Down Expand Up @@ -480,15 +481,30 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
self.justify = justify

def get_result(self):
    """
    Format the values and pad them to a common width.

    The values are formatted with ``use_unicode=True`` when
    ``self._conv_unicode()`` reports that at least one value needs a unicode
    representation, and with ``use_unicode=False`` otherwise. The formatted
    strings are then passed to ``_make_fixed_width`` together with
    ``self.justify``.
    """
    # Normalise to a plain bool so _format_strings receives True/False.
    wants_unicode = bool(self._conv_unicode())
    formatted = self._format_strings(use_unicode=wants_unicode)
    return _make_fixed_width(formatted, self.justify)

def _have_unicode(self):
mask = lib.map_infer(self.values, lambda x: isinstance(x, unicode))
def _conv_unicode(self):
    """
    Report whether any value should be formatted as unicode.

    A value counts when it is already a ``unicode`` object, or when it is a
    byte string (``str``) that is not pure ASCII but can be decoded with
    ``print_config.encoding``. Anything else -- ASCII byte strings, byte
    strings that cannot be decoded, and non-string objects -- does not count.

    Returns the result of ``.any()`` on the per-value mask produced by
    ``lib.map_infer``.
    """
    def _needs_unicode(val):
        if isinstance(val, unicode):
            return True
        if not isinstance(val, str):
            return False

        # Pure-ASCII byte strings need no conversion.
        try:
            val.decode('ascii')
        except UnicodeError:
            pass
        else:
            return False

        # Non-ASCII bytes: only useful if the configured encoding can
        # actually decode them.
        try:
            val.decode(print_config.encoding)
        except UnicodeError:
            return False
        return True

    flags = lib.map_infer(self.values, _needs_unicode)
    return flags.any()

def _format_strings(self, use_unicode=False):
Expand All @@ -501,7 +517,9 @@ def _format_strings(self, use_unicode=False):
float_format = self.float_format

if use_unicode:
formatter = _stringify if self.formatter is None else self.formatter
def _strify(x):
return _stringify(x, print_config.encoding)
formatter = _strify if self.formatter is None else self.formatter
else:
formatter = str if self.formatter is None else self.formatter

Expand Down Expand Up @@ -668,7 +686,7 @@ def set_printoptions(precision=None, column_space=None, max_rows=None,
max_columns=None, colheader_justify=None,
max_colwidth=None, notebook_repr_html=None,
date_dayfirst=None, date_yearfirst=None,
multi_sparse=None):
multi_sparse=None, encoding=None):
"""
Alter default behavior of DataFrame.toString
Expand Down Expand Up @@ -716,6 +734,8 @@ def set_printoptions(precision=None, column_space=None, max_rows=None,
print_config.date_yearfirst = date_yearfirst
if multi_sparse is not None:
print_config.multi_sparse = multi_sparse
if encoding is not None:
print_config.encoding = encoding

def reset_printoptions():
print_config.reset()
Expand Down Expand Up @@ -846,6 +866,9 @@ def __init__(self):
self.date_dayfirst = False
self.date_yearfirst = False
self.multi_sparse = True
self.encoding = sys.getdefaultencoding()
if self.encoding == 'ascii':
self.encoding = 'UTF8'

def reset(self):
self.__init__()
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,12 @@ def test_to_html_unicode(self):
df = DataFrame({'A' : [u'\u03c3']})
df.to_html()

def test_nonunicode_nonascii_alignment(self):
    """
    Rows containing non-ASCII byte strings must align with ASCII rows.

    "\\xc3\\xa4" is the UTF-8 encoding of a single character, so the first
    cell is six bytes long but only four characters wide. If the repr
    padded by byte count instead of character count, the two data rows
    would come out with different lengths.
    """
    df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]])
    rep_str = df.to_string()
    lines = rep_str.split('\n')
    # lines[0] is the column header; lines[1] and lines[2] are the data rows.
    # assertEqual (rather than the deprecated assert_ alias) reports both
    # lengths when the check fails.
    self.assertEqual(len(lines[1]), len(lines[2]))

def test_unicode_problem_decoding_as_ascii(self):
dm = DataFrame({u'c/\u03c3': Series({'test':np.NaN})})
unicode(dm.to_string())
Expand Down Expand Up @@ -776,4 +782,3 @@ def test_misc(self):
import nose
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
exit=False)

0 comments on commit 00b31f1

Please sign in to comment.