Skip to content

Commit

Permalink
working on bunch of issues
Browse files Browse the repository at this point in the history
  • Loading branch information
adamklein authored and wesm committed Jan 6, 2012
1 parent edb2409 commit 4a5a677
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 89 deletions.
4 changes: 3 additions & 1 deletion doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ over the string representation of the object. All arguments are optional:

- ``buf`` default None, for example a StringIO object
- ``columns`` default None, which columns to write
- ``colSpace`` default None, number of spaces to write between columns
- ``col_space`` default None, number of spaces to write between columns
- ``na_rep`` default ``NaN``, representation of NA value
- ``formatters`` default None, a dictionary (by column) of functions each of
which takes a single argument and returns a formatted string
Expand All @@ -288,6 +288,8 @@ over the string representation of the object. All arguments are optional:
- ``sparsify`` default True, set to False for a DataFrame with a hierarchical
index to print every multiindex key at each row.
- ``index_names`` default True, will print the names of the indices
- ``index`` default True, will print the index (i.e., row labels)
- ``header`` default True, will print the column labels

The Series object also has a ``to_string`` method, but with only the ``buf``,
``na_rep``, ``float_format`` arguments. There is also a ``length`` argument
Expand Down
82 changes: 39 additions & 43 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,8 @@ def _try_sort(iterable):
except Exception:
return listed

def set_printoptions(precision=None, column_space=None, max_rows=None,
max_columns=None):
def set_printoptions(precision=None, column_space=None, max_rows=None,
max_columns=None, justify='right'):
"""
Alter default behavior of DataFrame.to_string
Expand All @@ -376,8 +376,11 @@ def set_printoptions(precision=None, column_space=None, max_rows=None,
Either one, or both can be set to 0 (experimental). Pandas will figure
out how big the terminal is and will not display more rows or/and
columns that can fit on it.
justify : string
'right' or 'left' to justify the values of the dataframe using this
alignment
"""
global _float_format, _column_space, _max_rows, _max_columns
global _float_format, _column_space, _max_rows, _max_columns, _justify
if precision is not None:
float_format = '%.' + '%d' % precision + 'g'
_float_format = lambda x: float_format % x
Expand All @@ -387,6 +390,8 @@ def set_printoptions(precision=None, column_space=None, max_rows=None,
_max_rows = max_rows
if max_columns is not None:
_max_columns = max_columns
if justify is not None and justify in ('right', 'left'):
_justify = justify

class EngFormatter(object):
"""
Expand Down Expand Up @@ -491,33 +496,11 @@ def set_eng_float_format(precision=3, use_eng_prefix=False):
_float_format = EngFormatter(precision, use_eng_prefix)
_column_space = max(12, precision + 9)

_float_format = lambda x: '%.4g' % x
_float_format = lambda x: '% .4f' % x
_column_space = 12
_max_rows = 500
_max_columns = 0

def _pfixed(s, space, na_rep=None, float_format=None):
if isinstance(s, float):
if na_rep is not None and isnull(s):
if np.isnan(s):
s = na_rep
return (' %s' % s).ljust(space)

if float_format:
formatted = float_format(s)
else:
is_neg = s < 0
formatted = _float_format(np.abs(s))

if is_neg:
formatted = '-' + formatted
else:
formatted = ' ' + formatted

return formatted.ljust(space)
else:
stringified = _stringify(s)
return (' %s' % stringified)[:space].ljust(space)
_justify = 'right'

def _stringify(col):
# unicode workaround
Expand All @@ -526,29 +509,31 @@ def _stringify(col):
else:
return '%s' % col

def _format(s, na_rep=None, float_format=None):
def _format(s, space=None, na_rep=None, float_format=None):
def _just_help(x):
if space is None:
return x
if _justify == 'right':
return x[:space].rjust(space)
else:
return x[:space].ljust(space)

if isinstance(s, float):
if na_rep is not None and isnull(s):
if np.isnan(s):
s = na_rep
return ' %s' % s
return _just_help('%s' % s)

if float_format:
formatted = float_format(s)
else:
is_neg = s < 0
formatted = _float_format(np.abs(s))

if is_neg:
formatted = '-' + formatted
else:
formatted = ' ' + formatted
formatted = _float_format(s)

return formatted
return _just_help(formatted)
else:
return ' %s' % _stringify(s)
return _just_help('%s' % _stringify(s))

#-------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# miscellaneous python tools

def rands(n):
Expand All @@ -564,14 +549,22 @@ def adjoin(space, *lists):
"""
outLines = []
newLists = []
lengths = [max(map(len, x)) + space for x in lists[:-1]]

# not the last one
lengths.append(max(map(len, lists[-1])))
if _justify == 'right':
# everyone but the first one, add space (right-aligned)
lengths = [max(map(len, x)) + space for x in lists[1:]]
lengths.insert(0, max(map(len, lists[0])))
else:
# everyone but the last one, add space (left-aligned)
lengths = [max(map(len, x)) + space for x in lists[:-1]]
lengths.append(max(map(len, lists[-1])))

maxLen = max(map(len, lists))
for i, lst in enumerate(lists):
nl = [x.ljust(lengths[i]) for x in lst]
if _justify == 'right':
nl = [x.rjust(lengths[i]) for x in lst]
else:
nl = [x.ljust(lengths[i]) for x in lst]
nl.extend([' ' * lengths[i]] * (maxLen - len(lst)))
newLists.append(nl)
toJoin = zip(*newLists)
Expand Down Expand Up @@ -691,6 +684,9 @@ def is_integer_dtype(arr):
def is_float_dtype(arr):
    """
    Tell whether an array-like holds floating-point values.

    Parameters
    ----------
    arr : object exposing ``dtype.type`` (e.g. a numpy ndarray)

    Returns
    -------
    bool
        True when ``arr.dtype.type`` is a subclass of ``np.floating``.
    """
    scalar_type = arr.dtype.type
    return issubclass(scalar_type, np.floating)

def is_numeric_dtype(arr):
    """
    Tell whether an array-like holds integer or floating-point values.

    The integer check runs first; the float check is only consulted when
    the integer check yields a falsy result.

    Returns
    -------
    The result of ``is_integer_dtype(arr)`` when it is truthy, otherwise
    the result of ``is_float_dtype(arr)``.
    """
    integer_result = is_integer_dtype(arr)
    if integer_result:
        return integer_result
    return is_float_dtype(arr)

def save(obj, path):
"""
Pickle (serialize) object to input file path
Expand Down
110 changes: 79 additions & 31 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,50 @@
from StringIO import StringIO
from pandas.core.common import adjoin, _pfixed
from pandas.core.common import adjoin, is_numeric_dtype
from pandas.core.index import MultiIndex, _ensure_index

# Shared parameter documentation for the frame rendering options; it is
# appended to DataFrameFormatter.__doc__ in the class body.
docstring_to_string = """
Parameters
----------
frame : DataFrame
object to render
buf : StringIO-like, optional
buffer to write to
columns : sequence, optional
the subset of columns to write; default None writes all columns
col_space : int, optional
the width of each column
header : bool, optional
whether to print column labels, default True
index : bool, optional
whether to print index (row) labels, default True
na_rep : string, optional
string representation of NAN to use, default 'NaN'
formatters : list or dict of one-parameter functions, optional
formatter functions to apply to columns' elements by position or name,
default None
float_format : one-parameter function, optional
formatter function to apply to columns' elements if they are floats
default None
sparsify : bool, optional
Set to False for a DataFrame with a hierarchical index to print every
multiindex key at each row, default True
index_names : bool, optional
Prints the names of the indexes, default True """

class DataFrameFormatter(object):
"""
Render a DataFrame
self.to_string() : console-friendly tabular output
self.to_html() : html table
self.to_html() : html table
"""
def __init__(self, frame, buf=None, columns=None, col_space=None,
na_rep='NaN', formatters=None, float_format=None,
sparsify=True, index_names=True):

__doc__ += docstring_to_string

def __init__(self, frame, buf=None, columns=None, col_space=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=True, index_names=True):
self.frame = frame
self.buf = buf if buf is not None else StringIO()
self.show_index_names = index_names
Expand All @@ -22,6 +53,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
self.formatters = formatters
self.na_rep = na_rep
self.col_space = col_space
self.header = header
self.index = index

if columns is not None:
self.columns = _ensure_index(columns)
Expand All @@ -47,10 +80,16 @@ def to_string(self):
str_index = self._get_formatted_index()
str_columns = self._get_formatted_column_labels()

stringified = [str_columns[i] + format_col(c)
for i, c in enumerate(self.columns)]
if self.header:
stringified = [str_columns[i] + format_col(c)
for i, c in enumerate(self.columns)]
else:
stringified = [format_col(c) for c in self.columns]

to_write.append(adjoin(1, str_index, *stringified))
if self.index:
to_write.append(adjoin(1, str_index, *stringified))
else:
to_write.append(adjoin(1, *stringified))

for s in to_write:
if isinstance(s, unicode):
Expand Down Expand Up @@ -114,17 +153,21 @@ def _column_header():
write(buf, '</tbody>', indent + indent_delta)
else:
indent += indent_delta
write(buf, '<thead>', indent)
row = []

# header row
col_row = _column_header()
indent += indent_delta
write_tr(buf, col_row, indent, indent_delta, header=True)
if self.has_index_names:
row = frame.index.names + [''] * len(frame.columns)
write_tr(buf, row, indent, indent_delta, header=True)
write(buf, '</thead>', indent)
if self.header:
write(buf, '<thead>', indent)
row = []

col_row = _column_header()
indent += indent_delta
write_tr(buf, col_row, indent, indent_delta, header=True)
if self.has_index_names:
row = frame.index.names + [''] * len(frame.columns)
write_tr(buf, row, indent, indent_delta, header=True)

write(buf, '</thead>', indent)

write(buf, '<tbody>', indent)

# write values
Expand All @@ -148,14 +191,9 @@ def _get_column_formatter(self):

col_space = self.col_space

if col_space is None:
def _myformat(v):
return _format(v, na_rep=self.na_rep,
float_format=self.float_format)
else:
def _myformat(v):
return _pfixed(v, col_space, na_rep=self.na_rep,
float_format=self.float_format)
def _myformat(v):
return _format(v, space=col_space, na_rep=self.na_rep,
float_format=self.float_format)

formatters = {} if self.formatters is None else self.formatters

Expand All @@ -171,16 +209,24 @@ def _format_col(col, i=None):
def _get_formatted_column_labels(self):
from pandas.core.index import _sparsify

formatters = self.formatters
if formatters is None:
formatters = {}

if isinstance(self.columns, MultiIndex):
fmt_columns = self.columns.format(sparsify=False, adjoin=False)
str_columns = zip(*[[' %s' % y for y in x]
str_columns = zip(*[[' %s' % y if y not in formatters and is_numeric_dtype(self.frame[x])
else str(y)
for y in x]
for x in zip(*fmt_columns)])
if self.sparsify:
str_columns = _sparsify(str_columns)

str_columns = [list(x) for x in zip(*str_columns)]
else:
str_columns = [[' %s' % x] for x in self.columns.format()]
str_columns = [[' %s' % x if x not in formatters and is_numeric_dtype(self.frame[x])
else str(x)]
for x in self.columns.format()]

if self.show_index_names and self.has_index_names:
for x in str_columns:
Expand All @@ -201,7 +247,7 @@ def _get_formatted_index(self):
columns = self.frame.columns

show_index_names = self.show_index_names and self.has_index_names
show_col_names = self.show_index_names and self.has_column_names
show_col_names = (self.show_index_names and self.has_column_names)

if isinstance(index, MultiIndex):
fmt_index = index.format(sparsify=self.sparsify, adjoin=False,
Expand All @@ -213,11 +259,14 @@ def _get_formatted_index(self):

# empty space for columns
if show_col_names:
col_header = [' %s' % x for x in self._get_column_name_list()]
col_header = ['%s' % x for x in self._get_column_name_list()]
else:
col_header = [''] * columns.nlevels

return col_header + adjoined
if self.header:
return col_header + adjoined
else:
return adjoined

def _get_column_name_list(self):
names = []
Expand All @@ -229,7 +278,6 @@ def _get_column_name_list(self):
names.append('' if columns.name is None else columns.name)
return names


def single_column_table(column):
table = '<table><tbody>'
for i in column:
Expand Down
Loading

0 comments on commit 4a5a677

Please sign in to comment.