working on bunch of issues

pandas-dev · Jan 6, 2012 · 4a5a677 · 4a5a677
1 parent edb2409
commit 4a5a677
Show file tree

Hide file tree

Showing 6 changed files with 251 additions and 89 deletions.
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -278,7 +278,7 @@ over the string representation of the object. All arguments are optional:
 
   - ``buf`` default None, for example a StringIO object
   - ``columns`` default None, which columns to write
-  - ``colSpace`` default None, number of spaces to write between columns
+  - ``col_space`` default None, number of spaces to write between columns
   - ``na_rep`` default ``NaN``, representation of NA value
   - ``formatters`` default None, a dictionary (by column) of functions each of
     which takes a single argument and returns a formatted string
@@ -288,6 +288,8 @@ over the string representation of the object. All arguments are optional:
   - ``sparsify`` default True, set to False for a DataFrame with a hierarchical
     index to print every multiindex key at each row.
   - ``index_names`` default True, will print the names of the indices
+  - ``index`` default True, will print the index (i.e., row labels)
+  - ``header`` default True, will print the column labels
 
 The Series object also has a ``to_string`` method, but with only the ``buf``,
 ``na_rep``, ``float_format`` arguments. There is also a ``length`` argument

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -360,8 +360,8 @@ def _try_sort(iterable):
     except Exception:
         return listed
 
-def set_printoptions(precision=None, column_space=None, max_rows=None,
-        max_columns=None):
+def set_printoptions(precision=None, column_space=None, max_rows=None, 
+                     max_columns=None, justify='right'):
     """
     Alter default behavior of DataFrame.toString
 
@@ -376,8 +376,11 @@ def set_printoptions(precision=None, column_space=None, max_rows=None,
         Either one, or both can be set to 0 (experimental). Pandas will figure
         out how big the terminal is and will not display more rows and/or
         columns than can fit on it.
+    justify : string
+        'right' or 'left' to justify the values of the dataframe using this
+        alignment
     """
-    global _float_format, _column_space, _max_rows, _max_columns
+    global _float_format, _column_space, _max_rows, _max_columns, _justify
     if precision is not None:
         float_format = '%.' + '%d' % precision + 'g'
         _float_format = lambda x: float_format % x
@@ -387,6 +390,8 @@ def set_printoptions(precision=None, column_space=None, max_rows=None,
         _max_rows = max_rows
     if max_columns is not None:
         _max_columns = max_columns
+    if justify is not None and justify in ('right', 'left'):
+        _justify = justify
 
 class EngFormatter(object):
     """
@@ -491,33 +496,11 @@ def set_eng_float_format(precision=3, use_eng_prefix=False):
     _float_format = EngFormatter(precision, use_eng_prefix)
     _column_space = max(12, precision + 9)
 
-_float_format = lambda x: '%.4g' % x
+_float_format = lambda x: '% .4f' % x
 _column_space = 12
 _max_rows = 500
 _max_columns = 0
-
-def _pfixed(s, space, na_rep=None, float_format=None):
-    if isinstance(s, float):
-        if na_rep is not None and isnull(s):
-            if np.isnan(s):
-                s = na_rep
-            return (' %s' % s).ljust(space)
-
-        if float_format:
-            formatted = float_format(s)
-        else:
-            is_neg = s < 0
-            formatted = _float_format(np.abs(s))
-
-            if is_neg:
-                formatted = '-' + formatted
-            else:
-                formatted = ' ' + formatted
-
-        return formatted.ljust(space)
-    else:
-        stringified = _stringify(s)
-        return (' %s' % stringified)[:space].ljust(space)
+_justify = 'right'
 
 def _stringify(col):
     # unicode workaround
@@ -526,29 +509,31 @@ def _stringify(col):
     else:
         return '%s' % col
 
-def _format(s, na_rep=None, float_format=None):
+def _format(s, space=None, na_rep=None, float_format=None):
+    def _just_help(x):
+        if space is None:
+            return x
+        if _justify == 'right':
+            return x[:space].rjust(space)
+        else:
+            return x[:space].ljust(space)
+
     if isinstance(s, float):
         if na_rep is not None and isnull(s):
             if np.isnan(s):
                 s = na_rep
-            return ' %s' % s
+            return _just_help('%s' % s)
 
         if float_format:
             formatted = float_format(s)
         else:
-            is_neg = s < 0
-            formatted = _float_format(np.abs(s))
-
-            if is_neg:
-                formatted = '-' + formatted
-            else:
-                formatted = ' ' + formatted
+            formatted = _float_format(s)
 
-        return formatted
+        return _just_help(formatted)
     else:
-        return ' %s' % _stringify(s)
+        return _just_help('%s' % _stringify(s))
 
-#-------------------------------------------------------------------------------
+#------------------------------------------------------------------------------
 # miscellaneous python tools
 
 def rands(n):
@@ -564,14 +549,22 @@ def adjoin(space, *lists):
     """
     outLines = []
     newLists = []
-    lengths = [max(map(len, x)) + space for x in lists[:-1]]
 
-    # not the last one
-    lengths.append(max(map(len, lists[-1])))
+    if _justify == 'right':
+        # everyone but the first one, add space (right-aligned)
+        lengths = [max(map(len, x)) + space for x in lists[1:]]
+        lengths.insert(0, max(map(len, lists[0])))
+    else:
+        # everyone but the last one, add space (left-aligned)
+        lengths = [max(map(len, x)) + space for x in lists[:-1]]
+        lengths.append(max(map(len, lists[-1])))
 
     maxLen = max(map(len, lists))
     for i, lst in enumerate(lists):
-        nl = [x.ljust(lengths[i]) for x in lst]
+        if _justify == 'right':
+            nl = [x.rjust(lengths[i]) for x in lst]
+        else:
+            nl = [x.ljust(lengths[i]) for x in lst]
         nl.extend([' ' * lengths[i]] * (maxLen - len(lst)))
         newLists.append(nl)
     toJoin = zip(*newLists)
@@ -691,6 +684,9 @@ def is_integer_dtype(arr):
 def is_float_dtype(arr):
     return issubclass(arr.dtype.type, np.floating)
 
+def is_numeric_dtype(arr):
+    return is_integer_dtype(arr) or is_float_dtype(arr)
+
 def save(obj, path):
     """
     Pickle (serialize) object to input file path

diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -1,19 +1,50 @@
 from StringIO import StringIO
-from pandas.core.common import adjoin, _pfixed
+from pandas.core.common import adjoin, is_numeric_dtype
 from pandas.core.index import MultiIndex, _ensure_index
 
+docstring_to_string = """
+    Parameters
+    ----------
+    frame : DataFrame
+        object to render
+    buf : StringIO-like, optional
+        buffer to write to
+    columns : sequence, optional
+        the subset of columns to write; default None writes all columns
+    col_space : int, optional
+        the width of each column
+    header : bool, optional
+        whether to print column labels, default True
+    index : bool, optional
+        whether to print index (row) labels, default True
+    na_rep : string, optional
+        string representation of NAN to use, default 'NaN'
+    formatters : list or dict of one-parameter functions, optional
+        formatter functions to apply to columns' elements by position or name,
+        default None
+    float_format : one-parameter function, optional
+        formatter function to apply to columns' elements if they are floats
+        default None
+    sparsify : bool, optional
+        Set to False for a DataFrame with a hierarchical index to print every
+        multiindex key at each row, default True
+    index_names : bool, optional
+        Prints the names of the indexes, default True """
 
 class DataFrameFormatter(object):
     """
     Render a DataFrame
 
     self.to_string() : console-friendly tabular output
-    self.to_html() : html table
+    self.to_html()   : html table
+
     """
-    def __init__(self, frame, buf=None, columns=None, col_space=None,
-                 na_rep='NaN', formatters=None, float_format=None,
-                 sparsify=True, index_names=True):
 
+    __doc__ += docstring_to_string
+
+    def __init__(self, frame, buf=None, columns=None, col_space=None,
+                 header=True, index=True, na_rep='NaN', formatters=None,
+                 float_format=None, sparsify=True, index_names=True):
         self.frame = frame
         self.buf = buf if buf is not None else StringIO()
         self.show_index_names = index_names
@@ -22,6 +53,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         self.formatters = formatters
         self.na_rep = na_rep
         self.col_space = col_space
+        self.header = header
+        self.index = index
 
         if columns is not None:
             self.columns = _ensure_index(columns)
@@ -47,10 +80,16 @@ def to_string(self):
             str_index = self._get_formatted_index()
             str_columns = self._get_formatted_column_labels()
 
-            stringified = [str_columns[i] + format_col(c)
-                           for i, c in enumerate(self.columns)]
+            if self.header:
+                stringified = [str_columns[i] + format_col(c)
+                               for i, c in enumerate(self.columns)]
+            else:
+                stringified = [format_col(c) for c in self.columns]
 
-            to_write.append(adjoin(1, str_index, *stringified))
+            if self.index:
+                to_write.append(adjoin(1, str_index, *stringified))
+            else:
+                to_write.append(adjoin(1, *stringified))
 
         for s in to_write:
             if isinstance(s, unicode):
@@ -114,17 +153,21 @@ def _column_header():
             write(buf, '</tbody>', indent  + indent_delta)
         else:
             indent += indent_delta
-            write(buf, '<thead>', indent)
-            row = []
 
             # header row
-            col_row = _column_header()
-            indent += indent_delta
-            write_tr(buf, col_row, indent, indent_delta, header=True)
-            if self.has_index_names:
-                row = frame.index.names + [''] * len(frame.columns)
-                write_tr(buf, row, indent, indent_delta, header=True)
-            write(buf, '</thead>', indent)
+            if self.header:
+                write(buf, '<thead>', indent)
+                row = []
+
+                col_row = _column_header()
+                indent += indent_delta
+                write_tr(buf, col_row, indent, indent_delta, header=True)
+                if self.has_index_names:
+                    row = frame.index.names + [''] * len(frame.columns)
+                    write_tr(buf, row, indent, indent_delta, header=True)
+
+                write(buf, '</thead>', indent)
+
             write(buf, '<tbody>', indent)
 
             # write values
@@ -148,14 +191,9 @@ def _get_column_formatter(self):
 
         col_space = self.col_space
 
-        if col_space is None:
-            def _myformat(v):
-                return _format(v, na_rep=self.na_rep,
-                               float_format=self.float_format)
-        else:
-            def _myformat(v):
-                return _pfixed(v, col_space, na_rep=self.na_rep,
-                               float_format=self.float_format)
+        def _myformat(v):
+            return _format(v, space=col_space, na_rep=self.na_rep,
+                           float_format=self.float_format)
 
         formatters = {} if self.formatters is None else self.formatters
 
@@ -171,16 +209,24 @@ def _format_col(col, i=None):
     def _get_formatted_column_labels(self):
         from pandas.core.index import _sparsify
 
+        formatters = self.formatters
+        if formatters is None:
+            formatters = {}
+
         if isinstance(self.columns, MultiIndex):
             fmt_columns = self.columns.format(sparsify=False, adjoin=False)
-            str_columns = zip(*[[' %s' % y for y in x]
+            str_columns = zip(*[[' %s' % y if y not in formatters and is_numeric_dtype(self.frame[x])
+                                else str(y)
+                                for y in x]
                                 for x in zip(*fmt_columns)])
             if self.sparsify:
                 str_columns = _sparsify(str_columns)
 
             str_columns = [list(x) for x in zip(*str_columns)]
         else:
-            str_columns = [[' %s' % x] for x in self.columns.format()]
+            str_columns = [[' %s' % x if x not in formatters and is_numeric_dtype(self.frame[x])
+                           else str(x)]
+                           for x in self.columns.format()]
 
         if self.show_index_names and self.has_index_names:
             for x in str_columns:
@@ -201,7 +247,7 @@ def _get_formatted_index(self):
         columns = self.frame.columns
 
         show_index_names = self.show_index_names and self.has_index_names
-        show_col_names = self.show_index_names and self.has_column_names
+        show_col_names = (self.show_index_names and self.has_column_names)
 
         if isinstance(index, MultiIndex):
             fmt_index = index.format(sparsify=self.sparsify, adjoin=False,
@@ -213,11 +259,14 @@ def _get_formatted_index(self):
 
         # empty space for columns
         if show_col_names:
-            col_header = ['  %s' % x for x in self._get_column_name_list()]
+            col_header = ['%s' % x for x in self._get_column_name_list()]
         else:
             col_header = [''] * columns.nlevels
 
-        return col_header + adjoined
+        if self.header:
+            return col_header + adjoined
+        else:
+            return adjoined
 
     def _get_column_name_list(self):
         names = []
@@ -229,7 +278,6 @@ def _get_column_name_list(self):
             names.append('' if columns.name is None else columns.name)
         return names
 
-
 def single_column_table(column):
     table = '<table><tbody>'
     for i in column: