From 49992e9c5b5edfe9880d93466df4d448f3add5db Mon Sep 17 00:00:00 2001 From: lexual Date: Thu, 12 Nov 2015 22:39:55 +1100 Subject: [PATCH] ENH: #2679 - DataFrame.to_html() urls_as_links parameter. New urls_as_links boolean parameter that will output urls as href html links. ref #2679 --- pandas/core/format.py | 11 +++++- pandas/core/frame.py | 7 ++-- pandas/tests/test_format.py | 71 +++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 3 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index efa4b182f1133..18cdc126db919 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -7,6 +7,7 @@ from pandas.core.base import PandasObject from pandas.core.common import adjoin, notnull +from pandas.io.common import _is_url from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat from pandas.compat import(StringIO, lzip, range, map, zip, reduce, u, @@ -341,7 +342,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='NaN', formatters=None, justify=None, float_format=None, sparsify=None, index_names=True, line_width=None, max_rows=None, - max_cols=None, show_dimensions=False, **kwds): + max_cols=None, show_dimensions=False, urls_as_links=False, + **kwds): self.frame = frame self.buf = buf if buf is not None else StringIO() self.show_index_names = index_names @@ -363,6 +365,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) self.show_dimensions = show_dimensions + self.urls_as_links = urls_as_links if justify is None: self.justify = get_option("display.colheader_justify") @@ -863,6 +866,7 @@ def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, self.max_rows = max_rows or len(self.fmt.frame) self.max_cols = max_cols or len(self.fmt.columns) self.show_dimensions = self.fmt.show_dimensions + self.urls_as_links = self.fmt.urls_as_links 
self.is_truncated = (self.max_rows < len(self.fmt.frame) or self.max_cols < len(self.fmt.columns)) self.notebook = notebook @@ -896,6 +900,11 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): else: esc = {} rs = com.pprint_thing(s, escape_chars=esc).strip() + if self.urls_as_links and isinstance(s, compat.string_types): + s = s.strip() + if _is_url(s): + rs = '{escaped_url}'.format(url=s, + escaped_url=rs) self.write( '%s%s' % (start_tag, rs, kind), indent) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index de74b70cdfaac..356e32d2f43a0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1479,7 +1479,7 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, float_format=None, sparsify=None, index_names=True, justify=None, bold_rows=True, classes=None, escape=True, max_rows=None, max_cols=None, show_dimensions=False, - notebook=False): + notebook=False, urls_as_links=False): """ Render a DataFrame as an HTML table. @@ -1497,6 +1497,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, max_cols : int, optional Maximum number of columns to show before truncating. If None, show all. + urls_as_links : boolean, default False + Convert urls to HTML links. 
""" @@ -1517,7 +1519,8 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, escape=escape, max_rows=max_rows, max_cols=max_cols, - show_dimensions=show_dimensions) + show_dimensions=show_dimensions, + urls_as_links=urls_as_links) formatter.to_html(classes=classes, notebook=notebook) if buf is None: diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 22555a84c55de..850843b2870a5 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -949,6 +949,77 @@ def test_to_html_multiindex_sparsify_false_multi_sparse(self): """ self.assertEqual(result, expected) + def test_to_html_with_hyperlinks(self): + data = [ + { + 'foo': 0, + 'bar': 'http://pandas.pydata.org/', + None: 'pydata.org', + }, + { + 'foo': 0, + 'bar': 'http://pandas.pydata.org/?q1=a&q2=b', + None: 'pydata.org', + }, + ] + df = DataFrame(data, columns=['foo', 'bar', None], + index=range(len(data))) + + result_no_links = df.to_html() + result_with_links = df.to_html(urls_as_links=True) + expected_no_links = """\ + + + + + + + + + + + + + + + + + + + + + + + +
foobarNone
00http://pandas.pydata.org/pydata.org
10http://pandas.pydata.org/?q1=a&q2=bpydata.org
""" + expected_with_links = """\ + + + + + + + + + + + + + + + + + + + + + + + +
foobarNone
00http://pandas.pydata.org/pydata.org
10http://pandas.pydata.org/?q1=a&q2=bpydata.org
""" + self.assertEqual(result_with_links, expected_with_links) + self.assertEqual(result_no_links, expected_no_links) + def test_to_html_multiindex_sparsify(self): index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], names=['foo', None])