Skip to content

Commit

Permalink
Add option for DataFrame.to_html() to render URL data as links (panda…
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminarjun authored and Pingviinituutti committed Feb 28, 2019
1 parent 5bc5fb6 commit f411e73
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 8 deletions.
22 changes: 22 additions & 0 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2596,6 +2596,28 @@ table CSS classes. Note that these classes are *appended* to the existing
print(df.to_html(classes=['awesome_table_class', 'even_more_awesome_class']))
The ``render_links`` argument provides the ability to add hyperlinks to cells
that contain URLs.

.. versionadded:: 0.24.0

.. ipython:: python
url_df = pd.DataFrame({
'name': ['Python', 'Pandas'],
'url': ['https://www.python.org/', 'http://pandas.pydata.org']})
print(url_df.to_html(render_links=True))
.. ipython:: python
:suppress:
write_html(url_df, 'render_links', render_links=True)
HTML:

.. raw:: html
:file: _static/render_links.html

Finally, the ``escape`` argument allows you to control whether the
"<", ">" and "&" characters are escaped in the resulting HTML (by default it is
``True``). So to get the HTML without escaped characters pass ``escape=False``
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ New features
- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
- :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`)
- :func:`DataFrame.read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`)
- :func:`DataFrame.to_html` now accepts ``render_links`` as an argument, allowing the user to generate HTML with links to any URLs that appear in the DataFrame.
See the :ref:`section on writing HTML <io.html>` in the IO docs for example usage. (:issue:`2679`)

.. _whatsnew_0240.values_api:

Expand Down
13 changes: 10 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2044,8 +2044,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
index=True, na_rep='NaN', formatters=None, float_format=None,
sparsify=None, index_names=True, justify=None, max_rows=None,
max_cols=None, show_dimensions=False, decimal='.',
bold_rows=True, classes=None, escape=True,
notebook=False, border=None, table_id=None):
bold_rows=True, classes=None, escape=True, notebook=False,
border=None, table_id=None, render_links=False):
"""
Render a DataFrame as an HTML table.
%(shared_params)s
Expand All @@ -2067,6 +2067,12 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
A css id is included in the opening `<table>` tag if specified.
.. versionadded:: 0.23.0
render_links : bool, default False
Convert URLs to HTML links.
.. versionadded:: 0.24.0
%(returns)s
See Also
--------
Expand All @@ -2088,7 +2094,8 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True,
max_rows=max_rows,
max_cols=max_cols,
show_dimensions=show_dimensions,
decimal=decimal, table_id=table_id)
decimal=decimal, table_id=table_id,
render_links=render_links)
# TODO: a generic formatter wld b in DataFrameFormatter
formatter.to_html(classes=classes, notebook=notebook, border=border)

Expand Down
6 changes: 4 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
justify=None, float_format=None, sparsify=None,
index_names=True, line_width=None, max_rows=None,
max_cols=None, show_dimensions=False, decimal='.',
table_id=None, **kwds):
table_id=None, render_links=False, **kwds):
self.frame = frame
if buf is not None:
self.buf = _expand_user(_stringify_path(buf))
Expand All @@ -410,6 +410,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
len(self.frame))
self.show_dimensions = show_dimensions
self.table_id = table_id
self.render_links = render_links

if justify is None:
self.justify = get_option("display.colheader_justify")
Expand Down Expand Up @@ -731,7 +732,8 @@ def to_html(self, classes=None, notebook=False, border=None):
"""
from pandas.io.formats.html import HTMLFormatter
html_renderer = HTMLFormatter(self, classes=classes, notebook=notebook,
border=border, table_id=self.table_id)
border=border, table_id=self.table_id,
render_links=self.render_links)
if hasattr(self.buf, 'write'):
html_renderer.write_result(self.buf)
elif isinstance(self.buf, compat.string_types):
Expand Down
18 changes: 15 additions & 3 deletions pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pandas.core.common as com
from pandas.core.config import get_option

from pandas.io.common import _is_url
from pandas.io.formats.format import (
TableFormatter, buffer_put_lines, get_level_lengths)
from pandas.io.formats.printing import pprint_thing
Expand All @@ -25,7 +26,7 @@ class HTMLFormatter(TableFormatter):
indent_delta = 2

def __init__(self, formatter, classes=None, notebook=False, border=None,
table_id=None):
table_id=None, render_links=False):
self.fmt = formatter
self.classes = classes

Expand All @@ -40,6 +41,7 @@ def __init__(self, formatter, classes=None, notebook=False, border=None,
border = get_option('display.html.border')
self.border = border
self.table_id = table_id
self.render_links = render_links

@property
def is_truncated(self):
Expand Down Expand Up @@ -76,9 +78,19 @@ def _write_cell(self, s, kind='td', indent=0, tags=None):
('>', r'&gt;')])
else:
esc = {}

rs = pprint_thing(s, escape_chars=esc).strip()
self.write(u'{start}{rs}</{kind}>'
.format(start=start_tag, rs=rs, kind=kind), indent)

if self.render_links and _is_url(rs):
rs_unescaped = pprint_thing(s, escape_chars={}).strip()
start_tag += '<a href="{url}" target="_blank">'.format(
url=rs_unescaped)
end_a = '</a>'
else:
end_a = ''

self.write(u'{start}{rs}{end_a}</{kind}>'.format(
start=start_tag, rs=rs, end_a=end_a, kind=kind), indent)

def write_tr(self, line, indent=0, indent_delta=0, header=False,
align=None, tags=None, nindex_levels=0):
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/io/formats/data/render_links_false.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>foo</th>
<th>bar</th>
<th>None</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>0</td>
<td>http://pandas.pydata.org/?q1=a&amp;q2=b</td>
<td>pydata.org</td>
</tr>
<tr>
<th>1</th>
<td>0</td>
<td>www.pydata.org</td>
<td>pydata.org</td>
</tr>
</tbody>
</table>
24 changes: 24 additions & 0 deletions pandas/tests/io/formats/data/render_links_true.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>foo</th>
<th>bar</th>
<th>None</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>0</td>
<td><a href="http://pandas.pydata.org/?q1=a&q2=b" target="_blank">http://pandas.pydata.org/?q1=a&amp;q2=b</a></td>
<td>pydata.org</td>
</tr>
<tr>
<th>1</th>
<td>0</td>
<td>www.pydata.org</td>
<td>pydata.org</td>
</tr>
</tbody>
</table>
16 changes: 16 additions & 0 deletions pandas/tests/io/formats/test_to_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,3 +477,19 @@ def test_to_html_float_format_no_fixed_width(self, datapath):
df = DataFrame({'x': [100.0]})
expected = expected_html(datapath, 'gh22270_expected_output')
assert df.to_html(float_format='%.0f') == expected

@pytest.mark.parametrize("render_links, file_name", [
    (True, 'render_links_true'),
    (False, 'render_links_false'),
])
def test_to_html_render_links(self, render_links, file_name, datapath):
    # GH 2679
    # Render the same two-row frame with the flag on and off and compare
    # the output against the stored fixture named by ``file_name``.
    # Row 0 holds a full http URL with a query string containing "&";
    # row 1 holds a bare "www." host with no scheme.
    rows = [
        [0, 'http://pandas.pydata.org/?q1=a&q2=b', 'pydata.org'],
        [0, 'www.pydata.org', 'pydata.org'],
    ]
    # The third column label is deliberately None.
    frame = DataFrame(rows, columns=['foo', 'bar', None])

    rendered = frame.to_html(render_links=render_links)
    assert rendered == expected_html(datapath, file_name)

0 comments on commit f411e73

Please sign in to comment.