Skip to content

Commit

Permalink
Merge branch 'add-footer-to-read-html' of https://github.com/mjsu/pandas
Browse files Browse the repository at this point in the history
 into mjsu-add-footer-to-read-html

Conflicts:
	doc/source/whatsnew/v0.15.2.txt
  • Loading branch information
jreback committed Dec 6, 2014
2 parents 6cd7490 + 7587bf1 commit 7bd1b24
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ Enhancements
- Added context manager to ``HDFStore`` for automatic closing (:issue:`8791`).
- ``to_datetime`` gains an ``exact`` keyword to allow for a format to not require an exact match for a provided format string (if it is ``False``). ``exact`` defaults to ``True`` (meaning that exact matching is still the default) (:issue:`8904`)
- Added ``axvlines`` boolean option to parallel_coordinates plot function, determines whether vertical lines will be printed, default is True
- Added ability to read table footers to read_html (:issue:`8552`)

.. _whatsnew_0152.performance:

Expand Down
7 changes: 5 additions & 2 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ def _parse_raw_thead(self, table):
table.xpath(expr)]

def _parse_raw_tfoot(self, table):
    """Return the text of every footer cell found in ``table``.

    Parameters
    ----------
    table : node-like
        The table element to search; it must provide an ``xpath`` method.

    Returns
    -------
    list
        One entry per ``<th>`` or ``<td>`` located under a ``<tfoot>`` of
        ``table``, each being the cell's ``text_content()`` passed through
        ``_remove_whitespace``.
    """
    # Both alternatives of the union must start with '.' so that they are
    # evaluated relative to ``table``.  A bare '//tfoot//td' is an absolute
    # location path: it would match footer <td> cells of *every* table in
    # the document, leaking other tables' footers into this one.
    expr = './/tfoot//th|.//tfoot//td'
    return [_remove_whitespace(x.text_content()) for x in
            table.xpath(expr)]

Expand All @@ -594,14 +594,17 @@ def _expand_elements(body):

def _data_to_frame(data, header, index_col, skiprows, infer_types,
parse_dates, tupleize_cols, thousands):
head, body, _ = data # _ is footer which is rarely used: ignore for now
head, body, foot = data

if head:
body = [head] + body

if header is None: # special case when a table has <th> elements
header = 0

if foot:
body += [foot]

# fill out elements of body that are "ragged"
_expand_elements(body)

Expand Down
32 changes: 32 additions & 0 deletions pandas/io/tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,38 @@ def test_empty_tables(self):
res1 = self.read_html(StringIO(data1))
res2 = self.read_html(StringIO(data2))
assert_framelist_equal(res1, res2)

def test_tfoot_read(self):
    """
    Check that read_html appends a <tfoot> row made of td and/or th cells
    to the parsed frame, and that an empty <tfoot> contributes no row.
    """
    html_template = '''<table>
<thead>
<tr>
<th>A</th>
<th>B</th>
</tr>
</thead>
<tbody>
<tr>
<td>bodyA</td>
<td>bodyB</td>
</tr>
</tbody>
<tfoot>
{footer}
</tfoot>
</table>'''

    # One document with an empty footer, one whose footer mixes <td>/<th>.
    html_empty_foot = html_template.format(footer="")
    html_mixed_foot = html_template.format(
        footer="<tr><td>footA</td><th>footB</th></tr>")

    columns_empty_foot = {'A': ['bodyA'], 'B': ['bodyB']}
    columns_mixed_foot = {'A': ['bodyA', 'footA'],
                          'B': ['bodyB', 'footB']}

    cases = (
        (html_empty_foot, columns_empty_foot),
        (html_mixed_foot, columns_mixed_foot),
    )
    for html, columns in cases:
        # Only the first (and only) table of each document is compared.
        tm.assert_frame_equal(self.read_html(html)[0], DataFrame(columns))

def test_countries_municipalities(self):
# GH5048
Expand Down

0 comments on commit 7bd1b24

Please sign in to comment.