Skip to content

Commit

Permalink
Merge pull request #10 from mcs07/refreader
Browse files Browse the repository at this point in the history
Fix issues with reference link extraction using HTML/XML readers
  • Loading branch information
mcs07 authored Nov 1, 2016
2 parents be8ec3a + f59592b commit 608f719
Show file tree
Hide file tree
Showing 3 changed files with 6,241 additions and 3 deletions.
6 changes: 3 additions & 3 deletions chemdataextractor/reader/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,14 @@ def _parse_table_footnotes(self, fns, refs, specials):

def _parse_reference(self, el):
"""Return reference ID from href or text content."""
# NOTE(review): this span is a rendered diff hunk whose +/- markers and
# indentation were lost, so removed and added lines sit back to back.
# The page header reports 3 additions and 3 deletions for this file; the
# old/new labels below are inferred from that count and line order --
# confirm against the repository before relying on them.
#
# Presumably REMOVED by this commit: matches only when href starts with
# '#', and returns the href with its first character sliced off.
if el.get('href', '').startswith('#'):
return [el.get('href')[1:]]
# Presumably ADDED by this commit: matches when '#' appears anywhere in
# href, and returns the part after the first '#' (split with maxsplit=1).
if '#' in el.get('href', ''):
return [el.get('href').split('#', 1)[1]]
# Otherwise return the value of the 'rid' attribute when it is present.
elif 'rid' in el.attrib:
return [el.attrib['rid']]
# Otherwise return the value of the 'idref' attribute when it is present.
elif 'idref' in el.attrib:
return [el.attrib['idref']]
else:
# Presumably REMOVED by this commit: fallback that strips el.text only.
return [el.text.strip()]
# Presumably ADDED by this commit: fallback that joins every string yielded
# by el.itertext() and strips the result. Every branch returns a
# single-item list.
return [''.join(el.itertext()).strip()]

def _parse_table(self, el, refs, specials):
caps = self._css(self.table_caption_css, el)
Expand Down
Loading

0 comments on commit 608f719

Please sign in to comment.