From 8bbbc80abc9afade748a82d01f50a002b68501f5 Mon Sep 17 00:00:00 2001 From: Dan Scott Date: Sat, 16 Jan 2016 14:13:02 -0500 Subject: [PATCH] Prevent RDFa parser from failing on time elements with child nodes The _get_literal() function in the RDFa parser referred to a self instance that does not exist. This led to a NameError exception if the HTML to be parsed included a TIME element with one or more child nodes that are themselves elements (for example, date text wrapped in an inline element inside the TIME element). Removing the reference to 'self' fixes the problem and fixes #576. Signed-off-by: Dan Scott --- rdflib/plugins/parsers/pyRdfa/host/html5.py | 2 +- test/test_issue576.py | 26 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 test/test_issue576.py diff --git a/rdflib/plugins/parsers/pyRdfa/host/html5.py b/rdflib/plugins/parsers/pyRdfa/host/html5.py index b7420aad0..1e839a2c2 100644 --- a/rdflib/plugins/parsers/pyRdfa/host/html5.py +++ b/rdflib/plugins/parsers/pyRdfa/host/html5.py @@ -172,7 +172,7 @@ def _get_literal(Pnode): if node.nodeType == node.TEXT_NODE: rc = rc + node.data elif node.nodeType == node.ELEMENT_NODE : - rc = rc + self._get_literal(node) + rc = rc + _get_literal(node) if state.options.space_preserve : return rc else : diff --git a/test/test_issue576.py b/test/test_issue576.py new file mode 100644 index 000000000..250138ab8 --- /dev/null +++ b/test/test_issue576.py @@ -0,0 +1,26 @@ +import rdflib + +html = """ + + + Boom + + + + + +""" + + +def test_time_child_element(): + """ + Ensure TIME elements that contain child nodes parse cleanly + """ + g = rdflib.Graph() + g.parse(data=html, format='rdfa') + date = g.value( + rdflib.URIRef("http://example.com/"), + rdflib.URIRef("http://schema.org/dateCreated") + ) + assert len(g) == 3 + assert date == rdflib.term.Literal("2016-01-01")