From 41e7df0013d6b70f3678b2d5361173e48ec3255c Mon Sep 17 00:00:00 2001 From: artreven Date: Wed, 7 Jun 2017 12:23:00 +0200 Subject: [PATCH 1/2] Add support for parsing large XML inputs --- rdflib/plugins/sparql/results/xmlresults.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rdflib/plugins/sparql/results/xmlresults.py b/rdflib/plugins/sparql/results/xmlresults.py index 4d8c866b1..ca76398bf 100644 --- a/rdflib/plugins/sparql/results/xmlresults.py +++ b/rdflib/plugins/sparql/results/xmlresults.py @@ -1,4 +1,5 @@ import logging +from io import BytesIO from xml.sax.saxutils import XMLGenerator from xml.dom import XML_NAMESPACE @@ -48,7 +49,8 @@ def __init__(self, source): if isinstance(xmlstring, text_type): xmlstring = xmlstring.encode('utf-8') try: - tree = etree.fromstring(xmlstring) + parser = etree.XMLParser(huge_tree=True) + tree = etree.parse(BytesIO(xmlstring), parser) except Exception as e: log.exception("Error parsing XML results: %s"%xmlstring) raise e From 85309a5976c7c35959ce45474cf0c650ac6fac68 Mon Sep 17 00:00:00 2001 From: artreven Date: Tue, 14 Nov 2017 11:40:43 +0100 Subject: [PATCH 2/2] Add except clause to fall back to default XML parsing when lxml is not available --- rdflib/plugins/sparql/results/xmlresults.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rdflib/plugins/sparql/results/xmlresults.py b/rdflib/plugins/sparql/results/xmlresults.py index ca76398bf..9e0019198 100644 --- a/rdflib/plugins/sparql/results/xmlresults.py +++ b/rdflib/plugins/sparql/results/xmlresults.py @@ -51,6 +51,8 @@ def __init__(self, source): try: parser = etree.XMLParser(huge_tree=True) tree = etree.parse(BytesIO(xmlstring), parser) + except TypeError: + tree = etree.fromstring(xmlstring) except Exception as e: log.exception("Error parsing XML results: %s"%xmlstring) raise e