From 05e809a08fc7c317e7d3405cd16cf02073bde8f6 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Thu, 29 Feb 2024 23:52:50 +0100 Subject: [PATCH] gh-115398: Expose Expat >=2.6.0 reparse deferral API (CVE-2023-52425) (GH-115623) Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding five new methods: - `xml.etree.ElementTree.XMLParser.flush` - `xml.etree.ElementTree.XMLPullParser.flush` - `xml.parsers.expat.xmlparser.GetReparseDeferralEnabled` - `xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` - `xml.sax.expatreader.ExpatParser.flush` Based on the "flush" idea from https://github.com/python/cpython/pull/115138#issuecomment-1932444270 . - Please treat as a security fix related to CVE-2023-52425. Includes code suggested-by: Snild Dolkow and by core dev Serhiy Storchaka. (cherry picked from commit 6a95676bb526261434dd068d6c49927c44d24a9b) --- Doc/library/pyexpat.rst | 31 ++++++++ Doc/library/xml.etree.elementtree.rst | 29 +++++++ Include/pyexpat.h | 4 +- Lib/test/test_pyexpat.py | 54 +++++++++++++ Lib/test/test_sax.py | 51 ++++++++++++ Lib/test/test_xml_etree.py | 79 +++++++++++++++---- Lib/xml/etree/ElementTree.py | 14 ++++ Lib/xml/sax/expatreader.py | 14 ++++ ...-02-18-03-14-40.gh-issue-115398.tzvxH8.rst | 8 ++ Modules/_elementtree.c | 33 ++++++++ Modules/clinic/_elementtree.c.h | 19 ++++- Modules/clinic/pyexpat.c.h | 49 +++++++++++- Modules/expat/pyexpatns.h | 1 + Modules/pyexpat.c | 53 +++++++++++++ 14 files changed, 420 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index a6ae8fdaa4991c..d92bb9797c9be7 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -196,6 +196,37 @@ XMLParser Objects :exc:`ExpatError` to be raised with the :attr:`code` attribute set to ``errors.codes[errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING]``. +.. method:: xmlparser.SetReparseDeferralEnabled(enabled) + + .. warning:: + + Calling ``SetReparseDeferralEnabled(False)`` has security implications, + as detailed below; please make sure to understand these consequences + prior to using the ``SetReparseDeferralEnabled`` method. + + Expat 2.6.0 introduced a security mechanism called "reparse deferral" + where instead of causing denial of service through quadratic runtime + from reparsing large tokens, reparsing of unfinished tokens is now delayed + by default until a sufficient amount of input is reached. + Due to this delay, registered handlers may — depending of the sizing of + input chunks pushed to Expat — no longer be called right after pushing new + input to the parser. Where immediate feedback and taking over responsiblity + of protecting against denial of service from large tokens are both wanted, + calling ``SetReparseDeferralEnabled(False)`` disables reparse deferral + for the current Expat parser instance, temporarily or altogether. + Calling ``SetReparseDeferralEnabled(True)`` allows re-enabling reparse + deferral. + + .. versionadded:: 3.11.9 + +.. method:: xmlparser.GetReparseDeferralEnabled() + + Returns whether reparse deferral is currently enabled for the given + Expat parser instance. + + .. versionadded:: 3.11.9 + + :class:`xmlparser` objects have the following attributes: diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 0e5c281a329605..44e159adb4319e 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -166,6 +166,11 @@ data but would still like to have incremental parsing capabilities, take a look at :func:`iterparse`. It can be useful when you're reading a large XML document and don't want to hold it wholly in memory. +Where *immediate* feedback through events is wanted, calling method +:meth:`XMLPullParser.flush` can help reduce delay; +please make sure to study the related security notes. + + Finding interesting elements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1378,6 +1383,19 @@ XMLParser Objects Feeds data to the parser. *data* is encoded data. + + .. method:: flush() + + Triggers parsing of any previously fed unparsed data, which can be + used to ensure more immediate feedback, in particular with Expat >=2.6.0. + The implementation of :meth:`flush` temporarily disables reparse deferral + with Expat (if currently enabled) and triggers a reparse. + Disabling reparse deferral has security consequences; please see + :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + + .. versionadded:: 3.11.9 + + :meth:`XMLParser.feed` calls *target*\'s ``start(tag, attrs_dict)`` method for each opening tag, its ``end(tag)`` method for each closing tag, and data is processed by method ``data(data)``. For further supported callback @@ -1439,6 +1457,17 @@ XMLPullParser Objects Feed the given bytes data to the parser. + .. method:: flush() + + Triggers parsing of any previously fed unparsed data, which can be + used to ensure more immediate feedback, in particular with Expat >=2.6.0. + The implementation of :meth:`flush` temporarily disables reparse deferral + with Expat (if currently enabled) and triggers a reparse. + Disabling reparse deferral has security consequences; please see + :meth:`xml.parsers.expat.xmlparser.SetReparseDeferralEnabled` for details. + + .. versionadded:: 3.11.9 + .. method:: close() Signal the parser that the data stream is terminated. Unlike diff --git a/Include/pyexpat.h b/Include/pyexpat.h index 07020b5dc964cb..9824d099c3df7d 100644 --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -48,8 +48,10 @@ struct PyExpat_CAPI enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding); int (*DefaultUnknownEncodingHandler)( void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); - /* might be none for expat < 2.1.0 */ + /* might be NULL for expat < 2.1.0 */ int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); + /* might be NULL for expat < 2.6.0 */ + XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled); /* always add new stuff to the end! */ }; diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index fa19c0ad9eebdb..44bd1deed1f0f5 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -758,5 +758,59 @@ def resolve_entity(context, base, system_id, public_id): self.assertEqual(handler_call_args, [("bar", "baz")]) +class ReparseDeferralTest(unittest.TestCase): + def test_getter_setter_round_trip(self): + parser = expat.ParserCreate() + enabled = (expat.version_info >= (2, 6, 0)) + + self.assertIs(parser.GetReparseDeferralEnabled(), enabled) + parser.SetReparseDeferralEnabled(False) + self.assertIs(parser.GetReparseDeferralEnabled(), False) + parser.SetReparseDeferralEnabled(True) + self.assertIs(parser.GetReparseDeferralEnabled(), enabled) + + def test_reparse_deferral_enabled(self): + if expat.version_info < (2, 6, 0): + self.skipTest(f'Expat {expat.version_info} does not ' + 'support reparse deferral') + + started = [] + + def start_element(name, _): + started.append(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = start_element + self.assertTrue(parser.GetReparseDeferralEnabled()) + + for chunk in (b''): + parser.Parse(chunk, False) + + # The key test: Have handlers already fired? Expecting: no. + self.assertEqual(started, []) + + parser.Parse(b'', True) + + self.assertEqual(started, ['doc']) + + def test_reparse_deferral_disabled(self): + started = [] + + def start_element(name, _): + started.append(name) + + parser = expat.ParserCreate() + parser.StartElementHandler = start_element + if expat.version_info >= (2, 6, 0): + parser.SetReparseDeferralEnabled(False) + self.assertFalse(parser.GetReparseDeferralEnabled()) + + for chunk in (b''): + parser.Parse(chunk, False) + + # The key test: Have handlers already fired? Expecting: yes. + self.assertEqual(started, ['doc']) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py index eda4e6a46df437..97e96668f85c8a 100644 --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -19,6 +19,7 @@ from io import BytesIO, StringIO import codecs import os.path +import pyexpat import shutil import sys from urllib.error import URLError @@ -1214,6 +1215,56 @@ def test_expat_incremental_reset(self): self.assertEqual(result.getvalue(), start + b"text") + def test_flush_reparse_deferral_enabled(self): + if pyexpat.version_info < (2, 6, 0): + self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral') + + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + for chunk in (""): + parser.feed(chunk) + + self.assertEqual(result.getvalue(), start) # i.e. no elements started + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assertTrue(parser._parser.GetReparseDeferralEnabled()) + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + + def test_flush_reparse_deferral_disabled(self): + result = BytesIO() + xmlgen = XMLGenerator(result) + parser = create_parser() + parser.setContentHandler(xmlgen) + + for chunk in (""): + parser.feed(chunk) + + if pyexpat.version_info >= (2, 6, 0): + parser._parser.SetReparseDeferralEnabled(False) + + self.assertEqual(result.getvalue(), start) # i.e. no elements started + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assertFalse(parser._parser.GetReparseDeferralEnabled()) + self.assertEqual(result.getvalue(), start + b"") + + parser.feed("") + parser.close() + + self.assertEqual(result.getvalue(), start + b"") + # ===== Locator support def test_expat_locator_noinfo(self): diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index fa03f381fac92a..7d95eb397ad87e 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -121,10 +121,6 @@ """ -fails_with_expat_2_6_0 = (unittest.expectedFailure - if pyexpat.version_info >= (2, 6, 0) else - lambda test: test) - def checkwarnings(*filters, quiet=False): def decorator(test): def newtest(*args, **kwargs): @@ -1382,12 +1378,14 @@ def test_attlist_default(self): class XMLPullParserTest(unittest.TestCase): - def _feed(self, parser, data, chunk_size=None): + def _feed(self, parser, data, chunk_size=None, flush=False): if chunk_size is None: parser.feed(data) else: for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) + if flush: + parser.flush() def assert_events(self, parser, expected, max_events=None): self.assertEqual( @@ -1405,34 +1403,32 @@ def assert_event_tags(self, parser, expected, max_events=None): self.assertEqual([(action, elem.tag) for action, elem in events], expected) - def test_simple_xml(self, chunk_size=None): + def test_simple_xml(self, chunk_size=None, flush=False): parser = ET.XMLPullParser() self.assert_event_tags(parser, []) - self._feed(parser, "\n", chunk_size) + self._feed(parser, "\n", chunk_size, flush) self.assert_event_tags(parser, []) self._feed(parser, "\n text\n", chunk_size) + self._feed(parser, ">\n", chunk_size, flush) self.assert_event_tags(parser, [('end', 'element')]) - self._feed(parser, "texttail\n", chunk_size) - self._feed(parser, "\n", chunk_size) + self._feed(parser, "texttail\n", chunk_size, flush) + self._feed(parser, "\n", chunk_size, flush) self.assert_event_tags(parser, [ ('end', 'element'), ('end', 'empty-element'), ]) - self._feed(parser, "\n", chunk_size) + self._feed(parser, "\n", chunk_size, flush) self.assert_event_tags(parser, [('end', 'root')]) self.assertIsNone(parser.close()) - @fails_with_expat_2_6_0 def test_simple_xml_chunk_1(self): - self.test_simple_xml(chunk_size=1) + self.test_simple_xml(chunk_size=1, flush=True) - @fails_with_expat_2_6_0 def test_simple_xml_chunk_5(self): - self.test_simple_xml(chunk_size=5) + self.test_simple_xml(chunk_size=5, flush=True) def test_simple_xml_chunk_22(self): self.test_simple_xml(chunk_size=22) @@ -1631,6 +1627,57 @@ def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) + def test_flush_reparse_deferral_enabled(self): + if pyexpat.version_info < (2, 6, 0): + self.skipTest(f'Expat {pyexpat.version_info} does not ' + 'support reparse deferral') + + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + self.assert_event_tags(parser, []) # i.e. no elements started + if ET is pyET: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + if ET is pyET: + self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) + + def test_flush_reparse_deferral_disabled(self): + parser = ET.XMLPullParser(events=('start', 'end')) + + for chunk in (""): + parser.feed(chunk) + + if pyexpat.version_info >= (2, 6, 0): + if not ET is pyET: + self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled ' + 'methods not available in C') + parser._parser._parser.SetReparseDeferralEnabled(False) + + self.assert_event_tags(parser, []) # i.e. no elements started + if ET is pyET: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.flush() + + self.assert_event_tags(parser, [('start', 'doc')]) + if ET is pyET: + self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) + + parser.feed("") + parser.close() + + self.assert_event_tags(parser, [('end', 'doc')]) # # xinclude tests (samples from appendix C of the xinclude specification) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index fce0c2963a28d5..5f32649c7fde83 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1328,6 +1328,11 @@ def read_events(self): else: yield event + def flush(self): + if self._parser is None: + raise ValueError("flush() called after end of stream") + self._parser.flush() + def XML(text, parser=None): """Parse XML document from string constant. @@ -1734,6 +1739,15 @@ def close(self): del self.parser, self._parser del self.target, self._target + def flush(self): + was_enabled = self.parser.GetReparseDeferralEnabled() + try: + self.parser.SetReparseDeferralEnabled(False) + self.parser.Parse(b"", False) + except self._error as v: + self._raiseerror(v) + finally: + self.parser.SetReparseDeferralEnabled(was_enabled) # -------------------------------------------------------------------- # C14N 2.0 diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py index e334ac9fea0d36..2f7c87b126c993 100644 --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -220,6 +220,20 @@ def feed(self, data, isFinal=False): # FIXME: when to invoke error()? self._err_handler.fatalError(exc) + def flush(self): + if self._parser is None: + return + + was_enabled = self._parser.GetReparseDeferralEnabled() + try: + self._parser.SetReparseDeferralEnabled(False) + self._parser.Parse(b"", False) + except expat.error as e: + exc = SAXParseException(expat.ErrorString(e.code), e, self) + self._err_handler.fatalError(exc) + finally: + self._parser.SetReparseDeferralEnabled(was_enabled) + def _close_source(self): source = self._source try: diff --git a/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst new file mode 100644 index 00000000000000..97b23936928d91 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-02-18-03-14-40.gh-issue-115398.tzvxH8.rst @@ -0,0 +1,8 @@ +Allow controlling Expat >=2.6.0 reparse deferral (CVE-2023-52425) by adding +five new methods: + +* ``xml.etree.ElementTree.XMLParser.flush`` +* ``xml.etree.ElementTree.XMLPullParser.flush`` +* ``xml.parsers.expat.xmlparser.GetReparseDeferralEnabled`` +* ``xml.parsers.expat.xmlparser.SetReparseDeferralEnabled`` +* ``xml.sax.expatreader.ExpatParser.flush`` diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 12d22337d8217a..a52fea2e483940 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3875,6 +3875,38 @@ _elementtree_XMLParser_close_impl(XMLParserObject *self) } } +/*[clinic input] +_elementtree.XMLParser.flush + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self) +/*[clinic end generated code: output=42fdb8795ca24509 input=effbecdb28715949]*/ +{ + if (!_check_xmlparser(self)) { + return NULL; + } + + if (EXPAT(SetReparseDeferralEnabled) == NULL) { + Py_RETURN_NONE; + } + + // NOTE: The Expat parser in the C implementation of ElementTree is not + // exposed to the outside; as a result we known that reparse deferral + // is currently enabled, or we would not even have access to function + // XML_SetReparseDeferralEnabled in the first place (which we checked + // for, a few lines up). + + EXPAT(SetReparseDeferralEnabled)(self->parser, XML_FALSE); + + PyObject *res = expat_parse(self, "", 0, XML_FALSE); + + EXPAT(SetReparseDeferralEnabled)(self->parser, XML_TRUE); + + return res; +} + /*[clinic input] _elementtree.XMLParser.feed @@ -4302,6 +4334,7 @@ static PyTypeObject TreeBuilder_Type = { static PyMethodDef xmlparser_methods[] = { _ELEMENTTREE_XMLPARSER_FEED_METHODDEF _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF + _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF {NULL, NULL} diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index 047203eefa3579..fb02a5c1b96c64 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -865,6 +865,23 @@ _elementtree_XMLParser_close(XMLParserObject *self, PyObject *Py_UNUSED(ignored) return _elementtree_XMLParser_close_impl(self); } +PyDoc_STRVAR(_elementtree_XMLParser_flush__doc__, +"flush($self, /)\n" +"--\n" +"\n"); + +#define _ELEMENTTREE_XMLPARSER_FLUSH_METHODDEF \ + {"flush", (PyCFunction)_elementtree_XMLParser_flush, METH_NOARGS, _elementtree_XMLParser_flush__doc__}, + +static PyObject * +_elementtree_XMLParser_flush_impl(XMLParserObject *self); + +static PyObject * +_elementtree_XMLParser_flush(XMLParserObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _elementtree_XMLParser_flush_impl(self); +} + PyDoc_STRVAR(_elementtree_XMLParser_feed__doc__, "feed($self, data, /)\n" "--\n" @@ -915,4 +932,4 @@ _elementtree_XMLParser__setevents(XMLParserObject *self, PyObject *const *args, exit: return return_value; } -/*[clinic end generated code: output=3fd6fa2ce1aeca76 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=88a3c8b7164a6474 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index b2648320aad78d..8f152230ea9289 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -2,6 +2,53 @@ preserve [clinic start generated code]*/ +PyDoc_STRVAR(pyexpat_xmlparser_SetReparseDeferralEnabled__doc__, +"SetReparseDeferralEnabled($self, enabled, /)\n" +"--\n" +"\n" +"Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0."); + +#define PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF \ + {"SetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_SetReparseDeferralEnabled, METH_O, pyexpat_xmlparser_SetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled); + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled(xmlparseobject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + int enabled; + + enabled = PyObject_IsTrue(arg); + if (enabled < 0) { + goto exit; + } + return_value = pyexpat_xmlparser_SetReparseDeferralEnabled_impl(self, enabled); + +exit: + return return_value; +} + +PyDoc_STRVAR(pyexpat_xmlparser_GetReparseDeferralEnabled__doc__, +"GetReparseDeferralEnabled($self, /)\n" +"--\n" +"\n" +"Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0."); + +#define PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF \ + {"GetReparseDeferralEnabled", (PyCFunction)pyexpat_xmlparser_GetReparseDeferralEnabled, METH_NOARGS, pyexpat_xmlparser_GetReparseDeferralEnabled__doc__}, + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self); + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled(xmlparseobject *self, PyObject *Py_UNUSED(ignored)) +{ + return pyexpat_xmlparser_GetReparseDeferralEnabled_impl(self); +} + PyDoc_STRVAR(pyexpat_xmlparser_Parse__doc__, "Parse($self, data, isfinal=False, /)\n" "--\n" @@ -425,4 +472,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=3e333b89da3aa58c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8d544b917e35add6 input=a9049054013a1b77]*/ diff --git a/Modules/expat/pyexpatns.h b/Modules/expat/pyexpatns.h index d45d9b6c457159..8ee03ef0792815 100644 --- a/Modules/expat/pyexpatns.h +++ b/Modules/expat/pyexpatns.h @@ -108,6 +108,7 @@ #define XML_SetNotStandaloneHandler PyExpat_XML_SetNotStandaloneHandler #define XML_SetParamEntityParsing PyExpat_XML_SetParamEntityParsing #define XML_SetProcessingInstructionHandler PyExpat_XML_SetProcessingInstructionHandler +#define XML_SetReparseDeferralEnabled PyExpat_XML_SetReparseDeferralEnabled #define XML_SetReturnNSTriplet PyExpat_XML_SetReturnNSTriplet #define XML_SetSkippedEntityHandler PyExpat_XML_SetSkippedEntityHandler #define XML_SetStartCdataSectionHandler PyExpat_XML_SetStartCdataSectionHandler diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 226887cfacc0a2..bc14a691318814 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1,6 +1,7 @@ #include "Python.h" #include +#include #include "structmember.h" // PyMemberDef #include "expat.h" @@ -76,6 +77,12 @@ typedef struct { /* NULL if not enabled */ int buffer_size; /* Size of buffer, in XML_Char units */ int buffer_used; /* Buffer units in use */ + bool reparse_deferral_enabled; /* Whether to defer reparsing of + unfinished XML tokens; a de-facto cache of + what Expat has the authority on, for lack + of a getter API function + "XML_GetReparseDeferralEnabled" in Expat + 2.6.0 */ PyObject *intern; /* Dictionary to intern strings */ PyObject **handlers; } xmlparseobject; @@ -705,6 +712,40 @@ get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv) #define MAX_CHUNK_SIZE (1 << 20) +/*[clinic input] +pyexpat.xmlparser.SetReparseDeferralEnabled + + enabled: bool + / + +Enable/Disable reparse deferral; enabled by default with Expat >=2.6.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_SetReparseDeferralEnabled_impl(xmlparseobject *self, + int enabled) +/*[clinic end generated code: output=5ec539e3b63c8c49 input=021eb9e0bafc32c5]*/ +{ +#if XML_COMBINED_VERSION >= 20600 + XML_SetReparseDeferralEnabled(self->itself, enabled ? XML_TRUE : XML_FALSE); + self->reparse_deferral_enabled = (bool)enabled; +#endif + Py_RETURN_NONE; +} + +/*[clinic input] +pyexpat.xmlparser.GetReparseDeferralEnabled + +Retrieve reparse deferral enabled status; always returns false with Expat <2.6.0. +[clinic start generated code]*/ + +static PyObject * +pyexpat_xmlparser_GetReparseDeferralEnabled_impl(xmlparseobject *self) +/*[clinic end generated code: output=4e91312e88a595a8 input=54b5f11d32b20f3e]*/ +{ + return PyBool_FromLong(self->reparse_deferral_enabled); +} + /*[clinic input] pyexpat.xmlparser.Parse @@ -1067,6 +1108,8 @@ static struct PyMethodDef xmlparse_methods[] = { #if XML_COMBINED_VERSION >= 19505 PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif + PYEXPAT_XMLPARSER_SETREPARSEDEFERRALENABLED_METHODDEF + PYEXPAT_XMLPARSER_GETREPARSEDEFERRALENABLED_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -1150,6 +1193,11 @@ newxmlparseobject(pyexpat_state *state, const char *encoding, self->handlers = NULL; self->intern = intern; Py_XINCREF(self->intern); +#if XML_COMBINED_VERSION >= 20600 + self->reparse_deferral_enabled = true; +#else + self->reparse_deferral_enabled = false; +#endif /* namespace_separator is either NULL or contains one char + \0 */ self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler, @@ -2005,6 +2053,11 @@ pyexpat_exec(PyObject *mod) #else capi.SetHashSalt = NULL; #endif +#if XML_COMBINED_VERSION >= 20600 + capi.SetReparseDeferralEnabled = XML_SetReparseDeferralEnabled; +#else + capi.SetReparseDeferralEnabled = NULL; +#endif /* export using capsule */ PyObject *capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);