Grow buffer based on current size

Until now, the buffer size to grow to has been calculated based on the distance from the current parse position to the end of the buffer. This means that the size of any already-parsed data was not considered, leading to inconsistent buffer growth. Growing the buffer based on its total size makes its growth consistent. The commit includes a test that checks that we can reach the max buffer size (usually INT_MAX/2 + 1) regardless of previously parsed content.
SonyMobile · Oct 19, 2023 · bfa615e
1 parent a68399a
commit bfa615e
Showing 2 changed files with 37 additions and 1 deletion.
diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c
@@ -2126,7 +2126,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
     } else {
       char *newBuf;
       int bufferSize
-          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
+          = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
       if (bufferSize == 0)
         bufferSize = INIT_BUFFER_SIZE;
       do {

diff --git a/expat/tests/basic_tests.c b/expat/tests/basic_tests.c
@@ -2855,6 +2855,41 @@ START_TEST(test_get_buffer_3_overflow) {
 END_TEST
 #endif // XML_CONTEXT_BYTES > 0
 
+START_TEST(test_buffer_can_grow_to_max) { // limit is prefix-independent
+  const char *const prefixes[] = {
+      "",
+      "<",
+      "<x a='",
+      "<doc><x a='",
+      "<document><x a='",
+      "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
+      "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
+      "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
+      "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
+      "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
+  const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
+  const int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
+
+  for (int i = 0; i < num_prefixes; ++i) {
+    set_subtest("\"%s\"", prefixes[i]);
+    XML_Parser parser = XML_ParserCreate(NULL);
+    const int prefix_len = (int)strlen(prefixes[i]);
+    const enum XML_Status s
+        = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
+    if (s != XML_STATUS_OK)
+      xml_failure(parser);
+
+    // XML_CONTEXT_BYTES of the parsed prefix may remain in the buffer, so
+    // request maxbuf minus the whole prefix: easiest, and close enough.
+    fail_unless(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
+    // The limit must not depend on the prefix: a request one byte above
+    // the max buffer size has to be rejected for every prefix.
+    fail_unless(XML_GetBuffer(parser, maxbuf + 1) == NULL);
+    XML_ParserFree(parser);
+  }
+}
+END_TEST
+
 /* Test position information macros */
 START_TEST(test_byte_info_at_end) {
   const char *text = "<doc></doc>";
@@ -5242,6 +5277,7 @@ make_basic_test_case(Suite *s) {
 #if XML_CONTEXT_BYTES > 0
   tcase_add_test(tc_basic, test_get_buffer_3_overflow);
 #endif
+  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
   tcase_add_test(tc_basic, test_byte_info_at_end);
   tcase_add_test(tc_basic, test_byte_info_at_error);
   tcase_add_test(tc_basic, test_byte_info_at_cdata);