From 21eeaf0a6044318c0fe42e4487e84dcb853af490 Mon Sep 17 00:00:00 2001 From: Asmir Mustafic Date: Thu, 6 Feb 2020 11:24:55 +0100 Subject: [PATCH] prevent infinite loop on unterminated numeric character reference at end of stream --- src/HTML5/Parser/Tokenizer.php | 7 +++++++ test/HTML5/Parser/DOMTreeBuilderTest.php | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index f4e9652..0fd1ee7 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -1111,6 +1111,13 @@ protected function decodeCharacterReference($inAttribute = false) if ('#' === $tok) { $tok = $this->scanner->next(); + if (false === $tok) { + $this->parseError('Expected &#DEC; &#HEX;, got EOF'); + $this->scanner->unconsume(1); + + return '&'; + } + // Hexidecimal encoding. // X[0-9a-fA-F]+; // x[0-9a-fA-F]+; diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index b5940c2..00e9a47 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -133,6 +133,14 @@ public function testBareAmpersandNotAllowedInBody() ', $doc->saveXML()); } + public function testEntityAtEndOfFile() + { + $fragment = $this->parseFragment('&#'); + $this->assertInstanceOf('DOMDocumentFragment', $fragment); + $this->assertSame('&#', $fragment->textContent); + $this->assertEquals('Line 1, Col 2: Expected &#DEC; &#HEX;, got EOF', $this->errors[0]); + } + public function testStrangeCapitalization() { $html = '