diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index f4e9652..0fd1ee7 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -1111,6 +1111,13 @@ protected function decodeCharacterReference($inAttribute = false) if ('#' === $tok) { $tok = $this->scanner->next(); + if (false === $tok) { + $this->parseError('Expected &#DEC; &#HEX;, got EOF'); + $this->scanner->unconsume(1); + + return '&'; + } + // Hexidecimal encoding. // X[0-9a-fA-F]+; // x[0-9a-fA-F]+; diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index b5940c2..00e9a47 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -133,6 +133,14 @@ public function testBareAmpersandNotAllowedInBody() ', $doc->saveXML()); } + public function testEntityAtEndOfFile() + { + $fragment = $this->parseFragment('&#'); + $this->assertInstanceOf('DOMDocumentFragment', $fragment); + $this->assertSame('&#', $fragment->textContent); + $this->assertEquals('Line 1, Col 2: Expected &#DEC; &#HEX;, got EOF', $this->errors[0]); + } + public function testStrangeCapitalization() { $html = '