diff --git a/src/JoliTypo/Fixer.php b/src/JoliTypo/Fixer.php index 71d8c18..1066767 100644 --- a/src/JoliTypo/Fixer.php +++ b/src/JoliTypo/Fixer.php @@ -305,16 +305,13 @@ private function loadDOMDocument($content) $dom->substituteEntities = false; $dom->formatOutput = false; - // Change mb and libxml config + // Change libxml config $libxmlCurrent = libxml_use_internal_errors(true); - $mbDetectCurrent = mb_detect_order(); - mb_detect_order('ASCII,UTF-8,ISO-8859-1,windows-1252,iso-8859-15'); $loaded = $dom->loadHTML($this->fixContentEncoding($content)); - // Restore mb and libxml config + // Restore libxml config libxml_use_internal_errors($libxmlCurrent); - mb_detect_order(implode(',', $mbDetectCurrent)); if (!$loaded) { throw new InvalidMarkupException("Can't load the given HTML via DomDocument"); @@ -345,7 +342,15 @@ private function fixContentEncoding($content) $content = $hack . $content; } - $encoding = mb_detect_encoding($content); + $encoding = ''; + + foreach (['UTF-8', 'ASCII', 'ISO-8859-1', 'windows-1252', 'iso-8859-15'] as $testedEncoding) { + if (mb_detect_encoding($content, $testedEncoding, true)) { + $encoding = $testedEncoding; + break; + } + } + $headPos = mb_strpos($content, ''); // Add a meta to the section @@ -356,7 +361,9 @@ private function fixContentEncoding($content) mb_substr($content, $headPos); } - $content = mb_convert_encoding($content, 'HTML-ENTITIES', $encoding); + if ('UTF-8' !== $encoding) { + $content = mb_convert_encoding($content, 'UTF-8', $encoding); + } } return $content; diff --git a/tests/JoliTypo/Tests/Html5Test.php b/tests/JoliTypo/Tests/Html5Test.php index 0f439cc..2bfa956 100644 --- a/tests/JoliTypo/Tests/Html5Test.php +++ b/tests/JoliTypo/Tests/Html5Test.php @@ -46,7 +46,7 @@ public function testFullPageMarkup() HTML; $fixed = <<<'STRING' - “Who Let the Dogs Out?” is a song written and originally recorded by Anslem Douglas (titled “Doggie”). + “Who Let the Dogs Out?” is a song written and originally recorded by Anslem Douglas (titled “Doggie”). STRING; $this->assertEquals($fixed, $fixer->fix($html));