Skip to content

Commit

Permalink
Fix encoding issues
Browse files Browse the repository at this point in the history
  • Loading branch information
HedicGuibert committed Jun 4, 2022
1 parent a3269c8 commit a128993
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 9 deletions.
11 changes: 3 additions & 8 deletions src/JoliTypo/Fixer.php
Original file line number Diff line number Diff line change
Expand Up @@ -305,16 +305,13 @@ private function loadDOMDocument($content)
$dom->substituteEntities = false;
$dom->formatOutput = false;

-// Change mb and libxml config
+// Change libxml config
$libxmlCurrent = libxml_use_internal_errors(true);
-$mbDetectCurrent = mb_detect_order();
-mb_detect_order('ASCII,UTF-8,ISO-8859-1,windows-1252,iso-8859-15');

$loaded = $dom->loadHTML($this->fixContentEncoding($content));

-// Restore mb and libxml config
+// Restore libxml config
libxml_use_internal_errors($libxmlCurrent);
-mb_detect_order(implode(',', $mbDetectCurrent));

if (!$loaded) {
throw new InvalidMarkupException("Can't load the given HTML via DomDocument");
Expand Down Expand Up @@ -345,7 +342,7 @@ private function fixContentEncoding($content)
$content = $hack . $content;
}

-$encoding = mb_detect_encoding($content);
+$encoding = mb_detect_encoding($content, ['UTF-8', 'ASCII']) ?: mb_detect_encoding($content, ['ISO-8859-1', 'windows-1252', 'iso-8859-15']);
$headPos = mb_strpos($content, '<head>');

// Add a meta to the <head> section
Expand All @@ -355,8 +352,6 @@ private function fixContentEncoding($content)
'<meta http-equiv="Content-Type" content="text/html; charset=' . $encoding . '">' .
mb_substr($content, $headPos);
}

-$content = mb_convert_encoding($content, 'HTML-ENTITIES', $encoding);
}

return $content;
Expand Down
2 changes: 1 addition & 1 deletion tests/JoliTypo/Tests/Html5Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public function testFullPageMarkup()
HTML;

$fixed = <<<'STRING'
-&#8220;Who Let the Dogs Out?&#8221; is a song written and originally recorded by Anslem Douglas (titled &#8220;Doggie&#8221;).
+“Who Let the Dogs Out?” is a song written and originally recorded by Anslem Douglas (titled “Doggie”).
STRING;

$this->assertEquals($fixed, $fixer->fix($html));
Expand Down

0 comments on commit a128993

Please sign in to comment.