Skip to content

Commit

Permalink
Reinstates original dom parser. Closes #36.
Browse files Browse the repository at this point in the history
  • Loading branch information
crscheid committed Jul 12, 2021
1 parent cd6c723 commit 9a62f72
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# ChangeLog

## Version 2.4

- Reinstates [paquettg/php-html-parser](https://github.com/paquettg/php-html-parser) as the preferred DOM parser.
- Note that this updates many Composer dependencies, so it is being published as a separate release just in case.

## Version 2.3

- Added the ability to pass in HTML and process it via the `processHTML` method
Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"require": {
"php": ">=7.2",
"scotteh/php-goose": "dev-master",
"thesoftwarefanatics/php-html-parser": "^1.8.0",
"paquettg/php-html-parser": "^3.1.1",
"detectlanguage/detectlanguage": "2.*",
"andreskrey/readability.php": "^2.1.0"
},
Expand Down
9 changes: 5 additions & 4 deletions src/ArticleExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
use andreskrey\Readability\ParseException;

use PHPHtmlParser\Dom;
use PHPHtmlParser\Dom\HtmlNode;
use PHPHtmlParser\Dom\TextNode;
use PHPHtmlParser\Options;
use PHPHtmlParser\Dom\Node\HtmlNode;
use PHPHtmlParser\Dom\Node\TextNode;

use DetectLanguage\DetectLanguage;

Expand Down Expand Up @@ -536,7 +537,7 @@ private function performCustomPostProcessing($html) {

// Ok then try it a different way
$dom = new Dom;
$dom->load($html, ['whitespaceTextNode' => false]);
$dom->loadStr($html, (new Options())->setWhitespaceTextNode(false));

// First, just completely remove the items we don't even care about
$nodesToRemove = $dom->find('script, style, header, footer, input, button, aside, meta, link');
Expand Down Expand Up @@ -906,7 +907,7 @@ private function checkHTMLForLanguageHint($html_string) {
try {
// Ok then try it a different way
$dom = new Dom;
$dom->load($html_string, ['whitespaceTextNode' => false]);
$dom->loadStr($html_string, (new Options())->setWhitespaceTextNode(false));

$htmltag = $dom->find('html');
$lang = $htmltag->getAttribute('lang');
Expand Down

0 comments on commit 9a62f72

Please sign in to comment.