Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade string encoder #321

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# This fork

The original repository is no longer maintained. This fork is intended to keep the codebase up to date with current PHP versions.

PHP Html Parser
==========================

Expand Down
4 changes: 2 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"ext-mbstring": "*",
"ext-zlib": "*",
"ext-curl": "*",
"paquettg/string-encode": "~1.0.0",
"paquettg/string-encode": "^2.1",
"php-http/httplug": "^2.1",
"guzzlehttp/guzzle": "^7.0",
"guzzlehttp/psr7": "^1.6",
Expand All @@ -31,7 +31,7 @@
"friendsofphp/php-cs-fixer": "^2.16"
},
"autoload": {
"psr-4": {
"psr-4": {
"PHPHtmlParser\\": "src/PHPHtmlParser"
}
}
Expand Down
11 changes: 6 additions & 5 deletions src/PHPHtmlParser/DTO/Tag/AttributeDTO.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

namespace PHPHtmlParser\DTO\Tag;

use stringEncode\Encode;
use stringEncode\Exception;
use StringEncoder\Contracts\EncoderInterface;
use StringEncoder\Exceptions\InvalidEncodingException;


final class AttributeDTO
{
Expand Down Expand Up @@ -51,10 +52,10 @@ public function htmlspecialcharsDecode(): void
}

/**
* @throws Exception
* @throws InvalidEncodingException
*/
public function encodeValue(Encode $encode)
public function encodeValue(EncoderInterface $encode)
{
$this->value = $encode->convert($this->value);
$this->value = $encode->convert()->fromString($this->value)->toString();
}
}
7 changes: 4 additions & 3 deletions src/PHPHtmlParser/Dom/Node/AbstractNode.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
use PHPHtmlParser\Finder;
use PHPHtmlParser\Selector\Selector;
use stringEncode\Encode;
use StringEncoder\Contracts\EncoderInterface;


/**
* Dom node object.
Expand Down Expand Up @@ -57,7 +58,7 @@ abstract class AbstractNode
/**
* The encoding class used to encode strings.
*
* @var mixed
* @var EncoderInterface
*/
protected $encode;

Expand Down Expand Up @@ -206,7 +207,7 @@ public function delete()
*
* @return void
*/
public function propagateEncoding(Encode $encode)
public function propagateEncoding(EncoderInterface $encode)
{
$this->encode = $encode;
$this->tag->setEncoding($encode);
Expand Down
5 changes: 3 additions & 2 deletions src/PHPHtmlParser/Dom/Node/InnerNode.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
use PHPHtmlParser\Exceptions\ChildNotFoundException;
use PHPHtmlParser\Exceptions\CircularException;
use PHPHtmlParser\Exceptions\LogicalException;
use stringEncode\Encode;
use StringEncoder\Contracts\EncoderInterface;


/**
* Inner node of the html tree, might have children.
Expand All @@ -33,7 +34,7 @@ abstract class InnerNode extends ArrayNode
* Sets the encoding class to this node and propagates it
* to all its children.
*/
public function propagateEncoding(Encode $encode): void
public function propagateEncoding(EncoderInterface $encode): void
{
$this->encode = $encode;
$this->tag->setEncoding($encode);
Expand Down
4 changes: 2 additions & 2 deletions src/PHPHtmlParser/Dom/Node/TextNode.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public function text(): string
// we already know the converted value
return $this->convertedText;
}
$text = $this->encode->convert($text);
$text = $this->encode->convert()->fromString($text)->toString();

// remember the conversion
$this->convertedText = $text;
Expand All @@ -109,7 +109,7 @@ public function setText(string $text): void
{
$this->text = $text;
if (!\is_null($this->encode)) {
$text = $this->encode->convert($text);
$text = $this->encode->convert()->fromString($text)->toString();

// remember the conversion
$this->convertedText = $text;
Expand Down
20 changes: 11 additions & 9 deletions src/PHPHtmlParser/Dom/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
use PHPHtmlParser\Exceptions\LogicalException;
use PHPHtmlParser\Exceptions\StrictException;
use PHPHtmlParser\Options;
use stringEncode\Encode;
use StringEncoder\Contracts\EncoderInterface;
use StringEncoder\Encoder;


class Parser implements ParserInterface
{
Expand Down Expand Up @@ -104,15 +106,15 @@ public function parse(Options $options, Content $content, int $size): AbstractNo
public function detectCharset(Options $options, string $defaultCharset, AbstractNode $root): bool
{
// set the default
$encode = new Encode();
$encode->from($defaultCharset);
$encode->to($defaultCharset);
$encode = new Encoder();
$encode->setSourceEncoding($defaultCharset);
$encode->setTargetEncoding($defaultCharset);

$enforceEncoding = $options->getEnforceEncoding();
if ($enforceEncoding !== null) {
// they want to enforce the given encoding
$encode->from($enforceEncoding);
$encode->to($enforceEncoding);
$encode->setSourceEncoding($enforceEncoding);
$encode->setTargetEncoding($enforceEncoding);

return false;
}
Expand All @@ -138,7 +140,7 @@ public function detectCharset(Options $options, string $defaultCharset, Abstract
}
$matches = [];
if (\preg_match('/charset=([^;]+)/', $content, $matches)) {
$encode->from(\trim($matches[1]));
$encode->setSourceEncoding(\trim($matches[1]));
$root->propagateEncoding($encode);

return true;
Expand Down Expand Up @@ -233,15 +235,15 @@ private function parseTag(Options $options, Content $content, int $size): TagDTO
/**
* @throws ChildNotFoundException
*/
private function detectHTML5Charset(Encode $encode, AbstractNode $root): bool
private function detectHTML5Charset(EncoderInterface $encode, AbstractNode $root): bool
{
/** @var AbstractNode|null $meta */
$meta = $root->find('meta[charset]', 0);
if ($meta == null) {
return false;
}

$encode->from(\trim($meta->getAttribute('charset')));
$encode->setSourceEncoding(\trim($meta->getAttribute('charset')));
$root->propagateEncoding($encode);

return true;
Expand Down
16 changes: 10 additions & 6 deletions src/PHPHtmlParser/Dom/Tag.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

use PHPHtmlParser\DTO\Tag\AttributeDTO;
use PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException;
use stringEncode\Encode;
use StringEncoder\Contracts\EncoderInterface;
use StringEncoder\Exceptions\InvalidEncodingException;


/**
* Class Tag.
Expand Down Expand Up @@ -49,7 +51,7 @@ class Tag
/**
* The encoding class to... encode the tags.
*
* @var Encode|null
* @var EncoderInterface|null
*/
protected $encode;

Expand Down Expand Up @@ -135,7 +137,7 @@ public function isSelfClosing(): bool
/**
* Sets the encoding type to be used.
*/
public function setEncoding(Encode $encode): void
public function setEncoding(EncoderInterface $encode): void
{
$this->encode = $encode;
}
Expand Down Expand Up @@ -263,7 +265,7 @@ public function setAttributes(array $attr)
/**
* Returns all attributes of this tag.
*
* @throws \stringEncode\Exception
* @throws InvalidEncodingException
*
* @return AttributeDTO[]
*/
Expand All @@ -286,7 +288,7 @@ public function getAttributes(): array
* Returns an attribute by the key.
*
* @throws AttributeNotFoundException
* @throws \stringEncode\Exception
* @throws InvalidEncodingException
*/
public function getAttribute(string $key): AttributeDTO
{
Expand Down Expand Up @@ -332,12 +334,14 @@ public function makeOpeningTag()
} catch (\TypeError $e) {
$val = null;
}
$val = $attributeDTO->getValue();

if (\is_null($val)) {
$return .= ' ' . $key;
} elseif ($attributeDTO->isDoubleQuote()) {
$val = $attributeDTO->getValue();
$return .= ' ' . $key . '="' . $val . '"';
} else {
$val = $attributeDTO->getValue();
$return .= ' ' . $key . '=\'' . $val . '\'';
}
}
Expand Down
9 changes: 5 additions & 4 deletions tests/Node/TextTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
use PHPHtmlParser\Dom\Node\TextNode;
use PHPHtmlParser\Options;
use PHPUnit\Framework\TestCase;
use stringEncode\Encode;
use StringEncoder\Encoder;


class NodeTextTest extends TestCase
{
Expand Down Expand Up @@ -66,9 +67,9 @@ public function testSetText()

public function testSetTextEncoded()
{
$encode = new Encode();
$encode->from('UTF-8');
$encode->to('UTF-8');
$encode = new Encoder();
$encode->setSourceEncoding('UTF-8');
$encode->setTargetEncoding('UTF-8');

$node = new TextNode('foo bar');
$node->propagateEncoding($encode);
Expand Down