Skip to content

Commit

Permalink
fix(str): add invariant to avoid unexpected errors when parsing an in…
Browse files Browse the repository at this point in the history
…valid UTF8 string (#410)
  • Loading branch information
devnix authored Jul 18, 2023
1 parent b0e98cd commit 1d015ad
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 13 deletions.
8 changes: 4 additions & 4 deletions docs/component/str.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
- [range](./../../src/Psl/Str/range.php#L41)
- [repeat](./../../src/Psl/Str/repeat.php#L26)
- [replace](./../../src/Psl/Str/replace.php#L15)
- [replace_ci](./../../src/Psl/Str/replace_ci.php#L16)
- [replace_ci](./../../src/Psl/Str/replace_ci.php#L20)
- [replace_every](./../../src/Psl/Str/replace_every.php#L15)
- [replace_every_ci](./../../src/Psl/Str/replace_every_ci.php#L15)
- [reverse](./../../src/Psl/Str/reverse.php#L14)
Expand All @@ -69,9 +69,9 @@
- [strip_prefix](./../../src/Psl/Str/strip_prefix.php#L13)
- [strip_suffix](./../../src/Psl/Str/strip_suffix.php#L13)
- [to_int](./../../src/Psl/Str/to_int.php#L12)
- [trim](./../../src/Psl/Str/trim.php#L18)
- [trim_left](./../../src/Psl/Str/trim_left.php#L18)
- [trim_right](./../../src/Psl/Str/trim_right.php#L18)
- [trim](./../../src/Psl/Str/trim.php#L21)
- [trim_left](./../../src/Psl/Str/trim_left.php#L21)
- [trim_right](./../../src/Psl/Str/trim_right.php#L21)
- [truncate](./../../src/Psl/Str/truncate.php#L25)
- [uppercase](./../../src/Psl/Str/uppercase.php#L14)
- [width](./../../src/Psl/Str/width.php#L14)
Expand Down
2 changes: 2 additions & 0 deletions src/Psl/Encoding/Base64/Internal/Base64.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ public static function decode(string $base64, bool $explicit_padding = true): st
if ($explicit_padding && $base64_length % 4 !== 0) {
throw new Exception\IncorrectPaddingException('The given base64 string has incorrect padding.');
}

/** @psalm-suppress MissingThrowsDocblock */
$base64 = Str\trim_right($base64, '=');
$base64_length = Str\length($base64, encoding: Str\Encoding::ASCII_8BIT);

Expand Down
16 changes: 15 additions & 1 deletion src/Psl/Str/replace_ci.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace Psl\Str;

use Psl\Regex;

use function preg_quote;
use function preg_split;

Expand All @@ -12,12 +14,24 @@
* `$replacement` (case-insensitive).
*
* @pure
*
* @throws Exception\InvalidArgumentException if the underlying regex split fails, e.g. when $needle or $haystack is not a valid UTF-8 string.
*/
function replace_ci(string $haystack, string $needle, string $replacement, Encoding $encoding = Encoding::UTF_8): string
{
    // Nothing to replace for an empty needle, or when search_ci() reports no
    // occurrence (null). $encoding is only consulted by this lookup.
    if ('' === $needle || null === search_ci($haystack, $needle, 0, $encoding)) {
        return $haystack;
    }

    try {
        // Always split through the error-checking wrapper. Returning the raw
        // preg_split() result directly would hand `false` to join() whenever
        // PCRE rejects the input (for example malformed UTF-8 under the `u`
        // modifier) instead of raising the documented exception.
        //
        // preg_quote() escapes `{` and `}` by default, so the needle cannot
        // terminate the `{...}` delimiters early.
        /** @var list<string> */
        $pieces = Regex\Internal\call_preg(
            'preg_split',
            static fn() => preg_split('{' . preg_quote($needle, '/') . '}iu', $haystack, -1),
        );
    } catch (Regex\Exception\RuntimeException | Regex\Exception\InvalidPatternException $error) {
        // Re-throw as a Str-level exception, keeping the regex error as `previous`.
        throw new Exception\InvalidArgumentException($error->getMessage(), previous: $error);
    }

    // Glue the pieces back together with the replacement in place of each match.
    return join($pieces, $replacement);
}
11 changes: 9 additions & 2 deletions src/Psl/Str/trim.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

namespace Psl\Str;

use Psl\Regex;

use function preg_quote;
use function preg_replace;

/**
* Returns the given string with whitespace stripped from the beginning and end.
Expand All @@ -14,11 +15,17 @@
* be stripped: space, tab, newline, carriage return, NUL byte, vertical tab,
* form feed, no-break space (U+00A0) and zero width no-break space (U+FEFF).
*
* @pure
*
* @throws Exception\InvalidArgumentException if the regex replacement fails, e.g. when $string or $char_mask is not a valid UTF-8 string.
*/
function trim(string $string, ?string $char_mask = null): string
{
    // Default mask: space, tab, LF, CR, NUL, vertical tab, form feed,
    // no-break space (U+00A0) and U+FEFF.
    $char_mask ??= " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}";
    // Escape the mask so every character is taken literally inside the
    // character class built below.
    $char_mask = preg_quote($char_mask, null);

    try {
        // Go through Regex\replace() only. A bare preg_replace() returns null
        // on malformed UTF-8, which would violate the `string` return type
        // instead of raising the documented exception.
        //
        // Pattern: a possessive run of mask characters at the start or at the
        // end; `u` enables UTF-8 mode, `D` makes `$` match only at the very end.
        return Regex\replace($string, "{^[{$char_mask}]++|[{$char_mask}]++$}uD", '');
    } catch (Regex\Exception\RuntimeException | Regex\Exception\InvalidPatternException $error) {
        // Re-throw as a Str-level exception, keeping the regex error as `previous`.
        throw new Exception\InvalidArgumentException($error->getMessage(), previous: $error);
    }
}
11 changes: 9 additions & 2 deletions src/Psl/Str/trim_left.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

namespace Psl\Str;

use Psl\Regex;

use function preg_quote;
use function preg_replace;

/**
* Returns the given string with whitespace stripped from the left.
Expand All @@ -14,11 +15,17 @@
* be stripped: space, tab, newline, carriage return, NUL byte, vertical tab,
* form feed, no-break space (U+00A0) and zero width no-break space (U+FEFF).
*
* @pure
*
* @throws Exception\InvalidArgumentException if the regex replacement fails, e.g. when $string or $char_mask is not a valid UTF-8 string.
*/
function trim_left(string $string, ?string $char_mask = null): string
{
    // Default mask: space, tab, LF, CR, NUL, vertical tab, form feed,
    // no-break space (U+00A0) and U+FEFF.
    $char_mask ??= " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}";
    // Escape the mask so every character is taken literally inside the
    // character class built below.
    $char_mask = preg_quote($char_mask, null);

    try {
        // Go through Regex\replace() only. A bare preg_replace() returns null
        // on malformed UTF-8, which would violate the `string` return type
        // instead of raising the documented exception.
        //
        // Pattern: a possessive run of mask characters anchored at the start;
        // `u` enables UTF-8 mode.
        return Regex\replace($string, "{^[{$char_mask}]++}uD", '');
    } catch (Regex\Exception\RuntimeException | Regex\Exception\InvalidPatternException $error) {
        // Re-throw as a Str-level exception, keeping the regex error as `previous`.
        throw new Exception\InvalidArgumentException($error->getMessage(), previous: $error);
    }
}
11 changes: 9 additions & 2 deletions src/Psl/Str/trim_right.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

namespace Psl\Str;

use Psl\Regex;

use function preg_quote;
use function preg_replace;

/**
* Returns the given string with whitespace stripped from the right.
Expand All @@ -14,11 +15,17 @@
* be stripped: space, tab, newline, carriage return, NUL byte, vertical tab,
* form feed, no-break space (U+00A0) and zero width no-break space (U+FEFF).
*
* @pure
*
* @throws Exception\InvalidArgumentException if the regex replacement fails, e.g. when $string or $char_mask is not a valid UTF-8 string.
*/
function trim_right(string $string, ?string $char_mask = null): string
{
    // Default mask: space, tab, LF, CR, NUL, vertical tab, form feed,
    // no-break space (U+00A0) and U+FEFF.
    $char_mask ??= " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}";
    // Escape the mask so every character is taken literally inside the
    // character class built below.
    $char_mask = preg_quote($char_mask, null);

    try {
        // Go through Regex\replace() only. A bare preg_replace() returns null
        // on malformed UTF-8, which would violate the `string` return type
        // instead of raising the documented exception.
        //
        // Pattern: a possessive run of mask characters anchored at the end;
        // `u` enables UTF-8 mode, `D` makes `$` match only at the very end.
        return Regex\replace($string, "{[{$char_mask}]++$}uD", '');
    } catch (Regex\Exception\RuntimeException | Regex\Exception\InvalidPatternException $error) {
        // Re-throw as a Str-level exception, keeping the regex error as `previous`.
        throw new Exception\InvalidArgumentException($error->getMessage(), previous: $error);
    }
}
1 change: 1 addition & 0 deletions src/Psl/Type/Internal/LiteralScalarType.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ public function toString(): string
}

if (Type\float()->matches($value)) {
/** @psalm-suppress MissingThrowsDocblock */
$string_representation = Str\trim_right(Str\format('%.14F', $value), '0');
/** @psalm-suppress MissingThrowsDocblock */
if (Str\ends_with($string_representation, '.')) {
Expand Down
9 changes: 7 additions & 2 deletions src/Psl/Type/Internal/PositiveIntType.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,13 @@ public function coerce(mixed $value): int
return $int;
}

$trimmed = Str\trim_left($str, '0');
$int = Str\to_int($trimmed);
try {
$trimmed = Str\trim_left($str, '0');
} catch (Str\Exception\InvalidArgumentException $e) {
throw CoercionException::withValue($value, $this->toString(), $this->getTrace());
}

$int = Str\to_int($trimmed);
if (null !== $int && $int > 0) {
return $int;
}
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/Str/ReplaceCiTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,36 @@ public function provideData(): array
['foo', 'foo', 'bar', 'baz'],
];
}

/**
 * Str\replace_ci() must reject malformed UTF-8 with the expected exception.
 *
 * @dataProvider provideBadUtf8Data
 */
public function testBadUtf8(string $string, string $expectedException, string $expectedExceptionMessage): void
{
    // Expectations have to be registered before the call under test runs.
    $this->expectExceptionMessage($expectedExceptionMessage);
    $this->expectException($expectedException);

    // The same malformed bytes are used as haystack, needle and replacement.
    Str\replace_ci($string, $string, $string);
}

/**
 * Overlong 2-, 3- and 4-byte UTF-8 sequences, each paired with the exception
 * class and message expected from Str\replace_ci().
 */
public function provideBadUtf8Data(): iterable
{
    // Malformed input => expected exception message.
    $cases = [
        "\xc1\xbf" => 'Compilation failed: UTF-8 error: overlong 2-byte sequence at offset 0',
        "\xe0\x81\xbf" => 'Compilation failed: UTF-8 error: overlong 3-byte sequence at offset 0',
        "\xf0\x80\x81\xbf" => 'Compilation failed: UTF-8 error: overlong 4-byte sequence at offset 0',
    ];

    foreach ($cases as $malformed => $message) {
        yield [$malformed, Str\Exception\InvalidArgumentException::class, $message];
    }
}
}
32 changes: 32 additions & 0 deletions tests/unit/Str/TrimLeftTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,36 @@ public function provideData(): array
],
];
}

/**
 * Str\trim_left() must reject malformed UTF-8 with the expected exception.
 *
 * @dataProvider provideBadUtf8Data
 */
public function testBadUtf8(string $string, string $expectedException, string $expectedExceptionMessage): void
{
    // Expectations have to be registered before the call under test runs.
    $this->expectExceptionMessage($expectedExceptionMessage);
    $this->expectException($expectedException);

    Str\trim_left($string);
}

/**
 * Overlong 2-, 3- and 4-byte UTF-8 sequences, each paired with the exception
 * class and message expected from Str\trim_left().
 */
public function provideBadUtf8Data(): iterable
{
    // Every malformed input is expected to produce the same message.
    $message = 'Malformed UTF-8 characters, possibly incorrectly encoded';

    foreach (["\xc1\xbf", "\xe0\x81\xbf", "\xf0\x80\x81\xbf"] as $malformed) {
        yield [$malformed, Str\Exception\InvalidArgumentException::class, $message];
    }
}
}
32 changes: 32 additions & 0 deletions tests/unit/Str/TrimRightTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,36 @@ public function provideData(): array
],
];
}

/**
 * Str\trim_right() must reject malformed UTF-8 with the expected exception.
 *
 * @dataProvider provideBadUtf8Data
 */
public function testBadUtf8(string $string, string $expectedException, string $expectedExceptionMessage): void
{
    // Expectations have to be registered before the call under test runs.
    $this->expectExceptionMessage($expectedExceptionMessage);
    $this->expectException($expectedException);

    Str\trim_right($string);
}

/**
 * Overlong 2-, 3- and 4-byte UTF-8 sequences, each paired with the exception
 * class and message expected from Str\trim_right().
 */
public function provideBadUtf8Data(): iterable
{
    // Every malformed input is expected to produce the same message.
    $message = 'Malformed UTF-8 characters, possibly incorrectly encoded';

    foreach (["\xc1\xbf", "\xe0\x81\xbf", "\xf0\x80\x81\xbf"] as $malformed) {
        yield [$malformed, Str\Exception\InvalidArgumentException::class, $message];
    }
}
}
32 changes: 32 additions & 0 deletions tests/unit/Str/TrimTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,36 @@ public function provideData(): array
],
];
}

/**
 * Str\trim() must reject malformed UTF-8 with the expected exception.
 *
 * @dataProvider provideBadUtf8Data
 */
public function testBadUtf8(string $string, string $expectedException, string $expectedExceptionMessage): void
{
    // Expectations have to be registered before the call under test runs.
    $this->expectExceptionMessage($expectedExceptionMessage);
    $this->expectException($expectedException);

    Str\trim($string);
}

/**
 * Overlong 2-, 3- and 4-byte UTF-8 sequences, each paired with the exception
 * class and message expected from Str\trim().
 */
public function provideBadUtf8Data(): iterable
{
    // Every malformed input is expected to produce the same message.
    $message = 'Malformed UTF-8 characters, possibly incorrectly encoded';

    foreach (["\xc1\xbf", "\xe0\x81\xbf", "\xf0\x80\x81\xbf"] as $malformed) {
        yield [$malformed, Str\Exception\InvalidArgumentException::class, $message];
    }
}
}
1 change: 1 addition & 0 deletions tests/unit/Type/PositiveIntTypeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public function getInvalidCoercions(): iterable
yield [$this->stringable('-9223372036854775809')];
yield ['0xFF'];
yield ['-0xFF'];
yield ["\xc1\xbf"];
yield [''];
}

Expand Down

0 comments on commit 1d015ad

Please sign in to comment.