Skip to content

Commit

Permalink
Improve mb_*trim polyfills
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolas-grekas committed Jun 20, 2024
1 parent 9f6d1a0 commit 3e6c61a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 42 deletions.
47 changes: 22 additions & 25 deletions Php84.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
*/
final class Php84
{
private const CHARACTERS = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";

public static function mb_ucfirst(string $string, ?string $encoding = null): string
{
if (null === $encoding) {
Expand Down Expand Up @@ -113,20 +111,20 @@ public static function array_all(array $array, callable $callback): bool

/**
 * Polyfill entry point for mb_trim(): strips a run of the given characters from both the start and the end of $string.
 *
 * All work is delegated to mb_internal_trim(); this method only supplies the regex template.
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers, so two `return` statements appear
 * back to back; presumably the first is the removed line and the second its replacement — confirm against the repository.
 *
 * @param string      $string     the string to trim
 * @param string|null $characters characters to strip; null lets mb_internal_trim() pick its default set
 * @param string|null $encoding   encoding of $string; null lets mb_internal_trim() fall back to mb_internal_encoding()
 *
 * @return string the trimmed string
 */
public static function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string
{
// Delimiter-less template with two `%s` placeholders: a leading run OR a trailing run of the character class.
return self::mb_internal_trim('^[%s]+|[%s]+$', $string, $characters, $encoding);
// Same alternation wrapped in `{...}` delimiters with the D (dollar matches only at the very end) and u (UTF-8) modifiers;
// `%1$s` reuses the first sprintf() argument, and __FUNCTION__ hands the public function name over for error messages.
return self::mb_internal_trim('{^[%s]+|[%1$s]+$}Du', $string, $characters, $encoding, __FUNCTION__);
}

/**
 * Polyfill entry point for mb_ltrim(): strips a run of the given characters from the start of $string only.
 *
 * All work is delegated to mb_internal_trim(); this method only supplies the regex template.
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers, so two `return` statements appear
 * back to back; presumably the first is the removed line and the second its replacement — confirm against the repository.
 *
 * @param string      $string     the string to trim
 * @param string|null $characters characters to strip; null lets mb_internal_trim() pick its default set
 * @param string|null $encoding   encoding of $string; null lets mb_internal_trim() fall back to mb_internal_encoding()
 *
 * @return string the left-trimmed string
 */
public static function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
// Delimiter-less template: `^` anchors the character-class run to the start of the string.
return self::mb_internal_trim('^[%s]+', $string, $characters, $encoding);
// Same anchor wrapped in `{...}` delimiters with the D and u (UTF-8) modifiers; __FUNCTION__ is passed for error messages.
return self::mb_internal_trim('{^[%s]+}Du', $string, $characters, $encoding, __FUNCTION__);
}

/**
 * Polyfill entry point for mb_rtrim(): strips a run of the given characters from the end of $string only.
 *
 * All work is delegated to mb_internal_trim(); this method only supplies the regex template.
 *
 * NOTE(review): this span comes from a diff rendered without +/- markers, so two `return` statements appear
 * back to back; presumably the first is the removed line and the second its replacement — confirm against the repository.
 *
 * @param string      $string     the string to trim
 * @param string|null $characters characters to strip; null lets mb_internal_trim() pick its default set
 * @param string|null $encoding   encoding of $string; null lets mb_internal_trim() fall back to mb_internal_encoding()
 *
 * @return string the right-trimmed string
 */
public static function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
// Delimiter-less template: `$` anchors the character-class run to the end of the string.
return self::mb_internal_trim('[%s]+$', $string, $characters, $encoding);
// Same anchor wrapped in `{...}` delimiters; with the D modifier `$` matches only at the very end of the subject
// (not before a final newline), and u enables UTF-8 matching. __FUNCTION__ is passed for error messages.
return self::mb_internal_trim('{[%s]+$}Du', $string, $characters, $encoding, __FUNCTION__);
}

private static function mb_internal_trim(string $regex, string $string, ?string $characters = null, ?string $encoding = null): string
private static function mb_internal_trim(string $regex, string $string, ?string $characters, ?string $encoding, string $function): string
{
if (null === $encoding) {
$encoding = mb_internal_encoding();
Expand All @@ -135,41 +133,40 @@ private static function mb_internal_trim(string $regex, string $string, ?string
try {
$validEncoding = @mb_check_encoding('', $encoding);
} catch (\ValueError $e) {
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given.', debug_backtrace()[1]['function'], $encoding));
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', $function, $encoding));
}

// BC for PHP 7.3 and lower
if (!$validEncoding) {
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given.', debug_backtrace()[1]['function'], $encoding));
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', $function, $encoding));
}

if ('' === $characters) {
return null === $encoding ? $string : mb_convert_encoding($string, $encoding);
}

if (null === $characters) {
$characters = self::CHARACTERS;
if ('UTF-8' === $encoding || \in_array(strtolower($encoding), ['utf-8', 'utf8'], true)) {
$encoding = 'UTF-8';
}

$regexCharacter = preg_quote($characters ?? '', '/');
$regex = sprintf($regex, $regexCharacter, $regexCharacter);
$string = mb_convert_encoding($string, 'UTF-8', $encoding);

if ('ASCII' === mb_detect_encoding($characters) && 'ASCII' === mb_detect_encoding($string) && !empty(array_intersect(str_split(self::CHARACTERS), str_split($string)))) {
$options = 'g';
if (null !== $characters) {
$characters = mb_convert_encoding($characters, 'UTF-8', $encoding);
}

if (null === $characters) {
$characters = "\\0 \f\n\r\t\v\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
} else {
$options = '';
$characters = preg_quote($characters);
}

try {
$test = mb_ereg_replace($regex, "", $string, $options);

if (null === $test) {
throw new \Exception();
}
$string = preg_replace(sprintf($regex, $characters), '', $string);

return $test;
} catch (\Exception $e) {
return preg_replace(sprintf('/%s/', $regex), "", $string);
if ('UTF-8' === $encoding) {
return $string;
}
}

return mb_convert_encoding($string, $encoding, 'UTF-8');
}
}
2 changes: 1 addition & 1 deletion Resources/stubs/Deprecated.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ final class Deprecated
{
public readonly ?string $message;
public readonly ?string $since;

public function __construct(?string $message = null, ?string $since = null)
{
$this->message = $message;
Expand Down
34 changes: 18 additions & 16 deletions bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,6 @@
return;
}

if (!function_exists('mb_ucfirst')) {
function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84::mb_ucfirst($string, $encoding); }
}

if (!function_exists('mb_lcfirst')) {
function mb_lcfirst($string, ?string $encoding = null): string { return p\Php84::mb_lcfirst($string, $encoding); }
}

// Declare the global array_find() only when no function of that name exists yet, so an existing
// implementation is never redeclared. The call is forwarded unchanged to the Php84 polyfill class
// (`p` is presumably a namespace alias imported above this hunk — confirm).
if (!function_exists('array_find')) {
function array_find(array $array, callable $callback) { return p\Php84::array_find($array, $callback); }
}
Expand All @@ -39,14 +31,24 @@ function array_any(array $array, callable $callback): bool { return p\Php84::arr
function array_all(array $array, callable $callback): bool { return p\Php84::array_all($array, $callback); }
}

if (!function_exists('mb_trim') && extension_loaded('mbstring')) {
function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_trim($string, $characters, $encoding); }
}
if (extension_loaded('mbstring')) {
if (!function_exists('mb_ucfirst')) {
function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84::mb_ucfirst($string, $encoding); }
}

if (!function_exists('mb_ltrim') && extension_loaded('mbstring')) {
function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_ltrim($string, $characters, $encoding); }
}
if (!function_exists('mb_lcfirst')) {
function mb_lcfirst($string, ?string $encoding = null): string { return p\Php84::mb_lcfirst($string, $encoding); }
}

if (!function_exists('mb_trim')) {
function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_trim($string, $characters, $encoding); }
}

if (!function_exists('mb_ltrim')) {
function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_ltrim($string, $characters, $encoding); }
}

if (!function_exists('mb_rtrim') && extension_loaded('mbstring')) {
function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_rtrim($string, $characters, $encoding); }
if (!function_exists('mb_rtrim')) {
function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_rtrim($string, $characters, $encoding); }
}
}

0 comments on commit 3e6c61a

Please sign in to comment.