Skip to content

Commit

Permalink
[PHP 8.4][Intl] Add grapheme_str_split
Browse files Browse the repository at this point in the history
Add a polyfill for the `grapheme_str_split` function added in PHP 8.4.

Requires PHP 7.3, because the polyfill is based on `\X` Regex, and it
only works properly on PCRE2, which
[only comes with PHP 7.3+](https://php.watch/versions/7.3/pcre2).

Further, there are some cases where the polyfill cannot split complex
characters (such as two consecutive country flag Emojis). This is now
fixed in PCRE2 (see https://github.com/PCRE2Project/pcre2/issues/410).
However, this change will likely only make it to PHP 8.4.

References:
 - [RFC: Grapheme cluster for `str_split` function: `grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split)
 - [PHP.Watch: PHP 8.4: New `grapheme_str_split` function](https://php.watch/versions/8.4/grapheme_str_split)
  • Loading branch information
Ayesh committed Jun 8, 2024
1 parent e85ab80 commit 75b1867
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Polyfills are provided for:
- the `Date*Exception/Error` classes introduced in PHP 8.3;
- the `SQLite3Exception` class introduced in PHP 8.3;
- the `mb_ucfirst` and `mb_lcfirst` functions introduced in PHP 8.4;
- the `grapheme_str_split` function introduced in PHP 8.4 (requires PHP >= 7.3);

It is strongly recommended to upgrade your PHP version and/or install the missing
extensions whenever possible. This polyfill should be used only when there is no
Expand Down
33 changes: 33 additions & 0 deletions src/Intl/Grapheme/Grapheme.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* - grapheme_strrpos - Find position (in grapheme units) of last occurrence of a string
* - grapheme_strstr - Returns part of haystack string from the first occurrence of needle to the end of haystack
* - grapheme_substr - Return part of a string
* - grapheme_str_split - Splits a string into an array of individual or chunks of graphemes.
*
* @author Nicolas Grekas <p@tchwork.com>
*
Expand Down Expand Up @@ -191,6 +192,38 @@ public static function grapheme_strstr($s, $needle, $beforeNeedle = false)
return mb_strstr($s, $needle, $beforeNeedle, 'UTF-8');
}

/**
 * Splits a string into an array of grapheme clusters, or of chunks of $len clusters.
 *
 * @param string $s   The string to split
 * @param int    $len Number of grapheme clusters per returned element
 *
 * @return array|false The list of chunks (an empty array for an empty string); false when
 *                     no grapheme cluster could be matched or, on PHP < 8.0, when $len is
 *                     out of range
 *
 * @throws \ValueError On PHP >= 8.0, when $len is lower than 1 or greater than 1073741823
 */
public static function grapheme_str_split($s, $len = 1)
{
    // 0 has to be rejected too: the error message states "greater than 0" and
    // array_chunk() below cannot build chunks of size 0.
    if ($len < 1 || $len > 1073741823) {
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $s) {
        return [];
    }

    // \X matches one extended grapheme cluster; the "u" modifier makes PCRE treat $s as UTF-8.
    preg_match_all('/\X/u', $s, $matches);

    if (empty($matches[0])) {
        return false;
    }

    if (1 === $len) {
        return $matches[0];
    }

    // Group the clusters $len at a time, then glue each group back into a single string.
    $chunks = array_chunk($matches[0], $len);

    foreach ($chunks as $i => $chunk) {
        $chunks[$i] = implode('', $chunk);
    }

    return $chunks;
}

private static function grapheme_position($s, $needle, $offset, $mode)
{
$needle = (string) $needle;
Expand Down
1 change: 1 addition & 0 deletions src/Intl/Grapheme/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ This component provides a partial, native PHP implementation of the
- [`grapheme_strstr`](https://php.net/grapheme_strstr): Returns part of haystack string from
the first occurrence of needle to the end of haystack
- [`grapheme_substr`](https://php.net/grapheme_substr): Return part of a string
- [`grapheme_str_split`](https://php.net/grapheme_str_split): Splits a string into an array of individual or chunks of graphemes.

More information can be found in the
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).
Expand Down
4 changes: 4 additions & 0 deletions src/Intl/Grapheme/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,7 @@ function grapheme_strstr($haystack, $needle, $beforeNeedle = false) { return p\G
// Declare grapheme_substr() only when no function of that name exists yet.
if (false === function_exists('grapheme_substr')) {
    function grapheme_substr($string, $offset, $length = null)
    {
        return p\Grapheme::grapheme_substr($string, $offset, $length);
    }
}

// The declarations kept in bootstrap73.php are only loaded on PHP 7.3 and later.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}
17 changes: 17 additions & 0 deletions src/Intl/Grapheme/bootstrap73.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\Polyfill\Intl\Grapheme as p;

// Delegate to this component's own Grapheme class (as the other bootstrap files of
// this directory do) instead of the Php84 class, which lives in a separate package.
if (!function_exists('grapheme_str_split') && function_exists('grapheme_substr')) {
    function grapheme_str_split(string $string, int $length = 1) { return p\Grapheme::grapheme_str_split($string, $length); }
}

3 changes: 3 additions & 0 deletions src/Intl/Grapheme/bootstrap80.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ function grapheme_strstr(?string $haystack, ?string $needle, ?bool $beforeNeedle
// Each wrapper is declared only when no function of the same name exists yet.
if (false === function_exists('grapheme_substr')) {
    function grapheme_substr(?string $string, ?int $offset, ?int $length = null): string|false
    {
        return p\Grapheme::grapheme_substr((string) $string, (int) $offset, $length);
    }
}
if (false === function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1): array|false
    {
        return p\Grapheme::grapheme_str_split($string, $length);
    }
}
29 changes: 29 additions & 0 deletions src/Php84/Php84.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,33 @@ public static function mb_lcfirst(string $string, ?string $encoding = null): str

return $firstChar . mb_substr($string, 1, null, $encoding);
}

/**
 * Splits a string into an array of grapheme clusters, or of chunks of $length clusters.
 *
 * @param string $string The string to split
 * @param int    $length Number of grapheme clusters per returned element
 *
 * @return array|false The list of chunks (an empty array for an empty string), or false
 *                     when no grapheme cluster could be matched
 *
 * @throws \ValueError When $length is lower than 1 or greater than 1073741823
 */
public static function grapheme_str_split(string $string, int $length = 1)
{
    // 0 has to be rejected too: the error message states "greater than 0" and
    // array_chunk() below cannot build chunks of size 0.
    if ($length < 1 || $length > 1073741823) {
        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $string) {
        return [];
    }

    // \X matches one extended grapheme cluster; the "u" modifier makes PCRE treat the input as UTF-8.
    preg_match_all('/\X/u', $string, $matches);

    if (empty($matches[0])) {
        return false;
    }

    if (1 === $length) {
        return $matches[0];
    }

    // Group the clusters $length at a time, then glue each group back into a single string.
    $chunks = array_chunk($matches[0], $length);

    foreach ($chunks as $i => $chunk) {
        $chunks[$i] = implode('', $chunk);
    }

    return $chunks;
}
}
1 change: 1 addition & 0 deletions src/Php84/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Symfony Polyfill / Php84
This component provides features added to PHP 8.4 core:

- [`mb_ucfirst` and `mb_lcfirst`](https://wiki.php.net/rfc/mb_ucfirst)
- [`grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split)

More information can be found in the
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).
Expand Down
4 changes: 4 additions & 0 deletions src/Php84/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84:
// Declare mb_lcfirst() only when no function of that name exists yet.
if (false === function_exists('mb_lcfirst')) {
    function mb_lcfirst($string, ?string $encoding = null): string
    {
        return p\Php84::mb_lcfirst($string, $encoding);
    }
}

// The declarations kept in bootstrap73.php are only loaded on PHP 7.3 and later.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}
21 changes: 21 additions & 0 deletions src/Php84/bootstrap73.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\Polyfill\Php84 as p;

// Nothing is declared by this file on PHP 8.4 and later.
if (80400 <= \PHP_VERSION_ID) {
    return;
}

// Declared only when grapheme_substr() exists and grapheme_str_split() does not.
if (function_exists('grapheme_substr') && false === function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1)
    {
        return p\Php84::grapheme_str_split($string, $length);
    }
}

24 changes: 24 additions & 0 deletions tests/Intl/Grapheme/GraphemeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -207,4 +207,28 @@ public function testGraphemeStrstr()
$this->assertSame('국어', grapheme_strstr('한국어', ''));
$this->assertSame('ÉJÀ', grapheme_stristr('DÉJÀ', 'é'));
}

/**
 * Checks grapheme_str_split() against every dataset of the provider.
 *
 * @dataProvider graphemeStrSplitDataProvider
 * @requires PHP 7.3
 */
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}

/**
 * Datasets for testGraphemeStrSplit().
 *
 * @return array<int, array{0: string, 1: int, 2: array}> Rows of [input string, chunk length, expected chunks]
 */
public static function graphemeStrSplitDataProvider(): array
{
    $return = [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']],
    ];

    // https://github.com/PCRE2Project/pcre2/issues/410
    // Only add this dataset on PCRE2 10.44 or newer (major and minor are compared separately).
    // NOTE(review): this dataset repeats the previous input; confirm whether a case specific
    // to the PCRE2 issue above was intended here.
    if (\PCRE_VERSION_MAJOR > 10 || (10 === \PCRE_VERSION_MAJOR && \PCRE_VERSION_MINOR >= 44)) {
        $return[] = ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']];
    }

    return $return;
}
}
20 changes: 20 additions & 0 deletions tests/Php84/Php84Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,24 @@ public static function lcFirstDataProvider(): array {
["ß", "ß"],
];
}

/**
 * Checks grapheme_str_split() against every dataset of the provider.
 *
 * @dataProvider graphemeStrSplitDataProvider
 * @requires PHP 7.3
 */
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}

/**
 * Datasets for testGraphemeStrSplit().
 *
 * @return array<int, array{0: string, 1: int, 2: array}> Rows of [input string, chunk length, expected chunks]
 */
public static function graphemeStrSplitDataProvider(): array
{
    return [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']],
        // ['👭🏻👰🏿‍♂️', 2, ['👭🏻', '👰🏿‍♂️']], // https://github.com/PCRE2Project/pcre2/issues/410
    ];
}
}

0 comments on commit 75b1867

Please sign in to comment.