-
-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Regex] Introduce matching() function
- Loading branch information
Showing
8 changed files
with
282 additions
and
14 deletions.
There are no files selected for viewing
79 changes: 79 additions & 0 deletions
79
integration/Psalm/EventHandler/RegexCaptureGroupsFunctionReturnTypeProvider.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Psl\Integration\Psalm\EventHandler; | ||
|
||
use Psalm\Plugin\EventHandler\Event\FunctionReturnTypeProviderEvent; | ||
use Psalm\Plugin\EventHandler\FunctionReturnTypeProviderInterface; | ||
use Psalm\Type; | ||
|
||
/**
 * Psalm return type provider for `Psl\Regex\capture_groups()`.
 *
 * When the first call argument is a keyed array whose elements are each exactly one
 * literal int or literal string, the reported return type is a `TypeInterface` wrapping
 * a keyed array that holds a string entry for key 0 and for every literal found.
 * In every other situation the generic `TypeInterface<array<array-key, string>>` is reported.
 */
final class RegexCaptureGroupsFunctionReturnTypeProvider implements FunctionReturnTypeProviderInterface
{
    /**
     * @return array<lowercase-string>
     */
    public static function getFunctionIds(): array
    {
        return [
            'psl\regex\capture_groups'
        ];
    }

    /**
     * Infer the precise capture group shape from the literal keys passed to the call.
     *
     * @return Type\Union The refined type, or the generic fallback when the argument
     *                    cannot be analysed precisely.
     */
    public static function getFunctionReturnType(FunctionReturnTypeProviderEvent $event): ?Type\Union
    {
        $statements_source = $event->getStatementsSource();
        $call_args = $event->getCallArgs();

        $argument = $call_args[0] ?? null;
        if (null === $argument) {
            return self::fallbackType();
        }

        $argument_type = $statements_source->getNodeTypeProvider()->getType($argument->value);
        if (null === $argument_type) {
            return self::fallbackType();
        }

        $capture_groups = $argument_type->getAtomicTypes()['array'] ?? null;
        if (!$capture_groups instanceof Type\Atomic\TKeyedArray) {
            return self::fallbackType();
        }

        $string = static fn (): Type\Union => new Type\Union([new Type\Atomic\TString()]);

        // Key 0 is always part of the resulting shape, whether or not it was requested.
        $properties = [
            0 => $string()
        ];
        foreach ($capture_groups->properties as $value) {
            $atomic_types = array_values($value->getAtomicTypes());

            // An element such as `1|2` does not identify a single key: picking only the
            // first literal would report a shape that may not match the runtime value.
            if (1 !== count($atomic_types)) {
                return self::fallbackType();
            }

            $literal = $atomic_types[0];
            if (!$literal instanceof Type\Atomic\TLiteralInt && !$literal instanceof Type\Atomic\TLiteralString) {
                return self::fallbackType();
            }

            $properties[$literal->value] = $string();
        }

        return new Type\Union([new Type\Atomic\TGenericObject('Psl\Type\TypeInterface', [
            new Type\Union([
                new Type\Atomic\TKeyedArray($properties)
            ])
        ])]);
    }

    /**
     * Generic type used whenever the capture group keys cannot be determined:
     * a `TypeInterface` wrapping an array with array-key keys and string values.
     */
    private static function fallbackType(): Type\Union
    {
        return new Type\Union([new Type\Atomic\TGenericObject('Psl\Type\TypeInterface', [
            new Type\Union([
                new Type\Atomic\TArray([
                    new Type\Union([new Type\Atomic\TArrayKey()]),
                    new Type\Union([new Type\Atomic\TString()])
                ])
            ])
        ])]);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Psl\Regex; | ||
|
||
use Psl\Dict; | ||
use Psl\Type; | ||
|
||
/**
 * Build the shape type used to validate the capture groups of a regex match.
 *
 * Key 0 is always prepended to the requested $groups, and duplicates are dropped
 * before the shape is built. Each key is associated with the string type.
 *
 * @param list<array-key> $groups
 *
 * @return Type\TypeInterface<array<array-key, string>>
 *
 * @psalm-suppress MixedReturnTypeCoercion - Psalm loses track of the keys. No worries, another psalm plugin fixes this!
 */
function capture_groups(array $groups): Type\TypeInterface
{
    $group_keys = Dict\unique([0, ...$groups]);

    return Type\shape(
        Dict\from_keys(
            $group_keys,
            /**
             * @return Type\TypeInterface<string>
             */
            static function (): Type\TypeInterface {
                return Type\string();
            }
        )
    );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Psl\Regex; | ||
|
||
use Psl\Type; | ||
|
||
use function preg_match; | ||
|
||
/**
 * Determine if $subject matches the given $pattern and return the first match.
 *
 * The raw matches are coerced through $capture_groups before being returned, so the
 * result has the shape described by that type.
 *
 * @template T of array
 *
 * @param string $subject The string to search in.
 * @param non-empty-string $pattern The pattern to match against.
 * @param Type\TypeInterface<T> $capture_groups What shape does the matching items have?
 * @param int $offset Offset handed to preg_match as the position to start searching from.
 *
 * @return T|null The coerced capture groups, or null when $subject does not match.
 *
 * @throws Exception\RuntimeException If an internal error occurs, or if the matches
 *                                    cannot be coerced into $capture_groups.
 * @throws Exception\InvalidPatternException If $pattern is invalid.
 */
function first_match(string $subject, string $pattern, Type\TypeInterface $capture_groups, int $offset = 0): ?array
{
    $groups = Internal\call_preg(
        'preg_match',
        static function () use ($subject, $pattern, $offset): ?array {
            $captured = [];
            $count = preg_match($pattern, $subject, $captured, 0, $offset);

            // Only an explicit "zero matches" result is reported as "no match".
            if (0 === $count) {
                return null;
            }

            return $captured;
        }
    );

    if (null === $groups) {
        return null;
    }

    try {
        $coerced = $capture_groups->coerce($groups);
    } catch (Type\Exception\CoercionException $e) {
        throw new Exception\RuntimeException('Invalid capture groups', 0, $e);
    }

    return $coerced;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Psl\Tests\Regex; | ||
|
||
use PHPUnit\Framework\TestCase; | ||
|
||
use function Psl\Regex\capture_groups; | ||
|
||
/**
 * Tests for the capture_groups() shape factory.
 */
final class CaptureGroupsTest extends TestCase
{
    /**
     * Only group 1 is requested, yet data containing key 0 must coerce unchanged.
     */
    public function testItAlwaysAddsZeroCaptureResult(): void
    {
        $data = [0 => 'Hello', 1 => 'World'];
        $shape = capture_groups([1]);
        $actual = $shape->coerce($data);

        // Expected value goes first so failure output labels both sides correctly.
        static::assertSame($data, $actual);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace Psl\Tests\Regex; | ||
|
||
use PHPUnit\Framework\TestCase; | ||
use Psl\Regex; | ||
use Psl\Type\TypeInterface; | ||
|
||
use function Psl\Regex\capture_groups; | ||
|
||
/**
 * Tests for Regex\first_match(): matching subjects, non-matching subjects and invalid patterns.
 */
final class FirstMatchTest extends TestCase
{
    /**
     * @dataProvider provideMatchingData
     */
    public function testMatching(
        array $expected,
        string $subject,
        string $pattern,
        TypeInterface $shape,
        int $offset = 0
    ): void {
        static::assertSame($expected, Regex\first_match($subject, $pattern, $shape, $offset));
    }

    /**
     * @dataProvider provideNonMatchingData
     */
    public function testNotMatching(string $subject, string $pattern, int $offset = 0): void
    {
        static::assertNull(Regex\first_match($subject, $pattern, capture_groups([]), $offset));
    }

    public function testMatchingWithInvalidPattern(): void
    {
        $this->expectException(Regex\Exception\InvalidPatternException::class);
        $this->expectExceptionMessage("No ending delimiter '/' found");

        Regex\first_match('hello', '/hello', capture_groups([]));
    }

    /**
     * Datasets of the form [expected matches, subject, pattern, capture group shape].
     *
     * Declared static so the provider can be invoked without a test case instance.
     */
    public static function provideMatchingData(): iterable
    {
        yield [
            [
                0 => 'PHP',
                1 => 'PHP',
            ],
            'PHP is the web scripting language of choice.',
            '/(php)/i',
            capture_groups([0, 1])
        ];
        yield [
            [
                0 => 'Hello world',
                1 => 'Hello',
            ],
            'Hello world is the web scripting language of choice.',
            '/(hello) world/i',
            capture_groups([0, 1])
        ];
        yield [
            [
                0 => 'web',
                1 => 'web',
            ],
            'PHP is the web scripting language of choice.',
            '/(\bweb\b)/i',
            capture_groups([0, 1])
        ];
        yield [
            [
                0 => 'PHP',
                'language' => 'PHP',
            ],
            'PHP is the web scripting language of choice.',
            '/(?P<language>PHP)/',
            capture_groups([0, 'language'])
        ];
        // Group 0 is not requested explicitly here, but is still expected in the result.
        yield [
            [
                0 => 'http://www.php.net',
                1 => 'www.php.net'
            ],
            'http://www.php.net/index.html',
            '@^(?:http://)?([^/]+)@i',
            capture_groups([1])
        ];
    }

    /**
     * Datasets of the form [subject, pattern] where the pattern must not match the subject.
     *
     * Declared static so the provider can be invoked without a test case instance.
     */
    public static function provideNonMatchingData(): iterable
    {
        yield ['PHP is the web scripting language of choice.', '/php/'];
        yield ['PHP is the website scripting language of choice.', '/\bweb\b/i'];
        yield ['php is the web scripting language of choice.', '/PHP/'];
        yield ['hello', '/[^.]+\.[^.]+$/'];
    }
}