Skip to content

Commit

Permalink
Merge pull request #508 from PHPCSStandards/feature/tokenizer-php-har…
Browse files Browse the repository at this point in the history
…den-the-dnf-layer-some-more

Tokenizer/PHP: add extra hardening to the (DNF) type handling + efficiency improvement
  • Loading branch information
jrfnl committed May 22, 2024
2 parents 027c0cb + 83afad8 commit fb351b3
Show file tree
Hide file tree
Showing 5 changed files with 379 additions and 10 deletions.
37 changes: 27 additions & 10 deletions src/Tokenizers/PHP.php
Original file line number Diff line number Diff line change
Expand Up @@ -2602,7 +2602,9 @@ protected function processAdditional()

$this->createAttributesNestingMap();

$numTokens = count($this->tokens);
$numTokens = count($this->tokens);
$lastSeenTypeToken = $numTokens;

for ($i = ($numTokens - 1); $i >= 0; $i--) {
// Check for any unset scope conditions due to alternate IF/ENDIF syntax.
if (isset($this->tokens[$i]['scope_opener']) === true
Expand Down Expand Up @@ -3036,9 +3038,14 @@ protected function processAdditional()
continue;
} else if ($this->tokens[$i]['code'] === T_BITWISE_OR
|| $this->tokens[$i]['code'] === T_BITWISE_AND
|| $this->tokens[$i]['code'] === T_OPEN_PARENTHESIS
|| $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
) {
if ($lastSeenTypeToken < $i) {
// We've already examined this code to check if it is a type declaration and concluded it wasn't.
// No need to do it again.
continue;
}

/*
Convert "|" to T_TYPE_UNION or leave as T_BITWISE_OR.
Convert "&" to T_TYPE_INTERSECTION or leave as T_BITWISE_AND.
Expand Down Expand Up @@ -3133,9 +3140,14 @@ protected function processAdditional()

$typeTokenCountBefore = 0;
$typeOperators = [$i];
$parenthesesCount = 0;
$confirmed = false;
$maybeNullable = null;

if ($this->tokens[$i]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
++$parenthesesCount;
}

for ($x = ($i - 1); $x >= 0; $x--) {
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
continue;
Expand Down Expand Up @@ -3167,7 +3179,7 @@ protected function processAdditional()
$confirmed = true;
break;
} else {
// This may still be an arrow function which hasn't be handled yet.
// This may still be an arrow function which hasn't been handled yet.
for ($y = ($x - 1); $y > 0; $y--) {
if (isset(Tokens::$emptyTokens[$this->tokens[$y]['code']]) === false
&& $this->tokens[$y]['code'] !== T_BITWISE_AND
Expand Down Expand Up @@ -3202,11 +3214,13 @@ protected function processAdditional()
continue;
}

if ($this->tokens[$x]['code'] === T_BITWISE_OR
|| $this->tokens[$x]['code'] === T_BITWISE_AND
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
) {
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
$typeOperators[] = $x;
continue;
}

if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
++$parenthesesCount;
$typeOperators[] = $x;
continue;
}
Expand Down Expand Up @@ -3244,6 +3258,9 @@ protected function processAdditional()
break;
}//end for

// Remember the last token we examined as part of the (non-)"type declaration".
$lastSeenTypeToken = $x;

if ($confirmed === false
&& $suspectedType === 'property or parameter'
&& isset($this->tokens[$i]['nested_parenthesis']) === true
Expand Down Expand Up @@ -3288,8 +3305,8 @@ protected function processAdditional()
unset($parens, $last);
}//end if

if ($confirmed === false) {
// Not a union or intersection type after all, move on.
if ($confirmed === false || ($parenthesesCount % 2) !== 0) {
// Not a (valid) union, intersection or DNF type after all, move on.
continue;
}

Expand Down
17 changes: 17 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError1Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
class ParseErrors {
/* testBrokenConstDNFTypeEndOnOpenParenthesis */
const A|(B PARSE_ERROR = null;

/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */
public A|(B $parseError;

function unmatchedParens {
/* testBrokenParamDNFTypeEndOnOpenParenthesis */
A|(B $parseError,
/* testBrokenReturnDNFTypeEndOnOpenParenthesis */
) : A|(B {}
}
69 changes: 69 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError1Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<?php
/**
* Tests that parentheses tokens are not converted to type parentheses tokens in broken DNF types.
*
* @author Juliette Reinders Folmer <phpcs_nospam@adviesenzo.nl>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;

use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;

final class DNFTypesParseError1Test extends AbstractTokenizerTestCase
{


/**
* Document handling for a DNF type / parse error where the last significant type specific token is an open parenthesis.
*
* @param string $testMarker The comment prefacing the target token.
*
* @dataProvider dataBrokenDNFTypeCantEndOnOpenParenthesis
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
*
* @return void
*/
public function testBrokenDNFTypeCantEndOnOpenParenthesis($testMarker)
{
$tokens = $this->phpcsFile->getTokens();

$openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
$token = $tokens[$openPtr];

// Verify that the open parenthesis is tokenized as a normal parenthesis.
$this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)');
$this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)');

// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
// is not recognized as a valid type declaration.
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
$token = $tokens[$unionPtr];

$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');

}//end testBrokenDNFTypeCantEndOnOpenParenthesis()


/**
* Data provider.
*
* @see testBrokenDNFTypeCantEndOnOpenParenthesis()
*
* @return array<string, array<string, string>>
*/
public static function dataBrokenDNFTypeCantEndOnOpenParenthesis()
{
return [
'OO const type' => ['/* testBrokenConstDNFTypeEndOnOpenParenthesis */'],
'OO property type' => ['/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */'],
'Parameter type' => ['/* testBrokenParamDNFTypeEndOnOpenParenthesis */'],
'Return type' => ['/* testBrokenReturnDNFTypeEndOnOpenParenthesis */'],
];

}//end dataBrokenDNFTypeCantEndOnOpenParenthesis()


}//end class
48 changes: 48 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
//
// Also note that the order of these tests is deliberate to try and trick the parentheses handling
// in the Tokenizer class into matching parentheses pairs, even though the parentheses do
// no belong together.

class UnmatchedParentheses {
/* testBrokenConstDNFTypeParensMissingClose */
const A|(B&C PARSE_ERROR_1 = null;

/* testBrokenConstDNFTypeParensMissingOpen */
const A|B&C) PARSE_ERROR_2 = null;

/* testBrokenPropertyDNFTypeParensMissingClose */
private A|(B&C $parseError1;

/* testBrokenPropertyDNFTypeParensMissingOpen */
protected A|B&C) $parseError2;

function unmatchedParens1 (
/* testBrokenParamDNFTypeParensMissingClose */
A|(B&C $parseError,
/* testBrokenReturnDNFTypeParensMissingOpen */
) : A|B&C) {}

function unmatchedParens2 (
/* testBrokenParamDNFTypeParensMissingOpen */
A|B&C) $parseError
/* testBrokenReturnDNFTypeParensMissingClose */
) : A|(B&C {}
}

class MatchedAndUnmatchedParentheses {
/* testBrokenConstDNFTypeParensMissingOneClose */
const (A&B)|(B&C PARSE_ERROR = null;

/* testBrokenPropertyDNFTypeParensMissingOneOpen */
protected (A&B)|B&C) $parseError;

function unmatchedParens (
/* testBrokenParamDNFTypeParensMissingOneClose */
(A&B)|(B&C $parseError,
/* testBrokenReturnDNFTypeParensMissingOneOpen */
) : (A&B)|B&C) {}
}
Loading

0 comments on commit fb351b3

Please sign in to comment.