Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tag Processor: Add ability to stop at tag closers, if requested #45789

Merged
merged 1 commit into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 103 additions & 34 deletions lib/experimental/html/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,14 @@ class WP_HTML_Tag_Processor {
*/
private $sought_match_offset;

/**
* Whether to visit tag closers, e.g. </div>, when walking an input document.
*
* @since 6.2.0
* @var boolean
*/
private $stop_on_tag_closers;

/**
* The updated HTML document.
*
Expand Down Expand Up @@ -276,6 +284,29 @@ class WP_HTML_Tag_Processor {
*/
private $tag_name_length;

/**
* Byte offset in input document where current tag token ends.
*
* Example:
* ```
* <div id="test">...
* 0 1 |
* 01234567890123456
* --- tag name ends at 14
* ```
*
* @since 6.2.0
* @var ?int
*/
private $tag_ends_at;

/**
* Whether the current tag is an opening tag, e.g. <div>, or a closing tag, e.g. </div>.
*
* @var boolean
*/
private $is_closing_tag;

/**
* Lazily-built index of attributes found within an HTML tag, keyed by the attribute name.
*
Expand Down Expand Up @@ -412,7 +443,16 @@ public function next_tag( $query = null ) {
return false;
}

$this->parse_tag_opener_attributes();
while ( $this->parse_next_attribute() ) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the inlining

continue;
}

$tag_ends_at = strpos( $this->html, '>', $this->parsed_bytes );
if ( false === $tag_ends_at ) {
return false;
}
$this->tag_ends_at = $tag_ends_at;
$this->parsed_bytes = $tag_ends_at;

if ( $this->matches() ) {
++$already_found;
Expand Down Expand Up @@ -495,7 +535,9 @@ private function skip_rcdata( $tag_name ) {
continue;
}

$this->skip_tag_closer_attributes();
while ( $this->parse_next_attribute() ) {
continue;
}
$at = $this->parsed_bytes;
if ( $at >= strlen( $this->html ) ) {
return false;
Expand Down Expand Up @@ -620,12 +662,14 @@ private function skip_script_data() {

if ( $is_closing ) {
$this->parsed_bytes = $at;
$this->skip_tag_closer_attributes();

if ( $this->parsed_bytes >= $doc_length ) {
return false;
}

while ( $this->parse_next_attribute() ) {
continue;
}

if ( '>' === $html[ $this->parsed_bytes ] ) {
++$this->parsed_bytes;
return true;
Expand Down Expand Up @@ -656,6 +700,13 @@ private function parse_next_tag() {
return false;
}

if ( '/' === $this->html[ $at + 1 ] ) {
$this->is_closing_tag = true;
$at++;
} else {
$this->is_closing_tag = false;
}

/*
* HTML tag names must start with [a-zA-Z] otherwise they are not tags.
* For example, "<3" is rendered as text, not a tag opener. This means
Expand Down Expand Up @@ -777,35 +828,12 @@ private function parse_next_tag() {
return false;
}

/**
* Parses all attributes of the current tag.
*
* @since 6.2.0
*/
private function parse_tag_opener_attributes() {
while ( $this->parse_next_attribute() ) {
continue;
}
}

/**
* Skips all attributes of the current tag.
*
* @since 6.2.0
*/
private function skip_tag_closer_attributes() {
while ( $this->parse_next_attribute( 'tag-closer' ) ) {
continue;
}
}

/**
* Parses the next attribute.
*
* @param string $context tag-opener or tag-closer.
* @since 6.2.0
*/
private function parse_next_attribute( $context = 'tag-opener' ) {
private function parse_next_attribute() {
// Skip whitespace and slashes.
$this->parsed_bytes += strspn( $this->html, " \t\f\r\n/", $this->parsed_bytes );
if ( $this->parsed_bytes >= strlen( $this->html ) ) {
Expand Down Expand Up @@ -872,7 +900,7 @@ private function parse_next_attribute( $context = 'tag-opener' ) {
return false;
}

if ( 'tag-opener' !== $context ) {
if ( $this->is_closing_tag ) {
return true;
}

Expand Down Expand Up @@ -914,6 +942,8 @@ private function after_tag() {
$this->apply_attributes_updates();
$this->tag_name_starts_at = null;
$this->tag_name_length = null;
$this->tag_ends_at = null;
$this->is_closing_tag = null;
$this->attributes = array();
}

Expand Down Expand Up @@ -1159,6 +1189,25 @@ public function get_tag() {
return strtoupper( $tag_name );
}

/**
* Indicates if the current tag token is a tag closer.
*
* Example:
* <code>
* $p = new WP_HTML_Tag_Processor( '<div></div>' );
* $p->next_tag( [ 'tag_name' => 'div', 'tag_closers' => 'visit' ] );
* $p->is_tag_closer() === false;
*
* $p->next_tag( [ 'tag_name' => 'div', 'tag_closers' => 'visit' ] );
* $p->is_tag_closer() === true;
* </code>
*
* @return bool
*/
public function is_tag_closer() {
return $this->is_closing_tag;
}

/**
* Updates or creates a new attribute on the currently matched tag with the value passed.
*
Expand All @@ -1175,8 +1224,8 @@ public function get_tag() {
* @throws Exception When WP_DEBUG is true and the attribute name is invalid.
*/
public function set_attribute( $name, $value ) {
if ( null === $this->tag_name_starts_at ) {
return;
if ( $this->is_closing_tag || null === $this->tag_name_starts_at ) {
return false;
}

/*
Expand Down Expand Up @@ -1286,8 +1335,8 @@ public function set_attribute( $name, $value ) {
* @param string $name The attribute name to remove.
*/
public function remove_attribute( $name ) {
if ( ! isset( $this->attributes[ $name ] ) ) {
return;
if ( $this->is_closing_tag || ! isset( $this->attributes[ $name ] ) ) {
return false;
}

/*
Expand Down Expand Up @@ -1316,6 +1365,10 @@ public function remove_attribute( $name ) {
* @param string $class_name The class name to add.
*/
public function add_class( $class_name ) {
if ( $this->is_closing_tag ) {
return false;
}

if ( null !== $this->tag_name_starts_at ) {
$this->classname_updates[ $class_name ] = self::ADD_CLASS;
}
Expand All @@ -1329,6 +1382,10 @@ public function add_class( $class_name ) {
* @param string $class_name The class name to remove.
*/
public function remove_class( $class_name ) {
if ( $this->is_closing_tag ) {
return false;
}

if ( null !== $this->tag_name_starts_at ) {
$this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
}
Expand Down Expand Up @@ -1392,7 +1449,9 @@ public function get_updated_html() {

// Parse the attributes in the updated markup.
$this->attributes = array();
$this->parse_tag_opener_attributes();
while ( $this->parse_next_attribute() ) {
continue;
}

return $this->html;
}
Expand All @@ -1407,6 +1466,7 @@ public function get_updated_html() {
*
* @type string|null $tag_name Which tag to find, or `null` for "any tag."
* @type string|null $class_name Tag must contain this class name to match.
* @type string $tag_closers "visit" or "skip": whether to stop on tag closers, e.g. </div>.
* }
*/
private function parse_query( $query ) {
Expand All @@ -1418,6 +1478,7 @@ private function parse_query( $query ) {
$this->sought_tag_name = null;
$this->sought_class_name = null;
$this->sought_match_offset = 1;
$this->stop_on_tag_closers = false;

// A single string value means "find the tag of this name".
if ( is_string( $query ) ) {
Expand All @@ -1441,6 +1502,10 @@ private function parse_query( $query ) {
if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) {
$this->sought_match_offset = $query['match_offset'];
}

if ( isset( $query['tag_closers'] ) ) {
$this->stop_on_tag_closers = 'visit' === $query['tag_closers'];
}
}


Expand All @@ -1452,6 +1517,10 @@ private function parse_query( $query ) {
* @return boolean
*/
private function matches() {
if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) {
return false;
}

// Do we match a case-insensitive HTML tag name?
if ( null !== $this->sought_tag_name ) {
/*
Expand Down
56 changes: 56 additions & 0 deletions phpunit/html/wp-html-tag-processor-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,35 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() {
$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
}

/**
* @covers next_tag
* @covers is_tag_closer
*/
public function test_next_tag_should_stop_on_closers_only_when_requested() {
$p = new WP_HTML_Tag_Processor( '<div><img /></div>' );
$this->assertTrue( $p->next_tag( array( 'tag_name' => 'div' ) ), 'Did not find desired tag opener' );
$this->assertFalse( $p->next_tag( array( 'tag_name' => 'div' ) ), 'Visited an unwanted tag, a tag closer' );

$p = new WP_HTML_Tag_Processor( '<div><img /></div>' );
$p->next_tag(
array(
'tag_name' => 'div',
'tag_closers' => 'visit',
)
);
$this->assertFalse( $p->is_tag_closer(), 'Indicated a tag opener is a tag closer' );
$this->assertTrue(
$p->next_tag(
array(
'tag_name' => 'div',
'tag_closers' => 'visit',
)
),
'Did not stop at desired tag closer'
);
$this->assertTrue( $p->is_tag_closer(), 'Indicated a tag closer is a tag opener' );
}

/**
* @ticket 56299
*
Expand All @@ -255,6 +284,33 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar
);
}

public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
$p = new WP_HTML_Tag_Processor( '<div id=3></div invalid-id=4>' );
$p->next_tag(
array(
'tag_name' => 'div',
'tag_closers' => 'visit',
)
);
$this->assertFalse( $p->is_tag_closer(), 'Skipped tag opener' );
$p->next_tag(
array(
'tag_name' => 'div',
'tag_closers' => 'visit',
)
);
$this->assertTrue( $p->is_tag_closer(), 'Skipped tag closer' );
$this->assertFalse( $p->set_attribute( 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" );
$this->assertFalse( $p->remove_attribute( 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" );
$this->assertFalse( $p->add_class( 'sneaky' ), "Allowed adding a class on a tag closer when it shouldn't have" );
$this->assertFalse( $p->remove_class( 'not-appearing-in-this-test' ), "Allowed removing a class on a tag closer when it shouldn't have" );
$this->assertSame(
'<div id=3></div invalid-id=4>',
$p->get_updated_html(),
'Calling get_updated_html after updating a non-existing tag returned an HTML that was different from the original HTML'
);
}

/**
* Passing a double quote inside of an attribute values could lead to an XSS attack as follows:
*
Expand Down