Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Skip over contents of RAWTEXT elements such as STYLE. #5145

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@
* unquoted values will appear in the output with double-quotes.
*
* @since 6.2.0
* @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive.
* @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE.
*/
class WP_HTML_Tag_Processor {
/**
Expand Down Expand Up @@ -568,7 +570,14 @@ public function next_tag( $query = null ) {
* of the tag name as a pre-check avoids a string allocation when it's not needed.
*/
$t = $this->html[ $this->tag_name_starts_at ];
if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) {
if (
! $this->is_closing_tag &&
(
'i' === $t || 'I' === $t ||
'n' === $t || 'N' === $t ||
's' === $t || 'S' === $t ||
't' === $t || 'T' === $t
) ) {
$tag_name = $this->get_tag();

if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
Expand All @@ -580,6 +589,25 @@ public function next_tag( $query = null ) {
) {
$this->bytes_already_parsed = strlen( $this->html );
return false;
} elseif (
(
'IFRAME' === $tag_name ||
'NOEMBED' === $tag_name ||
'NOFRAMES' === $tag_name ||
'NOSCRIPT' === $tag_name ||
'STYLE' === $tag_name
) &&
! $this->skip_rawtext( $tag_name )
) {
/*
* "XMP" should be here too but its rules are more complicated and require the
* complexity of the HTML Processor (it needs to close out any open P element,
* meaning it can't be skipped here or else the HTML Processor will lose its
* place). For now, it can be ignored as it's a rare HTML tag in practice and
* any normative HTML should be using PRE instead.
*/
$this->bytes_already_parsed = strlen( $this->html );
return false;
}
}
} while ( $already_found < $this->sought_match_offset );
Expand Down Expand Up @@ -710,15 +738,33 @@ public function release_bookmark( $name ) {
return true;
}

/**
* Skips contents of generic rawtext elements.
*
* @since 6.3.2
*
* @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
*
* @param string $tag_name The uppercase tag name which will close the RAWTEXT region.
* @return bool Whether an end to the RAWTEXT region was found before the end of the document.
*/
private function skip_rawtext( $tag_name ) {
	/*
	 * RAWTEXT and RCDATA regions differ only in whether character references
	 * get decoded. Reading the inner markup isn't supported here, so that
	 * difference is unobservable and the RCDATA scanner can be reused as-is.
	 */
	$found_closing_tag = $this->skip_rcdata( $tag_name );

	return $found_closing_tag;
}

/**
* Skips contents of title and textarea tags.
* Skips contents of RCDATA elements, namely title and textarea tags.
*
* @since 6.2.0
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
*
* @param string $tag_name The lowercase tag name which will close the RCDATA region.
* @param string $tag_name The uppercase tag name which will close the RCDATA region.
* @return bool Whether an end to the RCDATA region was found before the end of the document.
*/
private function skip_rcdata( $tag_name ) {
Expand Down
37 changes: 37 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1871,6 +1871,43 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() {
);
}

/**
* @ticket 59292
*
* @covers WP_HTML_Tag_Processor::next_tag
*
* @dataProvider data_next_tag_ignores_contents_of_rawtext_tags
*
* @param string $rawtext_element_then_target_node HTML starting with a RAWTEXT-specifying element such as STYLE,
* then an element afterward containing the "target" attribute.
*/
public function test_next_tag_ignores_contents_of_rawtext_tags( $rawtext_element_then_target_node ) {
	$processor = new WP_HTML_Tag_Processor( $rawtext_element_then_target_node );

	// The first match is the RAWTEXT-specifying opener (e.g. STYLE) that starts the input.
	$this->assertTrue(
		$processor->next_tag(),
		'Expected to find the opening RAWTEXT element but found no tag.'
	);

	/*
	 * The second match must be the element after the RAWTEXT closer. Asserting
	 * the return value separately distinguishes "no further tag was found" from
	 * "a tag inside the RAWTEXT region was wrongly matched".
	 */
	$this->assertTrue(
		$processor->next_tag(),
		'Expected to find a tag after the RAWTEXT element but found none.'
	);

	// Only the element following the RAWTEXT region carries the "target" attribute.
	$this->assertNotNull(
		$processor->get_attribute( 'target' ),
		"Expected to find element with target attribute but found {$processor->get_tag()} instead."
	);
}

/**
* Data provider.
*
* @return array[]
*/
public function data_next_tag_ignores_contents_of_rawtext_tags() {
	// Each dataset is a RAWTEXT element followed by the element carrying the "target" attribute.
	$datasets = array();

	$datasets['IFRAME']   = array( '<iframe><section>Inside</section></iframe><section target>' );
	$datasets['NOEMBED']  = array( '<noembed><p></p></noembed><div target>' );
	$datasets['NOFRAMES'] = array( '<noframes><p>Check the rules here.</p></noframes><div target>' );
	$datasets['NOSCRIPT'] = array( '<noscript><span>This assumes that scripting mode is enabled.</span></noscript><p target>' );
	$datasets['STYLE']    = array( '<style>* { margin: 0 }</style><div target>' );

	// The DIV-looking text lives inside a CSS string and must not be matched as a tag.
	$datasets['STYLE hiding DIV'] = array( '<style>li::before { content: "<div non-target>" }</style><div target>' );

	return $datasets;
}

/**
* Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
*
Expand Down
Loading