Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Add explicit handling or failure for all tags #5762

Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 116 additions & 25 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -830,41 +830,132 @@ private function step_in_body() {
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
return true;
}

/*
* These tags require special handling in the 'in body' insertion mode
* but that handling hasn't yet been implemented.
*
* As the rules for each tag are implemented, the corresponding tag
* name should be removed from this list. An accompanying test should
* help ensure this list is maintained.
*
* @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags
*
* Since this switch structure throws a WP_HTML_Unsupported_Exception, it's
* possible to handle "any other start tag" and "any other end tag" below,
* as that guarantees execution doesn't proceed for the unimplemented tags.
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
*/
switch ( $tag_name ) {
case 'APPLET':
case 'AREA':
case 'BASE':
case 'BASEFONT':
case 'BGSOUND':
case 'BODY':
case 'BR':
case 'CAPTION':
case 'COL':
case 'COLGROUP':
case 'DD':
case 'DT':
case 'EMBED':
case 'FORM':
case 'FRAME':
case 'FRAMESET':
case 'HEAD':
case 'HR':
case 'HTML':
case 'IFRAME':
case 'INPUT':
case 'KEYGEN':
case 'LI':
case 'LINK':
case 'LISTING':
case 'MARQUEE':
case 'MATH':
case 'META':
case 'NOBR':
case 'NOEMBED':
case 'NOFRAMES':
case 'NOSCRIPT':
case 'OBJECT':
case 'OL':
case 'OPTGROUP':
case 'OPTION':
case 'PARAM':
case 'PLAINTEXT':
case 'PRE':
case 'RB':
case 'RP':
case 'RT':
case 'RTC':
case 'SARCASM':
case 'SCRIPT':
case 'SELECT':
case 'SOURCE':
case 'STYLE':
case 'SVG':
case 'TABLE':
case 'TBODY':
case 'TD':
case 'TEMPLATE':
case 'TEXTAREA':
case 'TFOOT':
case 'TH':
case 'THEAD':
case 'TITLE':
case 'TR':
case 'TRACK':
case 'UL':
case 'WBR':
case 'XMP':
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
sirreal marked this conversation as resolved.
Show resolved Hide resolved
}

if ( ! $this->is_tag_closer() ) {
/*
* > Any other start tag
*/
case '+SPAN':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
return true;
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
return true;
} else {
/*
* > Any other end tag
*/

/*
* Any other end tag
* Find the corresponding tag opener in the stack of open elements, if
* it exists before reaching a special element, which provides a kind
* of boundary in the stack. For example, a `</custom-tag>` should not
* close anything beyond its containing `P` or `DIV` element.
*/
case '-SPAN':
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
// > If node is an HTML element with the same tag name as the token, then:
if ( $item->node_name === $tag_name ) {
$this->generate_implied_end_tags( $tag_name );
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
if ( $tag_name === $node->node_name ) {
break;
}

// > If node is not the current node, then this is a parse error.
if ( self::is_special( $node->node_name ) ) {
// This is a parse error, ignore the token.
return $this->step();
}
}

$this->state->stack_of_open_elements->pop_until( $tag_name );
return true;
}
$this->generate_implied_end_tags( $tag_name );
if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
// @todo Record parse error: this error doesn't impact parsing.
}

// > Otherwise, if node is in the special category, then this is a parse error; ignore the token, and return.
if ( self::is_special( $item->node_name ) ) {
return $this->step();
}
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
$this->state->stack_of_open_elements->pop();
if ( $node === $item ) {
return true;
}
// Execution should not reach here; if it does then something went wrong.
return false;

default:
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
}
}
}

Expand Down Expand Up @@ -1264,7 +1355,7 @@ private function run_adoption_agency_algorithm() {

// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
$this->state->active_formatting_elements->remove_node( $formatting_element->bookmark_name );
$this->state->active_formatting_elements->remove_node( $formatting_element );
return;
}

Expand Down
106 changes: 90 additions & 16 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,22 +60,6 @@ public function test_get_tag_is_null_once_document_is_finished() {
$this->assertNull( $p->get_tag() );
}

/**
* Ensures that if the HTML Processor encounters inputs that it can't properly handle,
* that it stops processing the rest of the document. This prevents data corruption.
*
* @ticket 59167
*
* @covers WP_HTML_Processor::next_tag
*/
public function test_stops_processing_after_unsupported_elements() {
$p = WP_HTML_Processor::create_fragment( '<p><x-not-supported></p><p></p>' );
$p->next_tag( 'P' );
$this->assertFalse( $p->next_tag(), 'Stepped into a tag after encountering X-NOT-SUPPORTED element when it should have aborted.' );
$this->assertNull( $p->get_tag(), "Should have aborted processing, but still reported tag {$p->get_tag()} after properly failing to step into tag." );
$this->assertFalse( $p->next_tag( 'P' ), 'Stepped into normal P element after X-NOT-SUPPORTED element when it should have aborted.' );
}

/**
* Ensures that the HTML Processor maintains its internal state through seek calls.
*
Expand Down Expand Up @@ -147,4 +131,94 @@ public function test_fails_to_reconstruct_formatting_elements() {
$this->assertTrue( $p->next_tag( 'EM' ), 'Could not find first EM.' );
$this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
}

/**
* Ensures that special handling of unsupported tags is cleaned up
* as handling is implemented. Otherwise there's risk of leaving special
* handling (that is never reached) when tag handling is implemented.
*
* @dataProvider data_unsupported_special_in_body_tags
*
* @param string $tag_name Name of the tag to test.
*
* @covers WP_HTML_Processor::step_in_body
*/
public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) {
$fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '></' . $tag_name . '>' );
$this->assertFalse( $fragment->next_tag(), 'Should fail to find tag: ' . $tag_name . '.' );
$this->assertEquals( $fragment->get_last_error(), WP_HTML_Processor::ERROR_UNSUPPORTED, 'Should have unsupported last error.' );
}

/**
* Data provider.
*
* @return array[]
*/
public function data_unsupported_special_in_body_tags() {
return array(
'APPLET' => array( 'APPLET' ),
'AREA' => array( 'AREA' ),
'BASE' => array( 'BASE' ),
'BASEFONT' => array( 'BASEFONT' ),
'BGSOUND' => array( 'BGSOUND' ),
'BODY' => array( 'BODY' ),
'BR' => array( 'BR' ),
'CAPTION' => array( 'CAPTION' ),
'COL' => array( 'COL' ),
'COLGROUP' => array( 'COLGROUP' ),
'DD' => array( 'DD' ),
'DT' => array( 'DT' ),
'EMBED' => array( 'EMBED' ),
'FORM' => array( 'FORM' ),
'FRAME' => array( 'FRAME' ),
'FRAMESET' => array( 'FRAMESET' ),
'HEAD' => array( 'HEAD' ),
'HR' => array( 'HR' ),
'HTML' => array( 'HTML' ),
'IFRAME' => array( 'IFRAME' ),
'INPUT' => array( 'INPUT' ),
'KEYGEN' => array( 'KEYGEN' ),
'LI' => array( 'LI' ),
'LINK' => array( 'LINK' ),
'LISTING' => array( 'LISTING' ),
'MARQUEE' => array( 'MARQUEE' ),
'MATH' => array( 'MATH' ),
'META' => array( 'META' ),
'NOBR' => array( 'NOBR' ),
'NOEMBED' => array( 'NOEMBED' ),
'NOFRAMES' => array( 'NOFRAMES' ),
'NOSCRIPT' => array( 'NOSCRIPT' ),
'OBJECT' => array( 'OBJECT' ),
'OL' => array( 'OL' ),
'OPTGROUP' => array( 'OPTGROUP' ),
'OPTION' => array( 'OPTION' ),
'PARAM' => array( 'PARAM' ),
'PLAINTEXT' => array( 'PLAINTEXT' ),
'PRE' => array( 'PRE' ),
'RB' => array( 'RB' ),
'RP' => array( 'RP' ),
'RT' => array( 'RT' ),
'RTC' => array( 'RTC' ),
'SARCASM' => array( 'SARCASM' ),
'SCRIPT' => array( 'SCRIPT' ),
'SELECT' => array( 'SELECT' ),
'SOURCE' => array( 'SOURCE' ),
'STYLE' => array( 'STYLE' ),
'SVG' => array( 'SVG' ),
'TABLE' => array( 'TABLE' ),
'TBODY' => array( 'TBODY' ),
'TD' => array( 'TD' ),
'TEMPLATE' => array( 'TEMPLATE' ),
'TEXTAREA' => array( 'TEXTAREA' ),
'TFOOT' => array( 'TFOOT' ),
'TH' => array( 'TH' ),
'THEAD' => array( 'THEAD' ),
'TITLE' => array( 'TITLE' ),
'TR' => array( 'TR' ),
'TRACK' => array( 'TRACK' ),
'UL' => array( 'UL' ),
'WBR' => array( 'WBR' ),
'XMP' => array( 'XMP' ),
);
}
}
Loading
Loading