Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Implement reset_insertion_mode #6020

Closed
273 changes: 273 additions & 0 deletions src/wp-includes/html-api/class-wp-html-processor-state.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,114 @@ class WP_HTML_Processor_State {
*/
const INSERTION_MODE_IN_BODY = 'insertion-mode-in-body';

/**
* In select insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a SELECT element and no TABLE ancestor
* is found before a TEMPLATE ancestor or the bottom of the stack.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-inselect
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_SELECT = 'insertion-mode-in-select';

/**
* In select in table insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a SELECT element which has a TABLE
* ancestor with no TEMPLATE element between the two.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-inselectintable
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_SELECT_IN_TABLE = 'insertion-mode-in-select-in-table';

/**
* In table insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a TABLE element.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-intable
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_TABLE = 'insertion-mode-in-table';

/**
* In caption insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a CAPTION element.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-incaption
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_CAPTION = 'insertion-mode-in-caption';

/**
* In table body insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a TBODY, THEAD, or TFOOT element.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-intablebody
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_TABLE_BODY = 'insertion-mode-in-table-body';

/**
* In row insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a TR element.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-inrow
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_ROW = 'insertion-mode-in-row';

/**
* In cell insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a TD or TH element that is not the
* first (bottom-most) node in the stack.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-incell
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_CELL = 'insertion-mode-in-cell';

/**
* In column group insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a COLGROUP element.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-incolumngroup
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_COLUMN_GROUP = 'insertion-mode-in-column-group';

/**
* In frameset insertion mode for full HTML parser.
*
* Assigned by reset_insertion_mode() when the first matching node found while
* walking up the stack of open elements is a FRAMESET element.
*
* @since 6.5.0
*
* @see https://html.spec.whatwg.org/#parsing-main-inframeset
* @see WP_HTML_Processor_State::$insertion_mode
*
* @var string
*/
const INSERTION_MODE_IN_FRAMESET = 'insertion-mode-in-frameset';

/**
* Tracks open elements while scanning HTML.
*
Expand Down Expand Up @@ -140,4 +248,169 @@ public function __construct() {
$this->stack_of_open_elements = new WP_HTML_Open_Elements();
$this->active_formatting_elements = new WP_HTML_Active_Formatting_Elements();
}

/**
 * Runs the "reset the insertion mode appropriately" algorithm.
 *
 * Walks the stack of open elements from the most recently opened node towards
 * the first one and assigns `$this->insertion_mode` according to the first node
 * that matches one of the steps of the algorithm. When no node matches, the
 * insertion mode is set to "in body".
 *
 * The context-element substitution described in step 3 of the algorithm
 * (fragment case) is not performed: the node found in the stack of open
 * elements is always the one examined.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported nodes
 *                                       (TEMPLATE, HEAD, or HTML).
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
 */
public function reset_insertion_mode(): void {
	/*
	 * > 1. Let _last_ be false.
	 * > 2. Let _node_ be the last node in the stack of open elements.
	 * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_
	 * >            to true, and, if the parser was created as part of the HTML fragment parsing
	 * >            algorithm (fragment case), set node to the context element passed to
	 * >            that algorithm.
	 * > …
	 */
	$last = false;

	/*
	 * NOTE(review): detecting the first node in the stack this way assumes that
	 * walk_up() yields sequential integer keys starting at 0 for the last node
	 * in the stack — confirm against WP_HTML_Open_Elements::walk_up().
	 */
	$last_index = $this->stack_of_open_elements->count() - 1;

	foreach ( $this->stack_of_open_elements->walk_up() as $i => $node ) {
		if ( $i === $last_index ) {
			$last = true;
		}

		switch ( $node->node_name ) {
			/*
			 * > 4. If node is a `select` element, run these substeps:
			 * >   1. If _last_ is true, jump to the step below labeled done.
			 * >   2. Let _ancestor_ be _node_.
			 * >   3. _Loop_: If _ancestor_ is the first node in the stack of open elements,
			 * >      jump to the step below labeled done.
			 * >   4. Let ancestor be the node before ancestor in the stack of open elements.
			 * >   …
			 * >   7. Jump back to the step labeled _loop_.
			 * >   8. _Done_: Switch the insertion mode to "in select" and return.
			 */
			case 'SELECT':
				if ( ! $last ) {
					foreach ( $this->stack_of_open_elements->walk_up( $node ) as $ancestor ) {
						switch ( $ancestor->node_name ) {
							/*
							 * > 5. If _ancestor_ is a `template` node, jump to the step below
							 * >    labeled _done_.
							 *
							 * `break 2` leaves both this inner switch and the ancestor
							 * loop, landing on the "in select" assignment below.
							 */
							case 'TEMPLATE':
								break 2;

							/*
							 * > 6. If _ancestor_ is a `table` node, switch the insertion mode to
							 * >    "in select in table" and return.
							 */
							case 'TABLE':
								$this->insertion_mode = self::INSERTION_MODE_IN_SELECT_IN_TABLE;
								return;
						}
					}
				}
				$this->insertion_mode = self::INSERTION_MODE_IN_SELECT;
				return;

			/*
			 * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the
			 * >    insertion mode to "in cell" and return.
			 *
			 * When _last_ is true the switch is left without returning. The node
			 * is the first one in the stack, so the walk ends and the fall-through
			 * below applies.
			 */
			case 'TD':
			case 'TH':
				if ( ! $last ) {
					$this->insertion_mode = self::INSERTION_MODE_IN_CELL;
					return;
				}
				break;

			/*
			 * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row"
			 * >    and return.
			 */
			case 'TR':
				$this->insertion_mode = self::INSERTION_MODE_IN_ROW;
				return;

			/*
			 * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the
			 * >    insertion mode to "in table body" and return.
			 */
			case 'TBODY':
			case 'THEAD':
			case 'TFOOT':
				$this->insertion_mode = self::INSERTION_MODE_IN_TABLE_BODY;
				return;

			/*
			 * > 8. If _node_ is a `caption` element, then switch the insertion mode to
			 * >    "in caption" and return.
			 */
			case 'CAPTION':
				$this->insertion_mode = self::INSERTION_MODE_IN_CAPTION;
				return;

			/*
			 * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to
			 * >    "in column group" and return.
			 */
			case 'COLGROUP':
				$this->insertion_mode = self::INSERTION_MODE_IN_COLUMN_GROUP;
				return;

			/*
			 * > 10. If _node_ is a `table` element, then switch the insertion mode to
			 * >     "in table" and return.
			 */
			case 'TABLE':
				$this->insertion_mode = self::INSERTION_MODE_IN_TABLE;
				return;

			/*
			 * > 11. If _node_ is a `template` element, then switch the insertion mode to the
			 * >     current template insertion mode and return.
			 */
			case 'TEMPLATE':
				throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at TEMPLATE node.' );

			/*
			 * > 12. If _node_ is a `head` element and _last_ is false, then switch the
			 * >     insertion mode to "in head" and return.
			 */
			case 'HEAD':
				throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HEAD node.' );

			/*
			 * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body"
			 * >     and return.
			 */
			case 'BODY':
				$this->insertion_mode = self::INSERTION_MODE_IN_BODY;
				return;

			/*
			 * > 14. If _node_ is a `frameset` element, then switch the insertion mode to
			 * >     "in frameset" and return. (fragment case)
			 */
			case 'FRAMESET':
				$this->insertion_mode = self::INSERTION_MODE_IN_FRAMESET;
				return;

			/*
			 * > 15. If _node_ is an `html` element, run these substeps:
			 * >     1. If the head element pointer is null, switch the insertion mode to
			 * >        "before head" and return. (fragment case)
			 * >     2. Otherwise, the head element pointer is not null, switch the insertion mode to "after head" and return.
			 */
			case 'HTML':
				throw new WP_HTML_Unsupported_Exception( 'Cannot reset insertion mode at HTML node.' );
		}
	}

	/*
	 * > 16. If _last_ is true, then switch the insertion mode to "in body"
	 * >     and return. (fragment case)
	 *
	 * Reaching this point means the whole stack was walked without any node
	 * selecting an insertion mode. This includes the case of an empty stack,
	 * where the loop never runs and `$last` is still false.
	 */
	$this->insertion_mode = self::INSERTION_MODE_IN_BODY;
}
}
107 changes: 107 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessorState.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<?php
/**
* Unit tests covering WP_HTML_Processor_State functionality.
*
* @package WordPress
* @subpackage HTML-API
*
* @since 6.7.0
*
* @group html-api
*
* @coversDefaultClass WP_HTML_Processor_State
*/
class Tests_HtmlApi_WpHtmlProcessorState extends WP_UnitTestCase {
	/**
	 * Ensures that resetting the insertion mode selects the expected mode for
	 * a given stack of open elements.
	 *
	 * @dataProvider data_insertion_mode_cases
	 *
	 * @ticket 61549
	 *
	 * @covers ::reset_insertion_mode
	 *
	 * @param array  $stack_of_open_elements  Stack of open elements.
	 * @param string $expected_insertion_mode Expected insertion mode after running the algorithm.
	 */
	public function test_reset_insertion_mode(
		array $stack_of_open_elements,
		string $expected_insertion_mode
	): void {
		$state = self::build_state( $stack_of_open_elements );
		$state->reset_insertion_mode();

		$this->assertSame( $expected_insertion_mode, $state->insertion_mode );
	}

	/**
	 * Data provider.
	 *
	 * @return array[]
	 */
	public static function data_insertion_mode_cases(): array {
		return array(
			'SELECT last element'         => array( array( 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ),
			'SELECT'                      => array( array( 'HTML', 'BODY', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ),
			'SELECT in table'             => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE ),
			'SELECT in template in table' => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD', 'TEMPLATE', 'SELECT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ),
			'SELECT > OPTION'             => array( array( 'HTML', 'BODY', 'SELECT', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ),
			'SELECT > OPTGROUP > OPTION'  => array( array( 'HTML', 'BODY', 'SELECT', 'OPTGROUP', 'OPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT ),
			'TD'                          => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ),
			'TD (last element)'           => array( array( 'TD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ),
			'TH'                          => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR', 'TH' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CELL ),
			'TH (last element)'           => array( array( 'TH' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ),
			'TR'                          => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY', 'TR' ), WP_HTML_Processor_State::INSERTION_MODE_IN_ROW ),
			'TBODY'                       => array( array( 'HTML', 'BODY', 'TABLE', 'TBODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ),
			'THEAD'                       => array( array( 'HTML', 'BODY', 'TABLE', 'THEAD' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ),
			'TFOOT'                       => array( array( 'HTML', 'BODY', 'TABLE', 'TFOOT' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY ),
			'CAPTION'                     => array( array( 'HTML', 'BODY', 'TABLE', 'CAPTION' ), WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION ),
			'COLGROUP'                    => array( array( 'HTML', 'BODY', 'TABLE', 'COLGROUP' ), WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP ),
			'TABLE'                       => array( array( 'HTML', 'BODY', 'TABLE' ), WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE ),
			'BODY'                        => array( array( 'HTML', 'BODY' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ),
			'FRAMESET'                    => array( array( 'HTML', 'BODY', 'FRAMESET' ), WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET ),
			'Last element (DIV)'          => array( array( 'DIV' ), WP_HTML_Processor_State::INSERTION_MODE_IN_BODY ),
		);
	}

	/**
	 * Ensures that resetting the insertion mode bails on stacks of open elements
	 * whose handling is not supported.
	 *
	 * @dataProvider data_insertion_mode_unsupported
	 *
	 * @ticket 61549
	 *
	 * @covers ::reset_insertion_mode
	 *
	 * @param array $stack_of_open_elements Stack of open elements.
	 */
	public function test_reset_insertion_mode_unsupported( array $stack_of_open_elements ): void {
		$state = self::build_state( $stack_of_open_elements );

		// Set the expectation only after the fixture is built so that it applies to the call under test.
		$this->expectException( WP_HTML_Unsupported_Exception::class );
		$state->reset_insertion_mode();
	}

	/**
	 * Data provider.
	 *
	 * These tests should be migrated to the supported tests as support for more elements is added.
	 *
	 * @return array[]
	 */
	public static function data_insertion_mode_unsupported(): array {
		return array(
			'TEMPLATE requires template insertion mode stack' => array( array( 'HTML', 'BODY', 'TEMPLATE' ) ),
			'HEAD requires more insertion modes'              => array( array( 'HTML', 'HEAD' ) ),
			'HTML requires head pointer and insertion modes'  => array( array( 'HTML' ) ),
		);
	}

	/**
	 * Builds a processor state whose stack of open elements contains the given tags.
	 *
	 * The context node is set to BODY and one token is pushed per tag name, using
	 * the position of the tag name in the list as the token's bookmark name.
	 *
	 * Tag names must be given in upper case because reset_insertion_mode() compares
	 * node names against upper-case tag names: a lower-case name would silently
	 * match none of them and make the test pass or fail for the wrong reason.
	 *
	 * @param string[] $stack_of_open_elements Tag names, ordered from the first (bottom-most)
	 *                                         to the last (most recently opened) element.
	 * @return WP_HTML_Processor_State State ready to have its insertion mode reset.
	 */
	private static function build_state( array $stack_of_open_elements ): WP_HTML_Processor_State {
		$state               = new WP_HTML_Processor_State();
		$state->context_node = array( 'BODY', array() );

		foreach ( $stack_of_open_elements as $i => $tag_name ) {
			/*
			 * Compare against the upper-cased name instead of using ctype_upper(),
			 * which rejects valid upper-case tag names containing digits, e.g. "H1".
			 */
			self::assertSame(
				strtoupper( $tag_name ),
				$tag_name,
				"Invalid test data: tag name '{$tag_name}' must be upper case to match the node names checked by reset_insertion_mode()."
			);

			$state->stack_of_open_elements->push( new WP_HTML_Token( $i, $tag_name, false ) );
		}

		return $state;
	}
}
Loading