From f35453b7efe8128f7672620c2581d81472ee41f0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 13 Dec 2023 17:37:42 +0100 Subject: [PATCH 01/18] Add throw on every special element --- .../html-api/class-wp-html-processor.php | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e46c368c702d4..3d285ade80713 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -603,6 +603,185 @@ private function step_in_body() { $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { + /* + * These tags require special handling in the 'in body' insertion mode + * but do not have special handling implemented yet. + * + * We throw the WP_HTML_Unsupported_Exception so we're free to implememnt + * the catch-all handling for any other start and end tag. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody + */ + case '+A': + case '+ADDRESS': + case '+APPLET': + case '+AREA': + case '+ARTICLE': + case '+ASIDE': + case '+B': + case '+BASE': + case '+BASEFONT': + case '+BGSOUND': + case '+BIG': + case '+BLOCKQUOTE': + case '+BODY': + case '+BR': + case '+BUTTON': + case '+CAPTION': + case '+CENTER': + case '+CODE': + case '+COL': + case '+COLGROUP': + case '+DD': + case '+DETAILS': + case '+DIALOG': + case '+DIR': + case '+DIV': + case '+DL': + case '+DT': + case '+EM': + case '+EMBED': + case '+FIELDSET': + case '+FIGCAPTION': + case '+FIGURE': + case '+FONT': + case '+FOOTER': + case '+FORM': + case '+FRAME': + case '+FRAMESET': + case '+H1': + case '+H2': + case '+H3': + case '+H4': + case '+H5': + case '+H6': + case '+HEAD': + case '+HEADER': + case '+HGROUP': + case '+HR': + case '+HTML': + case '+I': + case '+IFRAME': + case '+IMAGE': + case '+IMG': + case '+INPUT': + case '+KEYGEN': + case '+LI': + case '+LINK': + case '+LISTING': + case '+MAIN': + 
case '+MARQUEE': + case '+MATH': + case '+MENU': + case '+META': + case '+NAV': + case '+NOBR': + case '+NOEMBED': + case '+NOFRAMES': + case '+NOSCRIPT': // scripting flag?? + case '+OBJECT': + case '+OL': + case '+OPTGROUP': + case '+OPTION': + case '+P': + case '+PARAM': + case '+PLAINTEXT': + case '+PRE': + case '+RB': + case '+RP': + case '+RT': + case '+RTC': + case '+S': + case '+SCRIPT': + case '+SEARCH': + case '+SECTION': + case '+SELECT': + case '+SMALL': + case '+SOURCE': + case '+STRIKE': + case '+STRONG': + case '+STYLE': + case '+SUMMARY': + case '+SVG': + case '+TABLE': + case '+TBODY': + case '+TD': + case '+TEMPLATE': + case '+TEXTAREA': + case '+TFOOT': + case '+TH': + case '+THEAD': + case '+TITLE': + case '+TR': + case '+TRACK': + case '+TT': + case '+U': + case '+UL': + case '+WBR': + case '+XMP': + case '-A': + case '-ADDRESS': + case '-APPLET': + case '-ARTICLE': + case '-ASIDE': + case '-B': + case '-BIG': + case '-BLOCKQUOTE': + case '-BODY': + case '-BR': + case '-BUTTON': + case '-CENTER': + case '-CODE': + case '-DD': + case '-DETAILS': + case '-DIALOG': + case '-DIR': + case '-DIV': + case '-DL': + case '-DT': + case '-EM': + case '-FIELDSET': + case '-FIGCAPTION': + case '-FIGURE': + case '-FONT': + case '-FOOTER': + case '-FORM': + case '-H1': + case '-H2': + case '-H3': + case '-H4': + case '-H5': + case '-H6': + case '-HEADER': + case '-HGROUP': + case '-HTML': + case '-I': + case '-LI': + case '-LISTING': + case '-MAIN': + case '-MARQUEE': + case '-MENU': + case '-NAV': + case '-NOBR': + case '-OBJECT': + case '-OL': + case '-P': + case '-PRE': + case '-S': + case '-SEARCH': + case '-SECTION': + case '-SMALL': + case '-STRIKE': + case '-STRONG': + case '-SUMMARY': + case '-TEMPLATE': + case '-TT': + case '-U': + case '-UL': + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." 
); + + /* * > A start tag whose tag name is "button" */ From dbf5db80685a3c680a3d32e0e7fe268688c29c83 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 13 Dec 2023 17:57:56 +0100 Subject: [PATCH 02/18] Remove handled tags --- .../html-api/class-wp-html-processor.php | 73 ------------------- 1 file changed, 73 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 3d285ade80713..f7f874e86f69c 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -612,40 +612,20 @@ private function step_in_body() { * * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody */ - case '+A': - case '+ADDRESS': case '+APPLET': case '+AREA': - case '+ARTICLE': - case '+ASIDE': - case '+B': case '+BASE': case '+BASEFONT': case '+BGSOUND': - case '+BIG': case '+BLOCKQUOTE': case '+BODY': case '+BR': - case '+BUTTON': case '+CAPTION': - case '+CENTER': - case '+CODE': case '+COL': case '+COLGROUP': case '+DD': - case '+DETAILS': - case '+DIALOG': - case '+DIR': - case '+DIV': - case '+DL': case '+DT': - case '+EM': case '+EMBED': - case '+FIELDSET': - case '+FIGCAPTION': - case '+FIGURE': - case '+FONT': - case '+FOOTER': case '+FORM': case '+FRAME': case '+FRAMESET': @@ -656,25 +636,18 @@ private function step_in_body() { case '+H5': case '+H6': case '+HEAD': - case '+HEADER': - case '+HGROUP': case '+HR': case '+HTML': - case '+I': case '+IFRAME': case '+IMAGE': - case '+IMG': case '+INPUT': case '+KEYGEN': case '+LI': case '+LINK': case '+LISTING': - case '+MAIN': case '+MARQUEE': case '+MATH': - case '+MENU': case '+META': - case '+NAV': case '+NOBR': case '+NOEMBED': case '+NOFRAMES': @@ -683,7 +656,6 @@ private function step_in_body() { case '+OL': case '+OPTGROUP': case '+OPTION': - case '+P': case '+PARAM': case '+PLAINTEXT': case '+PRE': @@ -691,17 +663,10 @@ private function step_in_body() { case 
'+RP': case '+RT': case '+RTC': - case '+S': case '+SCRIPT': - case '+SEARCH': - case '+SECTION': case '+SELECT': - case '+SMALL': case '+SOURCE': - case '+STRIKE': - case '+STRONG': case '+STYLE': - case '+SUMMARY': case '+SVG': case '+TABLE': case '+TBODY': @@ -714,37 +679,15 @@ private function step_in_body() { case '+TITLE': case '+TR': case '+TRACK': - case '+TT': - case '+U': case '+UL': case '+WBR': case '+XMP': - case '-A': - case '-ADDRESS': case '-APPLET': - case '-ARTICLE': - case '-ASIDE': - case '-B': - case '-BIG': case '-BLOCKQUOTE': case '-BODY': case '-BR': - case '-BUTTON': - case '-CENTER': - case '-CODE': case '-DD': - case '-DETAILS': - case '-DIALOG': - case '-DIR': - case '-DIV': - case '-DL': case '-DT': - case '-EM': - case '-FIELDSET': - case '-FIGCAPTION': - case '-FIGURE': - case '-FONT': - case '-FOOTER': case '-FORM': case '-H1': case '-H2': @@ -752,31 +695,15 @@ private function step_in_body() { case '-H4': case '-H5': case '-H6': - case '-HEADER': - case '-HGROUP': case '-HTML': - case '-I': case '-LI': case '-LISTING': - case '-MAIN': case '-MARQUEE': - case '-MENU': - case '-NAV': case '-NOBR': case '-OBJECT': case '-OL': - case '-P': case '-PRE': - case '-S': - case '-SEARCH': - case '-SECTION': - case '-SMALL': - case '-STRIKE': - case '-STRONG': - case '-SUMMARY': case '-TEMPLATE': - case '-TT': - case '-U': case '-UL': $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." 
); From 1e92ec9919477365e88ac4f09c446f10d2a7abec Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 13 Dec 2023 19:13:50 +0100 Subject: [PATCH 03/18] Rework to use tag name --- .../html-api/class-wp-html-processor.php | 193 ++++++++---------- 1 file changed, 86 insertions(+), 107 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index f7f874e86f69c..71615912a7f67 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -603,112 +603,6 @@ private function step_in_body() { $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { - /* - * These tags require special handling in the 'in body' insertion mode - * but do not have special handling implemented yet. - * - * We throw the WP_HTML_Unsupported_Exception so we're free to implememnt - * the catch-all handling for any other start and end tag. - * - * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody - */ - case '+APPLET': - case '+AREA': - case '+BASE': - case '+BASEFONT': - case '+BGSOUND': - case '+BLOCKQUOTE': - case '+BODY': - case '+BR': - case '+CAPTION': - case '+COL': - case '+COLGROUP': - case '+DD': - case '+DT': - case '+EMBED': - case '+FORM': - case '+FRAME': - case '+FRAMESET': - case '+H1': - case '+H2': - case '+H3': - case '+H4': - case '+H5': - case '+H6': - case '+HEAD': - case '+HR': - case '+HTML': - case '+IFRAME': - case '+IMAGE': - case '+INPUT': - case '+KEYGEN': - case '+LI': - case '+LINK': - case '+LISTING': - case '+MARQUEE': - case '+MATH': - case '+META': - case '+NOBR': - case '+NOEMBED': - case '+NOFRAMES': - case '+NOSCRIPT': // scripting flag?? 
- case '+OBJECT': - case '+OL': - case '+OPTGROUP': - case '+OPTION': - case '+PARAM': - case '+PLAINTEXT': - case '+PRE': - case '+RB': - case '+RP': - case '+RT': - case '+RTC': - case '+SCRIPT': - case '+SELECT': - case '+SOURCE': - case '+STYLE': - case '+SVG': - case '+TABLE': - case '+TBODY': - case '+TD': - case '+TEMPLATE': - case '+TEXTAREA': - case '+TFOOT': - case '+TH': - case '+THEAD': - case '+TITLE': - case '+TR': - case '+TRACK': - case '+UL': - case '+WBR': - case '+XMP': - case '-APPLET': - case '-BLOCKQUOTE': - case '-BODY': - case '-BR': - case '-DD': - case '-DT': - case '-FORM': - case '-H1': - case '-H2': - case '-H3': - case '-H4': - case '-H5': - case '-H6': - case '-HTML': - case '-LI': - case '-LISTING': - case '-MARQUEE': - case '-NOBR': - case '-OBJECT': - case '-OL': - case '-PRE': - case '-TEMPLATE': - case '-UL': - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); - - /* * > A start tag whose tag name is "button" */ @@ -968,10 +862,95 @@ private function step_in_body() { // Execution should not reach here; if it does then something went wrong. return false; - default: + } + + /* + * These tags require special handling in the 'in body' insertion mode + * that has not been implemented yet. + * + * As they're implemented, they should be removed from this list. An accompanying + * test should help with this. + * + * We throw the WP_HTML_Unsupported_Exception so we're free to implememnt + * the catch-all handling for any other start and end tag. 
+ * + * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody + */ + switch ( $tag_name ) { + case 'APPLET': + case 'AREA': + case 'BASE': + case 'BASEFONT': + case 'BGSOUND': + case 'BODY': + case 'BR': + case 'CAPTION': + case 'COL': + case 'COLGROUP': + case 'DD': + case 'DT': + case 'EMBED': + case 'FORM': + case 'FRAME': + case 'FRAMESET': + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + case 'HEAD': + case 'HR': + case 'HTML': + case 'IFRAME': + case 'INPUT': + case 'KEYGEN': + case 'LI': + case 'LINK': + case 'LISTING': + case 'MARQUEE': + case 'MATH': + case 'META': + case 'NOBR': + case 'NOEMBED': + case 'NOFRAMES': + case 'NOSCRIPT': + case 'OBJECT': + case 'OL': + case 'OPTGROUP': + case 'OPTION': + case 'PARAM': + case 'PLAINTEXT': + case 'PRE': + case 'RB': + case 'RP': + case 'RT': + case 'RTC': + case 'SCRIPT': + case 'SELECT': + case 'SOURCE': + case 'STYLE': + case 'SVG': + case 'TABLE': + case 'TBODY': + case 'TD': + case 'TEMPLATE': + case 'TEXTAREA': + case 'TFOOT': + case 'TH': + case 'THEAD': + case 'TITLE': + case 'TR': + case 'TRACK': + case 'UL': + case 'WBR': + case 'XMP': $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); } + + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." 
); } /* From 31abb4340503a6c2c9c8c4a98120a1411daa1caa Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 13 Dec 2023 19:14:14 +0100 Subject: [PATCH 04/18] Add tests --- .../tests/html-api/wpHtmlProcessor.php | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index a9af5d790fc53..fe39b592cddb5 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -147,4 +147,99 @@ public function test_fails_to_reconstruct_formatting_elements() { $this->assertTrue( $p->next_tag( 'EM' ), 'Could not find first EM.' ); $this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' ); } + + /** + * Ensures that special handling of unsupported tags is cleaned up + * as handling is implemented. Otherwise there's risk of leaving special + * handling (that is never reached) when tag handling is implemented. + * + * @dataProvider data_unsupported_special_in_body_tags + * + * @param string $tag_name Name of the tag to test. + * + * @covers WP_HTML_Processor::step_in_body + */ + public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) { + $fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '>' ); + $this->assertFalse( $fragment->next_tag(), 'Should fail to find tag: ' . $tag_name . '.' ); + $this->assertEquals( $fragment->get_last_error(), WP_HTML_Processor::ERROR_UNSUPPORTED, 'Should have unsupported last error.' ); + } + + /** + * Data provider. 
+ * + * @return array[] + */ + public function data_unsupported_special_in_body_tags() { + return array( + array( 'APPLET' ), + array( 'AREA' ), + array( 'BASE' ), + array( 'BASEFONT' ), + array( 'BGSOUND' ), + array( 'BODY' ), + array( 'BR' ), + array( 'CAPTION' ), + array( 'COL' ), + array( 'COLGROUP' ), + array( 'DD' ), + array( 'DT' ), + array( 'EMBED' ), + array( 'FORM' ), + array( 'FRAME' ), + array( 'FRAMESET' ), + array( 'H1' ), + array( 'H2' ), + array( 'H3' ), + array( 'H4' ), + array( 'H5' ), + array( 'H6' ), + array( 'HEAD' ), + array( 'HR' ), + array( 'HTML' ), + array( 'IFRAME' ), + array( 'INPUT' ), + array( 'KEYGEN' ), + array( 'LI' ), + array( 'LINK' ), + array( 'LISTING' ), + array( 'MARQUEE' ), + array( 'MATH' ), + array( 'META' ), + array( 'NOBR' ), + array( 'NOEMBED' ), + array( 'NOFRAMES' ), + array( 'NOSCRIPT' ), + array( 'OBJECT' ), + array( 'OL' ), + array( 'OPTGROUP' ), + array( 'OPTION' ), + array( 'PARAM' ), + array( 'PLAINTEXT' ), + array( 'PRE' ), + array( 'RB' ), + array( 'RP' ), + array( 'RT' ), + array( 'RTC' ), + array( 'SCRIPT' ), + array( 'SELECT' ), + array( 'SOURCE' ), + array( 'STYLE' ), + array( 'SVG' ), + array( 'TABLE' ), + array( 'TBODY' ), + array( 'TD' ), + array( 'TEMPLATE' ), + array( 'TEXTAREA' ), + array( 'TFOOT' ), + array( 'TH' ), + array( 'THEAD' ), + array( 'TITLE' ), + array( 'TR' ), + array( 'TRACK' ), + array( 'UL' ), + array( 'WBR' ), + array( 'XMP' ), + ); + } } From 2eb3c97658341a9512554923d1f9e7440ab2bd20 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 13 Dec 2023 19:18:36 +0100 Subject: [PATCH 05/18] Remove H1-H6 tests and handling (now supported) --- src/wp-includes/html-api/class-wp-html-processor.php | 6 ------ tests/phpunit/tests/html-api/wpHtmlProcessor.php | 6 ------ 2 files changed, 12 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 71615912a7f67..4d9fc86202897 100644 --- 
a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -893,12 +893,6 @@ private function step_in_body() { case 'FORM': case 'FRAME': case 'FRAMESET': - case 'H1': - case 'H2': - case 'H3': - case 'H4': - case 'H5': - case 'H6': case 'HEAD': case 'HR': case 'HTML': diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index fe39b592cddb5..21a34490ac4d5 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -188,12 +188,6 @@ public function data_unsupported_special_in_body_tags() { array( 'FORM' ), array( 'FRAME' ), array( 'FRAMESET' ), - array( 'H1' ), - array( 'H2' ), - array( 'H3' ), - array( 'H4' ), - array( 'H5' ), - array( 'H6' ), array( 'HEAD' ), array( 'HR' ), array( 'HTML' ), From ade089ebbaad403e140f437fdb63c8d0b22ec5f8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 13 Dec 2023 19:20:51 +0100 Subject: [PATCH 06/18] Add note mentioning test --- src/wp-includes/html-api/class-wp-html-processor.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4d9fc86202897..176686140fcdd 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -869,7 +869,9 @@ private function step_in_body() { * that has not been implemented yet. * * As they're implemented, they should be removed from this list. An accompanying - * test should help with this. + * test should help ensure this list is maintained. + * + * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags * * We throw the WP_HTML_Unsupported_Exception so we're free to implememnt * the catch-all handling for any other start and end tag. 
From 2a8fc4640c33a74b0c12699d221651fafd4758dc Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 18 Dec 2023 13:09:15 -0600 Subject: [PATCH 07/18] Add support for "any other tag" categories in IN BODY --- .../html-api/class-wp-html-processor.php | 43 +++++++++++++++++++ .../tests/html-api/wpHtmlProcessor.php | 16 ------- .../html-api/wpHtmlProcessorBreadcrumbs.php | 8 ++-- 3 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 176686140fcdd..ea9e33ed2829d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -879,6 +879,8 @@ private function step_in_body() { * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody */ switch ( $tag_name ) { + case 'ABBR': + case 'ACRONYM': case 'APPLET': case 'AREA': case 'BASE': @@ -945,6 +947,47 @@ private function step_in_body() { throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); } + if ( ! $this->is_tag_closer() ) { + // > Any other start tag. + $this->reconstruct_active_formatting_elements(); + $this->insert_html_element( $this->state->current_token ); + return true; + } else { + // > Any other end tag + $node = $this->state->stack_of_open_elements->current_node(); + + in_body_any_other_end_tag_loop: + if ( $tag_name === $node->node_name ) { + $this->generate_implied_end_tags( $tag_name ); + if ( $node !== $this->state->stack_of_open_elements->current_node() ) { + // @todo Record parse error: this error doesn't impact parsing. + } + $pop_count = 0; + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + ++$pop_count; + if ( $node === $item ) { + break; + } + } + while ( $pop_count-- > 0 ) { + $this->state->stack_of_open_elements->pop(); + } + return true; + } elseif ( self::is_special( $node->node_name ) ) { + // This is a parse error, ignore the token. 
+ return $this->step(); + } + $one_shot = false; + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + if ( $one_shot ) { + $node = $item; + goto in_body_any_other_end_tag_loop; + } + + $one_shot = true; + } + } + $this->last_error = self::ERROR_UNSUPPORTED; throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 21a34490ac4d5..622d52ac47871 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -60,22 +60,6 @@ public function test_get_tag_is_null_once_document_is_finished() { $this->assertNull( $p->get_tag() ); } - /** - * Ensures that if the HTML Processor encounters inputs that it can't properly handle, - * that it stops processing the rest of the document. This prevents data corruption. - * - * @ticket 59167 - * - * @covers WP_HTML_Processor::next_tag - */ - public function test_stops_processing_after_unsupported_elements() { - $p = WP_HTML_Processor::create_fragment( '

' ); - $p->next_tag( 'P' ); - $this->assertFalse( $p->next_tag(), 'Stepped into a tag after encountering X-NOT-SUPPORTED element when it should have aborted.' ); - $this->assertNull( $p->get_tag(), "Should have aborted processing, but still reported tag {$p->get_tag()} after properly failing to step into tag." ); - $this->assertFalse( $p->next_tag( 'P' ), 'Stepped into normal P element after X-NOT-SUPPORTED element when it should have aborted.' ); - } - /** * Ensures that the HTML Processor maintains its internal state through seek calls. * diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 2fd852e434412..e758bf72cd2f5 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -216,10 +216,6 @@ public function data_unsupported_elements() { 'VIDEO', 'WBR', 'XMP', // Deprecated, use PRE instead. - - // Made up elements, custom elements. - 'X-NOT-AN-HTML-ELEMENT', - 'HUMAN-TIME', ); $data = array(); @@ -360,6 +356,10 @@ public function data_html_target_with_breadcrumbs() { 'H4 inside H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H4' ), 1 ), 'H5 after unclosed H4 inside H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H5' ), 1 ), 'H5 after H4 inside H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H5' ), 1 ), + + // Custom elements. + 'WP-EMOJI' => array( '
', array( 'HTML', 'BODY', 'DIV', 'WP-EMOJI' ), 1 ), + 'WP-EMOJI then IMG' => array( '
', array( 'HTML', 'BODY', 'DIV', 'IMG' ), 1 ), ); } From eb99542e5a638b3ef1156b9316f0b2a21fdde731 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 18 Dec 2023 14:22:26 -0600 Subject: [PATCH 08/18] Update more tests, more checks, add SARCASM --- .../html-api/class-wp-html-processor.php | 3 +- .../tests/html-api/wpHtmlProcessor.php | 1 + .../html-api/wpHtmlProcessorBreadcrumbs.php | 70 +++++++------- .../wpHtmlSupportRequiredOpenElements.php | 91 +++++-------------- 4 files changed, 58 insertions(+), 107 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ea9e33ed2829d..4a3ccb94acf14 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -879,8 +879,6 @@ private function step_in_body() { * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody */ switch ( $tag_name ) { - case 'ABBR': - case 'ACRONYM': case 'APPLET': case 'AREA': case 'BASE': @@ -924,6 +922,7 @@ private function step_in_body() { case 'RP': case 'RT': case 'RTC': + case 'SARCASM': case 'SCRIPT': case 'SELECT': case 'SOURCE': diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 622d52ac47871..2dbd76b802959 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -199,6 +199,7 @@ public function data_unsupported_special_in_body_tags() { array( 'RP' ), array( 'RT' ), array( 'RTC' ), + array( 'SARCASM' ), array( 'SCRIPT' ), array( 'SELECT' ), array( 'SOURCE' ), diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index e758bf72cd2f5..9a9936a599118 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -37,14 +37,26 @@ public 
function test_navigates_into_normative_html_for_supported_elements( $html public function data_single_tag_of_supported_elements() { $supported_elements = array( 'A', + 'ABBR', + 'ACRONYM', // Neutralized 'ADDRESS', 'ARTICLE', 'ASIDE', + 'AUDIO', 'B', + 'BDI', + 'BDO', 'BIG', + 'BLINK', // Deprecated 'BUTTON', + 'CANVAS', 'CENTER', // Neutralized + 'CITE', 'CODE', + 'DATA', + 'DATALIST', + 'DEFN', + 'DEL', 'DETAILS', 'DIALOG', 'DIR', @@ -66,19 +78,42 @@ public function data_single_tag_of_supported_elements() { 'HGROUP', 'I', 'IMG', + 'INS', + 'ISINDEX', // Deprecated + 'KBD', + 'LABEL', + 'LEGEND', 'MAIN', + 'MAP', + 'MARK', 'MENU', + 'METER', + 'MULTICOL', // Deprecated 'NAV', + 'NEXTID',// Deprecated + 'OUTPUT', 'P', + 'PICTURE', + 'PROGRESS', + 'Q', + 'RUBY', + 'SAMP', 'SEARCH', 'SECTION', + 'SLOT', 'SMALL', + 'SPACER', // Deprecated 'SPAN', 'STRIKE', 'STRONG', + 'SUB', 'SUMMARY', + 'SUP', + 'TIME', 'TT', 'U', + 'VAR', + 'VIDEO', ); $data = array(); @@ -121,28 +156,16 @@ public function test_fails_when_encountering_unsupported_tag( $html ) { */ public function data_unsupported_elements() { $unsupported_elements = array( - 'ABBR', - 'ACRONYM', // Neutralized 'APPLET', // Deprecated 'AREA', - 'AUDIO', 'BASE', - 'BDI', - 'BDO', 'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal. - 'BLINK', // Deprecated 'BODY', 'BR', - 'CANVAS', 'CAPTION', - 'CITE', 'COL', 'COLGROUP', - 'DATA', - 'DATALIST', 'DD', - 'DEL', - 'DEFN', 'DT', 'EMBED', 'FORM', @@ -153,23 +176,13 @@ public function data_unsupported_elements() { 'HTML', 'IFRAME', 'INPUT', - 'INS', - 'ISINDEX', // Deprecated - 'KBD', 'KEYGEN', // Deprecated; void - 'LABEL', - 'LEGEND', 'LI', 'LINK', 'LISTING', // Deprecated, use PRE instead. 
- 'MAP', - 'MARK', 'MARQUEE', // Deprecated 'MATH', 'META', - 'METER', - 'MULTICOL', // Deprecated - 'NEXTID', // Deprecated 'NOBR', // Neutralized 'NOEMBED', // Neutralized 'NOFRAMES', // Neutralized @@ -178,26 +191,16 @@ public function data_unsupported_elements() { 'OL', 'OPTGROUP', 'OPTION', - 'OUTPUT', - 'PICTURE', 'PLAINTEXT', // Neutralized 'PRE', - 'PROGRESS', - 'Q', 'RB', // Neutralized 'RP', 'RT', 'RTC', // Neutralized - 'RUBY', - 'SAMP', 'SCRIPT', 'SELECT', - 'SLOT', 'SOURCE', - 'SPACER', // Deprecated 'STYLE', - 'SUB', - 'SUP', 'SVG', 'TABLE', 'TBODY', @@ -207,13 +210,10 @@ public function data_unsupported_elements() { 'TFOOT', 'TH', 'THEAD', - 'TIME', 'TITLE', 'TR', 'TRACK', 'UL', - 'VAR', - 'VIDEO', 'WBR', 'XMP', // Deprecated, use PRE instead. ); diff --git a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php index 9dbb689df0329..a66a8a689ced1 100644 --- a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php +++ b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php @@ -72,21 +72,14 @@ public function test_has_element_in_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements - $this->ensure_support_is_added_everywhere( 'MI' ); - $this->ensure_support_is_added_everywhere( 'MO' ); - $this->ensure_support_is_added_everywhere( 'MN' ); - $this->ensure_support_is_added_everywhere( 'MS' ); - $this->ensure_support_is_added_everywhere( 'MTEXT' ); - $this->ensure_support_is_added_everywhere( 'ANNOTATION-XML' ); + // MathML Elements. + $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. 
*/ - $this->ensure_support_is_added_everywhere( 'FOREIGNOBJECT' ); - $this->ensure_support_is_added_everywhere( 'DESC' ); - $this->ensure_support_is_added_everywhere( 'TITLE' ); + $this->ensure_support_is_added_everywhere( 'SVG' ); } /** @@ -115,21 +108,14 @@ public function test_has_element_in_list_item_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements - $this->ensure_support_is_added_everywhere( 'MI' ); - $this->ensure_support_is_added_everywhere( 'MO' ); - $this->ensure_support_is_added_everywhere( 'MN' ); - $this->ensure_support_is_added_everywhere( 'MS' ); - $this->ensure_support_is_added_everywhere( 'MTEXT' ); - $this->ensure_support_is_added_everywhere( 'ANNOTATION-XML' ); + // MathML Elements. + $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. */ - $this->ensure_support_is_added_everywhere( 'FOREIGNOBJECT' ); - $this->ensure_support_is_added_everywhere( 'DESC' ); - $this->ensure_support_is_added_everywhere( 'TITLE' ); + $this->ensure_support_is_added_everywhere( 'SVG' ); // These elements are specific to list item scope. $this->ensure_support_is_added_everywhere( 'OL' ); @@ -161,21 +147,14 @@ public function test_has_element_in_button_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements - $this->ensure_support_is_added_everywhere( 'MI' ); - $this->ensure_support_is_added_everywhere( 'MO' ); - $this->ensure_support_is_added_everywhere( 'MN' ); - $this->ensure_support_is_added_everywhere( 'MS' ); - $this->ensure_support_is_added_everywhere( 'MTEXT' ); - $this->ensure_support_is_added_everywhere( 'ANNOTATION-XML' ); + // MathML Elements. 
+ $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. */ - $this->ensure_support_is_added_everywhere( 'FOREIGNOBJECT' ); - $this->ensure_support_is_added_everywhere( 'DESC' ); - $this->ensure_support_is_added_everywhere( 'TITLE' ); + $this->ensure_support_is_added_everywhere( 'SVG' ); } /** @@ -201,21 +180,14 @@ public function test_after_element_pop_must_maintain_p_in_button_scope_flag() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements - $this->ensure_support_is_added_everywhere( 'MI' ); - $this->ensure_support_is_added_everywhere( 'MO' ); - $this->ensure_support_is_added_everywhere( 'MN' ); - $this->ensure_support_is_added_everywhere( 'MS' ); - $this->ensure_support_is_added_everywhere( 'MTEXT' ); - $this->ensure_support_is_added_everywhere( 'ANNOTATION-XML' ); + // MathML Elements. + $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. 
*/ - $this->ensure_support_is_added_everywhere( 'FOREIGNOBJECT' ); - $this->ensure_support_is_added_everywhere( 'DESC' ); - $this->ensure_support_is_added_everywhere( 'TITLE' ); + $this->ensure_support_is_added_everywhere( 'SVG' ); } /** @@ -241,21 +213,14 @@ public function test_after_element_push_must_maintain_p_in_button_scope_flag() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements - $this->ensure_support_is_added_everywhere( 'MI' ); - $this->ensure_support_is_added_everywhere( 'MO' ); - $this->ensure_support_is_added_everywhere( 'MN' ); - $this->ensure_support_is_added_everywhere( 'MS' ); - $this->ensure_support_is_added_everywhere( 'MTEXT' ); - $this->ensure_support_is_added_everywhere( 'ANNOTATION-XML' ); + // MathML Elements. + $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. */ - $this->ensure_support_is_added_everywhere( 'FOREIGNOBJECT' ); - $this->ensure_support_is_added_everywhere( 'DESC' ); - $this->ensure_support_is_added_everywhere( 'TITLE' ); + $this->ensure_support_is_added_everywhere( 'SVG' ); } /** @@ -280,21 +245,14 @@ public function test_has_element_in_table_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements - $this->ensure_support_is_added_everywhere( 'MI' ); - $this->ensure_support_is_added_everywhere( 'MO' ); - $this->ensure_support_is_added_everywhere( 'MN' ); - $this->ensure_support_is_added_everywhere( 'MS' ); - $this->ensure_support_is_added_everywhere( 'MTEXT' ); - $this->ensure_support_is_added_everywhere( 'ANNOTATION-XML' ); + // MathML Elements. 
+ $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. */ - $this->ensure_support_is_added_everywhere( 'FOREIGNOBJECT' ); - $this->ensure_support_is_added_everywhere( 'DESC' ); - $this->ensure_support_is_added_everywhere( 'TITLE' ); + $this->ensure_support_is_added_everywhere( 'SVG' ); // These elements are specific to TABLE scope. $this->ensure_support_is_added_everywhere( 'HTML' ); @@ -335,21 +293,14 @@ public function test_has_element_in_select_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements - $this->ensure_support_is_added_everywhere( 'MI' ); - $this->ensure_support_is_added_everywhere( 'MO' ); - $this->ensure_support_is_added_everywhere( 'MN' ); - $this->ensure_support_is_added_everywhere( 'MS' ); - $this->ensure_support_is_added_everywhere( 'MTEXT' ); - $this->ensure_support_is_added_everywhere( 'ANNOTATION-XML' ); + // MathML Elements. + $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. */ - $this->ensure_support_is_added_everywhere( 'FOREIGNOBJECT' ); - $this->ensure_support_is_added_everywhere( 'DESC' ); - $this->ensure_support_is_added_everywhere( 'TITLE' ); + $this->ensure_support_is_added_everywhere( 'SVG' ); // These elements are specific to SELECT scope. 
$this->ensure_support_is_added_everywhere( 'OPTGROUP' ); From 7e8eb4cfb8b18cf4fe7753d5cef8742a41925df2 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 19 Dec 2023 14:49:49 -0600 Subject: [PATCH 09/18] WPCS --- phpcs.xml.dist | 11 ++++++++++- .../tests/html-api/wpHtmlProcessorBreadcrumbs.php | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/phpcs.xml.dist b/phpcs.xml.dist index 3defbc290a6b2..b177cf1f7a990 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -224,7 +224,7 @@ ############################################################################# SELECTIVE EXCLUSIONS Exclude specific files for specific sniffs and/or exclude sub-groups in sniffs. - + These exclusions are listed ordered by alphabetic sniff name. ############################################################################# --> @@ -250,6 +250,15 @@ /wp-tests-config-sample\.php + + + /wp-includes/html-api/class-wp-html-processor\.php + + diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 9a9936a599118..b584740eefeab 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -90,7 +90,7 @@ public function data_single_tag_of_supported_elements() { 'METER', 'MULTICOL', // Deprecated 'NAV', - 'NEXTID',// Deprecated + 'NEXTID', // Deprecated 'OUTPUT', 'P', 'PICTURE', From ccc297d9053c4800a5e3d88e2e0e1a089ad0c3a2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Dec 2023 18:10:50 +0100 Subject: [PATCH 10/18] Better data source descriptions --- .../tests/html-api/wpHtmlProcessor.php | 126 +++++++++--------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 2dbd76b802959..906565e4239cd 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ 
b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -156,69 +156,69 @@ public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) { */ public function data_unsupported_special_in_body_tags() { return array( - array( 'APPLET' ), - array( 'AREA' ), - array( 'BASE' ), - array( 'BASEFONT' ), - array( 'BGSOUND' ), - array( 'BODY' ), - array( 'BR' ), - array( 'CAPTION' ), - array( 'COL' ), - array( 'COLGROUP' ), - array( 'DD' ), - array( 'DT' ), - array( 'EMBED' ), - array( 'FORM' ), - array( 'FRAME' ), - array( 'FRAMESET' ), - array( 'HEAD' ), - array( 'HR' ), - array( 'HTML' ), - array( 'IFRAME' ), - array( 'INPUT' ), - array( 'KEYGEN' ), - array( 'LI' ), - array( 'LINK' ), - array( 'LISTING' ), - array( 'MARQUEE' ), - array( 'MATH' ), - array( 'META' ), - array( 'NOBR' ), - array( 'NOEMBED' ), - array( 'NOFRAMES' ), - array( 'NOSCRIPT' ), - array( 'OBJECT' ), - array( 'OL' ), - array( 'OPTGROUP' ), - array( 'OPTION' ), - array( 'PARAM' ), - array( 'PLAINTEXT' ), - array( 'PRE' ), - array( 'RB' ), - array( 'RP' ), - array( 'RT' ), - array( 'RTC' ), - array( 'SARCASM' ), - array( 'SCRIPT' ), - array( 'SELECT' ), - array( 'SOURCE' ), - array( 'STYLE' ), - array( 'SVG' ), - array( 'TABLE' ), - array( 'TBODY' ), - array( 'TD' ), - array( 'TEMPLATE' ), - array( 'TEXTAREA' ), - array( 'TFOOT' ), - array( 'TH' ), - array( 'THEAD' ), - array( 'TITLE' ), - array( 'TR' ), - array( 'TRACK' ), - array( 'UL' ), - array( 'WBR' ), - array( 'XMP' ), + 'APPLET' => array( 'APPLET' ), + 'AREA' => array( 'AREA' ), + 'BASE' => array( 'BASE' ), + 'BASEFONT' => array( 'BASEFONT' ), + 'BGSOUND' => array( 'BGSOUND' ), + 'BODY' => array( 'BODY' ), + 'BR' => array( 'BR' ), + 'CAPTION' => array( 'CAPTION' ), + 'COL' => array( 'COL' ), + 'COLGROUP' => array( 'COLGROUP' ), + 'DD' => array( 'DD' ), + 'DT' => array( 'DT' ), + 'EMBED' => array( 'EMBED' ), + 'FORM' => array( 'FORM' ), + 'FRAME' => array( 'FRAME' ), + 'FRAMESET' => array( 'FRAMESET' ), + 'HEAD' => array( 'HEAD' ), + 'HR' 
=> array( 'HR' ), + 'HTML' => array( 'HTML' ), + 'IFRAME' => array( 'IFRAME' ), + 'INPUT' => array( 'INPUT' ), + 'KEYGEN' => array( 'KEYGEN' ), + 'LI' => array( 'LI' ), + 'LINK' => array( 'LINK' ), + 'LISTING' => array( 'LISTING' ), + 'MARQUEE' => array( 'MARQUEE' ), + 'MATH' => array( 'MATH' ), + 'META' => array( 'META' ), + 'NOBR' => array( 'NOBR' ), + 'NOEMBED' => array( 'NOEMBED' ), + 'NOFRAMES' => array( 'NOFRAMES' ), + 'NOSCRIPT' => array( 'NOSCRIPT' ), + 'OBJECT' => array( 'OBJECT' ), + 'OL' => array( 'OL' ), + 'OPTGROUP' => array( 'OPTGROUP' ), + 'OPTION' => array( 'OPTION' ), + 'PARAM' => array( 'PARAM' ), + 'PLAINTEXT' => array( 'PLAINTEXT' ), + 'PRE' => array( 'PRE' ), + 'RB' => array( 'RB' ), + 'RP' => array( 'RP' ), + 'RT' => array( 'RT' ), + 'RTC' => array( 'RTC' ), + 'SARCASM' => array( 'SARCASM' ), + 'SCRIPT' => array( 'SCRIPT' ), + 'SELECT' => array( 'SELECT' ), + 'SOURCE' => array( 'SOURCE' ), + 'STYLE' => array( 'STYLE' ), + 'SVG' => array( 'SVG' ), + 'TABLE' => array( 'TABLE' ), + 'TBODY' => array( 'TBODY' ), + 'TD' => array( 'TD' ), + 'TEMPLATE' => array( 'TEMPLATE' ), + 'TEXTAREA' => array( 'TEXTAREA' ), + 'TFOOT' => array( 'TFOOT' ), + 'TH' => array( 'TH' ), + 'THEAD' => array( 'THEAD' ), + 'TITLE' => array( 'TITLE' ), + 'TR' => array( 'TR' ), + 'TRACK' => array( 'TRACK' ), + 'UL' => array( 'UL' ), + 'WBR' => array( 'WBR' ), + 'XMP' => array( 'XMP' ), ); } } From b20806af5958309892584b389075e3816c4529c3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Dec 2023 18:13:56 +0100 Subject: [PATCH 11/18] Close tags so processing doesn't pause --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 906565e4239cd..2e70bee2196aa 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -144,7 +144,7 @@ public function 
test_fails_to_reconstruct_formatting_elements() { * @covers WP_HTML_Processor::step_in_body */ public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) { - $fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '>' ); + $fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '></' . $tag_name . '>' ); $this->assertFalse( $fragment->next_tag(), 'Should fail to find tag: ' . $tag_name . '.' ); $this->assertEquals( $fragment->get_last_error(), WP_HTML_Processor::ERROR_UNSUPPORTED, 'Should have unsupported last error.' ); } From 556f3cf8a012ad789728045ccb44cc398244ae43 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 18 Dec 2023 15:57:43 -0600 Subject: [PATCH 12/18] Fix bugs uncovered by tests --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4a3ccb94acf14..70448345d9aa0 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1387,7 +1387,7 @@ private function run_adoption_agency_algorithm() { // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. if ( ! 
$this->state->stack_of_open_elements->contains_node( $formatting_element ) ) { - $this->state->active_formatting_elements->remove_node( $formatting_element->bookmark_name ); + $this->state->active_formatting_elements->remove_node( $formatting_element ); return; } From 2cab6cf3250f284c3acec22f4330a41f98416eca Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 22 Dec 2023 15:01:36 -0600 Subject: [PATCH 13/18] Refactor closing tag loop, eliminate goto --- phpcs.xml.dist | 9 ---- .../html-api/class-wp-html-processor.php | 47 +++++++++---------- 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/phpcs.xml.dist b/phpcs.xml.dist index b177cf1f7a990..fabbe2ef7d520 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -250,15 +250,6 @@ /wp-tests-config-sample\.php - - - /wp-includes/html-api/class-wp-html-processor\.php - - diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 70448345d9aa0..fcada612d4166 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -953,37 +953,34 @@ private function step_in_body() { return true; } else { // > Any other end tag - $node = $this->state->stack_of_open_elements->current_node(); - in_body_any_other_end_tag_loop: - if ( $tag_name === $node->node_name ) { - $this->generate_implied_end_tags( $tag_name ); - if ( $node !== $this->state->stack_of_open_elements->current_node() ) { - // @todo Record parse error: this error doesn't impact parsing. - } - $pop_count = 0; - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - ++$pop_count; - if ( $node === $item ) { - break; - } + /* + * Find the corresponding tag opener in the stack of open elements, if + * it exists before reaching a special element, which provides a kind + * of boundary in the stack. For example, a `</custom-tag>` should not + * close anything beyond its containing `P` or `DIV` element. 
+ */ + foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + if ( $tag_name === $node->node_name ) { + break; } - while ( $pop_count-- > 0 ) { - $this->state->stack_of_open_elements->pop(); + + if ( self::is_special( $node->node_name ) ) { + // This is a parse error, ignore the token. + return $this->step(); } - return true; - } elseif ( self::is_special( $node->node_name ) ) { - // This is a parse error, ignore the token. - return $this->step(); } - $one_shot = false; + + $this->generate_implied_end_tags( $tag_name ); + if ( $node !== $this->state->stack_of_open_elements->current_node() ) { + // @todo Record parse error: this error doesn't impact parsing. + } + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( $one_shot ) { - $node = $item; - goto in_body_any_other_end_tag_loop; + $this->state->stack_of_open_elements->pop(); + if ( $node === $item ) { + return true; } - - $one_shot = true; } } From 98c46e9aad0201e3d8b9c9d720972b68e566ffa1 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 22 Dec 2023 15:08:42 -0600 Subject: [PATCH 14/18] Undo whitespace change in phpcs rules --- phpcs.xml.dist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phpcs.xml.dist b/phpcs.xml.dist index fabbe2ef7d520..3defbc290a6b2 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -224,7 +224,7 @@ ############################################################################# SELECTIVE EXCLUSIONS Exclude specific files for specific sniffs and/or exclude sub-groups in sniffs. - + These exclusions are listed ordered by alphabetic sniff name. ############################################################################# --> From 620bdec71e9b5774c8c154eb5fb916481d4f6c07 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 22 Dec 2023 15:16:43 -0600 Subject: [PATCH 15/18] Update comments, remove redundant code. 
--- .../html-api/class-wp-html-processor.php | 55 +++++-------------- 1 file changed, 13 insertions(+), 42 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index fcada612d4166..72a3c30fb0794 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -830,51 +830,21 @@ private function step_in_body() { $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); return true; - - /* - * > Any other start tag - */ - case '+SPAN': - $this->reconstruct_active_formatting_elements(); - $this->insert_html_element( $this->state->current_token ); - return true; - - /* - * Any other end tag - */ - case '-SPAN': - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - // > If node is an HTML element with the same tag name as the token, then: - if ( $item->node_name === $tag_name ) { - $this->generate_implied_end_tags( $tag_name ); - - // > If node is not the current node, then this is a parse error. - - $this->state->stack_of_open_elements->pop_until( $tag_name ); - return true; - } - - // > Otherwise, if node is in the special category, then this is a parse error; ignore the token, and return. - if ( self::is_special( $item->node_name ) ) { - return $this->step(); - } - } - // Execution should not reach here; if it does then something went wrong. - return false; - } /* * These tags require special handling in the 'in body' insertion mode - * that has not been implemented yet. + * but that handling hasn't yet been implemented. * - * As they're implemented, they should be removed from this list. An accompanying - * test should help ensure this list is maintained. + * As the rules for each tag are implemented, the corresponding tag + * name should be removed from this list. An accompanying test should + * help ensure this list is maintained. 
* * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags * - * We throw the WP_HTML_Unsupported_Exception so we're free to implememnt - * the catch-all handling for any other start and end tag. + * Since this switch structure throws a WP_HTML_Unsupported_Exception, it's + * possible to handle "any other start tag" and "any other end tag" below, + * as that guarantees execution doesn't proceed for the unimplemented tags. * * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody */ @@ -947,12 +917,16 @@ private function step_in_body() { } if ( ! $this->is_tag_closer() ) { - // > Any other start tag. + /* + * > Any other start tag + */ $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); return true; } else { - // > Any other end tag + /* + * > Any other end tag + */ /* * Find the corresponding tag opener in the stack of open elements, if @@ -983,9 +957,6 @@ private function step_in_body() { } } } - - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); } /* From a98f30bb943cbefe568fc654b3697d882a29581b Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 22 Dec 2023 15:55:37 -0600 Subject: [PATCH 16/18] Add newly-supported elements to docblock, fix typo in tests. 
--- src/wp-includes/html-api/class-wp-html-processor.php | 9 ++++++--- .../tests/html-api/wpHtmlProcessorBreadcrumbs.php | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 72a3c30fb0794..3263ffe1dc90a 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -100,15 +100,18 @@ * The following list specifies the HTML tags that _are_ supported: * * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. - * - Form elements: BUTTON, FIELDSET, SEARCH. + * - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH. * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. * - Links: A. * - Lists: DL. - * - Media elements: FIGCAPTION, FIGURE, IMG. + * - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO. * - Paragraph: P. 
+ * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION - * - Deprecated elements: CENTER, DIR + * - Templating elements: SLOT + * - Text decoration: RUBY + * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER * * ### Supported markup * diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index b584740eefeab..3b339e4f82ee9 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -55,7 +55,7 @@ public function data_single_tag_of_supported_elements() { 'CODE', 'DATA', 'DATALIST', - 'DEFN', + 'DFN', 'DEL', 'DETAILS', 'DIALOG', From 9f9ce0d7e960e777f761cff7bd71944d2eb44806 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 22 Dec 2023 15:57:26 -0600 Subject: [PATCH 17/18] fixup! Add newly-supported elements to docblock, fix typo in tests. --- src/wp-includes/html-api/class-wp-html-processor.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 3263ffe1dc90a..41823af00ff93 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -100,6 +100,7 @@ * The following list specifies the HTML tags that _are_ supported: * * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. + * - Custom elements: All custom elements are supported. :) * - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH. * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. @@ -107,11 +108,11 @@ * - Lists: DL. 
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO. * - Paragraph: P. - * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR - * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION - * - Templating elements: SLOT - * - Text decoration: RUBY - * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER + * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. + * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION. + * - Templating elements: SLOT. + * - Text decoration: RUBY. + * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER. * * ### Supported markup * From 3f9670a5573a53334cfec7803c1839a1c22621b5 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 22 Dec 2023 19:40:41 -0600 Subject: [PATCH 18/18] Update docblock lints and re-add MathML and SVG comments. --- .../tests/html-api/wpHtmlProcessor.php | 6 ++-- .../wpHtmlSupportRequiredOpenElements.php | 28 ++++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 2e70bee2196aa..2e5565c9734fa 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -137,11 +137,13 @@ public function test_fails_to_reconstruct_formatting_elements() { * as handling is implemented. Otherwise there's risk of leaving special * handling (that is never reached) when tag handling is implemented. * - * @dataProvider data_unsupported_special_in_body_tags + * @ticket 60092 * - * @param string $tag_name Name of the tag to test. + * @dataProvider data_unsupported_special_in_body_tags * * @covers WP_HTML_Processor::step_in_body + * + * @param string $tag_name Name of the tag to test. 
*/ public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) { $fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '></' . $tag_name . '>' ); diff --git a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php index a66a8a689ced1..a0c9c600c1e45 100644 --- a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php +++ b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php @@ -72,12 +72,14 @@ public function test_has_element_in_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements. + // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. + * + * FOREIGNOBJECT, DESC, TITLE. */ $this->ensure_support_is_added_everywhere( 'SVG' ); } @@ -108,12 +110,14 @@ public function test_has_element_in_list_item_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements. + // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. + * + * FOREIGNOBJECT, DESC, TITLE. */ $this->ensure_support_is_added_everywhere( 'SVG' ); @@ -147,12 +151,14 @@ public function test_has_element_in_button_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements. + // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 
$this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. + * + * FOREIGNOBJECT, DESC, TITLE. */ $this->ensure_support_is_added_everywhere( 'SVG' ); } @@ -180,12 +186,14 @@ public function test_after_element_pop_must_maintain_p_in_button_scope_flag() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements. + // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. + * + * FOREIGNOBJECT, DESC, TITLE. */ $this->ensure_support_is_added_everywhere( 'SVG' ); } @@ -213,12 +221,14 @@ public function test_after_element_push_must_maintain_p_in_button_scope_flag() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements. + // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. + * + * FOREIGNOBJECT, DESC, TITLE. */ $this->ensure_support_is_added_everywhere( 'SVG' ); } @@ -245,12 +255,14 @@ public function test_has_element_in_table_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements. + // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. + * + * FOREIGNOBJECT, DESC, TITLE. 
*/ $this->ensure_support_is_added_everywhere( 'SVG' ); @@ -293,12 +305,14 @@ public function test_has_element_in_select_scope_needs_support() { $this->ensure_support_is_added_everywhere( 'OBJECT' ); $this->ensure_support_is_added_everywhere( 'TEMPLATE' ); - // MathML Elements. + // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. $this->ensure_support_is_added_everywhere( 'MATH' ); /* * SVG elements: note that TITLE is both an HTML element and an SVG element * so care must be taken when adding support for either one. + * + * FOREIGNOBJECT, DESC, TITLE. */ $this->ensure_support_is_added_everywhere( 'SVG' );