From 2098d0c4305c232b2fd64cf317305a8bd2130984 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 11 Apr 2023 23:39:41 +0200 Subject: [PATCH] HTML API: Delete all copies of an attribute when removing and duplicates exist. (#4317) When encountering an HTML tag with duplicate copies of an attribute the tag processor ignores the duplicate values, according to the specification. However, when removing an attribute it must remove all copies of that attribute lest one of the duplicates becomes the primary and it appears as if no attributes were removed. In this patch we're adding tests that will be used to ensure that all attribute copies are removed from a tag when one is requested to be removed. Co-authored-by: Bernie Reiter <96308+ockham@users.noreply.github.com> --- .../html-api/class-wp-html-tag-processor.php | 51 ++++++++++++-- .../tests/html-api/wpHtmlTagProcessor.php | 69 ++++++++++++++++--- 2 files changed, 105 insertions(+), 15 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 79d96dc8be3f1..7aea3d40cda72 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -406,6 +406,16 @@ class WP_HTML_Tag_Processor { */ private $attributes = array(); + /** + * Tracks spans of duplicate attributes on a given tag, used for removing + * all copies of an attribute when calling `remove_attribute()`. + * + * @since 6.3.2 + * + * @var (WP_HTML_Span[])[]|null + */ + private $duplicate_attributes = null; + /** * Which class names to add or remove from a tag. * @@ -1286,6 +1296,25 @@ private function parse_next_attribute() { $attribute_end, ! $has_value ); + + return true; + } + + /* + * Track the duplicate attributes so if we remove it, all disappear together. + * + * While `$this->duplicated_attributes` could always be stored as an `array()`, + * which would simplify the logic here, storing a `null` and only allocating + * an array when encountering duplicates avoids needless allocations in the + * normative case of parsing tags with no duplicate attributes. + */ + $duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end ); + if ( null === $this->duplicate_attributes ) { + $this->duplicate_attributes = array( $comparable_name => array( $duplicate_span ) ); + } elseif ( ! array_key_exists( $comparable_name, $this->duplicate_attributes ) ) { + $this->duplicate_attributes[ $comparable_name ] = array( $duplicate_span ); + } else { + $this->duplicate_attributes[ $comparable_name ][] = $duplicate_span; } return true; @@ -1307,11 +1336,12 @@ private function skip_whitespace() { */ private function after_tag() { $this->get_updated_html(); - $this->tag_name_starts_at = null; - $this->tag_name_length = null; - $this->tag_ends_at = null; - $this->is_closing_tag = null; - $this->attributes = array(); + $this->tag_name_starts_at = null; + $this->tag_name_length = null; + $this->tag_ends_at = null; + $this->is_closing_tag = null; + $this->attributes = array(); + $this->duplicate_attributes = null; } /** @@ -2080,6 +2110,17 @@ public function remove_attribute( $name ) { '' ); + // Removes any duplicated attributes if they were also present. + if ( null !== $this->duplicate_attributes && array_key_exists( $name, $this->duplicate_attributes ) ) { + foreach ( $this->duplicate_attributes[ $name ] as $attribute_token ) { + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $attribute_token->start, + $attribute_token->end, + '' + ); + } + } + return true; } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 31430d07a9e02..7111571b1f113 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1049,15 +1049,9 @@ public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the * Removing an attribute that's listed many times, e.g. `
` should remove * all its instances and output just `
`. * - * Today, however, WP_HTML_Tag_Processor only removes the first such attribute. It seems like a corner case - * and introducing additional complexity to correctly handle this scenario doesn't seem to be worth it. - * Let's revisit if and when this becomes a problem. + * @since 6.3.2 Removes all duplicated attributes as expected. * - * This test is in place to confirm this behavior, which while incorrect, is well-defined. - * A later fix introduced to the Tag Processor should update this test to reflect the - * wanted and correct behavior. - * - * @ticket 56299 + * @ticket 58119 * * @covers WP_HTML_Tag_Processor::remove_attribute */ @@ -1066,8 +1060,8 @@ public function test_remove_first_when_duplicated_attribute() { $p->next_tag(); $p->remove_attribute( 'id' ); - $this->assertSame( - '
Text
', + $this->assertStringNotContainsString( + 'update-me', $p->get_updated_html(), 'First attribute (when duplicates exist) was not removed' ); @@ -1090,6 +1084,61 @@ public function test_remove_attribute_with_an_existing_attribute_name_removes_it ); } + /** + * @ticket 58119 + * + * @since 6.3.2 Removes all duplicated attributes as expected. + * + * @covers WP_HTML_Tag_Processor::remove_attribute + * + * @dataProvider data_html_with_duplicated_attributes + */ + public function test_remove_attribute_with_duplicated_attributes_removes_all_of_them( $html_with_duplicate_attributes, $attribute_to_remove ) { + $p = new WP_HTML_Tag_Processor( $html_with_duplicate_attributes ); + $p->next_tag(); + + $p->remove_attribute( $attribute_to_remove ); + $this->assertNull( $p->get_attribute( $attribute_to_remove ), 'Failed to remove all copies of an attribute when duplicated in modified source.' ); + + // Recreate a tag processor with the updated HTML after removing the attribute. + $p = new WP_HTML_Tag_Processor( $p->get_updated_html() ); + $p->next_tag(); + $this->assertNull( $p->get_attribute( $attribute_to_remove ), 'Failed to remove all copies of duplicated attributes when getting updated HTML.' ); + } + + /** + * @ticket 58119 + * + * @since 6.3.2 Removes all duplicated attributes as expected. + * + * @covers WP_HTML_Tag_Processor::remove_attribute + */ + public function test_previous_duplicated_attributes_are_not_removed_on_successive_tag_removal() { + $p = new WP_HTML_Tag_Processor( '' ); + $p->next_tag(); + $p->next_tag(); + $p->remove_attribute( 'id' ); + + $this->assertSame( '', $p->get_updated_html() ); + } + + /** + * Data provider. + * + * @ticket 58119 + * + * @return array[]. + */ + public function data_html_with_duplicated_attributes() { + return array( + 'Double attributes' => array( '
', 'id' ), + 'Triple attributes' => array( '
', 'id' ), + 'Duplicates around another' => array( 'kites flying in the wind', 'src' ), + 'Case-variants of attribute' => array( '