From 9e752e5810bc4947774363059b0b8c0c442230ee Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 14 Dec 2022 16:50:11 -0700 Subject: [PATCH] Make things worse, but lead towards making them better; GOTO --- .../html/class-wp-html-attribute-sourcer.php | 138 ++++++++++++++++-- .../html/class-wp-html-processor.php | 34 +++-- .../html/wp-html-attribute-sourcer-test.php | 20 +-- phpunit/html/wp-html-processor-test.php | 15 +- 4 files changed, 165 insertions(+), 42 deletions(-) diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php index c2b3a182f1fee4..697228df04b34a 100644 --- a/lib/experimental/html/class-wp-html-attribute-sourcer.php +++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php @@ -128,37 +128,149 @@ public function source_attributes() { ); } + public static function select_match( $tags, $s ) { + if ( ! empty( $s['tag_name'] ) && strtoupper( $s['tag_name'] ) !== $tags->get_tag() ) { + return null; + } + + if ( ! empty( $s['class_names'] ) ) { + $classes = $tags->get_attribute( 'class' ); + if ( null === $classes ) { + return null; + } + + foreach ( $s['class_names'] as $class_name ) { + if ( ! preg_match( "~\b{$class_name}\b~", $classes ) ) { + return null; + } + } + } + + if ( isset( $s['hash'] ) && $s['identifier'] !== $tags->get_attribute( 'id' ) ) { + return null; + } + + if ( isset( $s['has_attribute'] ) && null === $tags->get_attribute( $s['has_attribute'] ) ) { + return null; + } + + return $tags; + } + public static function select( $selectors, $html ) { $tags = new WP_HTML_Processor( $html ); + if ( ! $tags->next_tag() ) { + return null; + } + + $tags->set_bookmark( 'start' ); - while ( $tags->next_tag() ) { - foreach ( $selectors as $s ) { - if ( ! empty( $s['tag_name'] ) && strtoupper( $s['tag_name'] ) !== $tags->get_tag() ) { + foreach ( $selectors as $s ) { + $tags->seek( 'start' ); + $max = 100; + while ( --$max > 0 ) { + $next = $s; + + // This label is probably where some stack-level data should reside. + next: + // Find the next starting point + while ( null === self::select_match( $tags, $next ) && $tags->next_tag() ) { continue; } - if ( ! empty( $s['class_names'] ) ) { - $classes = $tags->get_attribute( 'class' ); - if ( null === $classes ) { + // We're out of possible starting points + if ( null === self::select_match( $tags, $next ) ) { + continue 2; + } + + // No further selectors, then bingo! + if ( ! isset( $next['then'] ) ) { + return $tags; + } + + $next = $next['then']; + + // Adjacent sibling must be the immediately-following element. + if ( '+' === $next['combinator'] ) { + var_dump( [ + 'msg' => "Processing adjacent sibling", + 'html' => $html, + 'tag' => $tags->get_tag(), + 'selector' => $next + ] ); + $state = []; + while ( $tags->next_within_balanced_tags( $state ) ) { continue; } - foreach ( $s['class_names'] as $class_name ) { - if ( ! preg_match( "~\b{$class_name}\b~", $classes ) ) { - continue 2; - } + $tags->next_tag(); + if ( null === self::select_match( $tags, $next ) ) { + continue; } + + if ( isset( $next['then'] ) ) { + goto next; + } + + // @TODO: Recurse here so we can handle more than one level. + return $tags; } - if ( isset( $s['hash'] ) && $s['identifier'] !== $tags->get_attribute( 'id' ) ) { + // Child must be one level into current tag. + if ( '>' === $next['combinator'] ) { + var_dump( [ + 'msg' => "Processing child", + 'html' => $html, + 'tag' => $tags->get_tag(), + 'selector' => $next + ] ); + $state = []; + while ( $tags->next_within_balanced_tags( $state, null, 1 ) ) { + if ( null === self::select_match( $tags, $next ) ) { + continue; + } + + if ( isset( $next['then'] ) ) { + goto next; + } + + // @TODO: Recurse here so we can handle more than one level. + return $tags; + } + continue; } - if ( isset( $s['has_attribute'] ) && null === $tags->get_attribute( $s['has_attribute'] ) ) { + // Descendant can be anywhere inside current tag. + if ( ' ' === $next['combinator'] ) { + var_dump( [ + 'msg' => "Processing descendant", + 'html' => $html, + 'tag' => $tags->get_tag(), + 'selector' => $next + ] ); + $state = []; + while ( $tags->next_within_balanced_tags( $state ) ) { + if ( null === self::select_match( $tags, $next ) ) { + continue; + } + + if ( isset( $next['then'] ) ) { + goto next; + } + + // @TODO: Recurse here so we can handle more than one level. + return $tags; + } + continue; } - return $tags; + // General sibling must be anything at current level. + if ( '~' === $next['combinator'] ) { + // @TODO: Support this. + return null; + } } } diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php index e1caf7d4e7140c..a4179455892e6c 100644 --- a/lib/experimental/html/class-wp-html-processor.php +++ b/lib/experimental/html/class-wp-html-processor.php @@ -17,22 +17,28 @@ */ class WP_HTML_Processor extends WP_HTML_Tag_Processor { - public function next_within_balanced_tags( $query, $max_depth = 1000 ) { - $budget = 1000; - if ( self::is_html_void_element( $this->get_tag() ) ) { - return false; + public function next_within_balanced_tags( &$state, $query = null, $max_depth = 1000 ) { + if ( empty( $state ) ) { + $state['budget'] = 1000; + $state['tag_name'] = $this->get_tag(); + $state['balanced_depth'] = 1; + $state['depth'] = 1; + + if ( self::is_html_void_element( $this->get_tag() ) ) { + return false; + } } - $tag_name = $this->get_tag(); - $balanced_depth = 1; - $depth = 1; - - while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) && $budget-- > 0 ) { - if ( $this->get_tag() === $tag_name && $this->is_tag_closer() && $balanced_depth === 1 ) { + while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) && $state['budget']-- > 0 ) { + if ( + $this->get_tag() === $state['tag_name'] && + $this->is_tag_closer() && + $state['balanced_depth'] === 1 + ) { return false; } - if ( $depth <= $max_depth ) { + if ( $state['depth'] <= $max_depth ) { $this->parse_query( $query ); if ( $this->matches() ) { return true; @@ -40,11 +46,11 @@ public function next_within_balanced_tags( $query, $max_depth = 1000 ) { } if ( ! self::is_html_void_element( $this->get_tag() ) ) { - $depth += $this->is_tag_closer() ? -1 : 1; + $state['depth'] += $this->is_tag_closer() ? -1 : 1; } - if ( $this->get_tag() === $tag_name ) { - $balanced_depth += $this->is_tag_closer() ? -1 : 1; + if ( $this->get_tag() === $state['tag_name'] ) { + $state['balanced_depth'] += $this->is_tag_closer() ? -1 : 1; } } diff --git a/phpunit/html/wp-html-attribute-sourcer-test.php b/phpunit/html/wp-html-attribute-sourcer-test.php index ca61c94f27acd5..a402956a09588f 100644 --- a/phpunit/html/wp-html-attribute-sourcer-test.php +++ b/phpunit/html/wp-html-attribute-sourcer-test.php @@ -26,16 +26,16 @@ public function data_sourced_attributes() { array( array( 'attributes' => array( 'link' => 'docs.html' ), 'unparsed' => array() ), << -
Just another section
-
blah
-

Stuff

-
blarg
-
Still another section
-
image
-
Still another section
-
docs
- +
+
Just another section
+
blah
+

Stuff

+
blarg
+
Still another section
+
image
+
Still another section
+
docs
+
EOF, array( 'link' => array( diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index 3a0e1521937f9b..027a314cd3325e 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -19,30 +19,35 @@ public function test_find_descendant_tag() { $tags = new WP_HTML_Processor( '
outside
inside
' ); $tags->next_tag( 'div' ); - $this->assertFalse( $tags->next_within_balanced_tags( 'img' ) ); + $state = []; + $this->assertFalse( $tags->next_within_balanced_tags( $state, 'img' ) ); $this->assertTrue( $tags->next_tag( 'div' ) ); - $this->assertTrue( $tags->next_within_balanced_tags( 'img' ) ); + $state = []; + $this->assertTrue( $tags->next_within_balanced_tags( $state, 'img' ) ); } public function test_find_immediate_child_tag() { $tags = new WP_HTML_Processor( '
' ); $tags->next_tag( 'div' ); - $this->assertFalse( $tags->next_within_balanced_tags( 'img', 1 ) ); + $state = []; + $this->assertFalse( $tags->next_within_balanced_tags( $state, 'img', 1 ) ); } public function test_find_immediate_child_tag2() { $tags = new WP_HTML_Processor( '
' ); $tags->next_tag( 'div' ); - $this->assertTrue( $tags->next_within_balanced_tags( 'img', 1 ) ); + $state = []; + $this->assertTrue( $tags->next_within_balanced_tags( $state, 'img', 1 ) ); } public function test_find_child_tag() { $tags = new WP_HTML_Processor( '
' ); $tags->next_tag( 'div' ); - $this->assertTrue( $tags->next_within_balanced_tags( 'img', 3 ) ); + $state = []; + $this->assertTrue( $tags->next_within_balanced_tags( $state, 'img', 3 ) ); } }