Skip to content

Commit

Permalink
Make things worse, but lead towards making them better; GOTO
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Dec 14, 2022
1 parent 68d72e8 commit 9e752e5
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 42 deletions.
138 changes: 125 additions & 13 deletions lib/experimental/html/class-wp-html-attribute-sourcer.php
Original file line number Diff line number Diff line change
Expand Up @@ -128,37 +128,149 @@ public function source_attributes() {
);
}

/**
 * Checks whether the tag the processor currently sits on satisfies a single selector.
 *
 * Every constraint present in `$s` must hold; constraints that are absent are ignored.
 *
 * @param object $tags Processor positioned on a tag; `get_tag()` and `get_attribute()` are called on it.
 * @param array  $s    Parsed selector. The optional keys `tag_name`, `class_names`, `hash`,
 *                     `identifier` and `has_attribute` are read.
 * @return object|null The same `$tags` object when every constraint matches, otherwise null.
 */
public static function select_match( $tags, $s ) {
	// The requested tag name is upper-cased before being compared with get_tag().
	if ( ! empty( $s['tag_name'] ) && strtoupper( $s['tag_name'] ) !== $tags->get_tag() ) {
		return null;
	}

	if ( ! empty( $s['class_names'] ) ) {
		$classes = $tags->get_attribute( 'class' );

		// A missing attribute cannot satisfy a class constraint. Any other non-string
		// value is treated the same way (presumably a value-less `class` attribute
		// yields a non-string; verify against the processor).
		if ( ! is_string( $classes ) ) {
			return null;
		}

		/*
		 * Compare whole whitespace-separated tokens. A `\b`-delimited regex treats
		 * `-` as a word boundary, so `wp-block` would match inside `wp-block-image`,
		 * and unescaped class names could inject regex syntax.
		 */
		$present = preg_split( '~[ \t\n\f\r]+~', $classes, -1, PREG_SPLIT_NO_EMPTY );
		if ( false === $present ) {
			return null;
		}

		foreach ( $s['class_names'] as $class_name ) {
			if ( ! in_array( (string) $class_name, $present, true ) ) {
				return null;
			}
		}
	}

	// When the selector carries a `hash` entry its `identifier` must equal the `id` attribute.
	if ( isset( $s['hash'] ) && $s['identifier'] !== $tags->get_attribute( 'id' ) ) {
		return null;
	}

	// The named attribute only has to exist; its value is not inspected.
	if ( isset( $s['has_attribute'] ) && null === $tags->get_attribute( $s['has_attribute'] ) ) {
		return null;
	}

	return $tags;
}

public static function select( $selectors, $html ) {
$tags = new WP_HTML_Processor( $html );
if ( ! $tags->next_tag() ) {
return null;
}

$tags->set_bookmark( 'start' );

while ( $tags->next_tag() ) {
foreach ( $selectors as $s ) {
if ( ! empty( $s['tag_name'] ) && strtoupper( $s['tag_name'] ) !== $tags->get_tag() ) {
foreach ( $selectors as $s ) {
$tags->seek( 'start' );
$max = 100;
while ( --$max > 0 ) {
$next = $s;

// This label is probably where some stack-level data should reside.
next:
// Find the next starting point
while ( null === self::select_match( $tags, $next ) && $tags->next_tag() ) {
continue;
}

if ( ! empty( $s['class_names'] ) ) {
$classes = $tags->get_attribute( 'class' );
if ( null === $classes ) {
// We're out of possible starting points
if ( null === self::select_match( $tags, $next ) ) {
continue 2;
}

// No further selectors, then bingo!
if ( ! isset( $next['then'] ) ) {
return $tags;
}

$next = $next['then'];

// Adjacent sibling must be the immediately-following element.
if ( '+' === $next['combinator'] ) {
var_dump( [
'msg' => "Processing adjacent sibling",
'html' => $html,
'tag' => $tags->get_tag(),
'selector' => $next
] );
$state = [];
while ( $tags->next_within_balanced_tags( $state ) ) {
continue;
}

foreach ( $s['class_names'] as $class_name ) {
if ( ! preg_match( "~\b{$class_name}\b~", $classes ) ) {
continue 2;
}
$tags->next_tag();
if ( null === self::select_match( $tags, $next ) ) {
continue;
}

if ( isset( $next['then'] ) ) {
goto next;
}

// @TODO: Recurse here so we can handle more than one level.
return $tags;
}

if ( isset( $s['hash'] ) && $s['identifier'] !== $tags->get_attribute( 'id' ) ) {
// Child must be one level into current tag.
if ( '>' === $next['combinator'] ) {
var_dump( [
'msg' => "Processing child",
'html' => $html,
'tag' => $tags->get_tag(),
'selector' => $next
] );
$state = [];
while ( $tags->next_within_balanced_tags( $state, null, 1 ) ) {
if ( null === self::select_match( $tags, $next ) ) {
continue;
}

if ( isset( $next['then'] ) ) {
goto next;
}

// @TODO: Recurse here so we can handle more than one level.
return $tags;
}

continue;
}

if ( isset( $s['has_attribute'] ) && null === $tags->get_attribute( $s['has_attribute'] ) ) {
// Descendant can be anywhere inside current tag.
if ( ' ' === $next['combinator'] ) {
var_dump( [
'msg' => "Processing descendant",
'html' => $html,
'tag' => $tags->get_tag(),
'selector' => $next
] );
$state = [];
while ( $tags->next_within_balanced_tags( $state ) ) {
if ( null === self::select_match( $tags, $next ) ) {
continue;
}

if ( isset( $next['then'] ) ) {
goto next;
}

// @TODO: Recurse here so we can handle more than one level.
return $tags;
}

continue;
}

return $tags;
// General sibling must be anything at current level.
if ( '~' === $next['combinator'] ) {
// @TODO: Support this.
return null;
}
}
}

Expand Down
34 changes: 20 additions & 14 deletions lib/experimental/html/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,40 @@
*/

class WP_HTML_Processor extends WP_HTML_Tag_Processor {
public function next_within_balanced_tags( $query, $max_depth = 1000 ) {
$budget = 1000;
if ( self::is_html_void_element( $this->get_tag() ) ) {
return false;
public function next_within_balanced_tags( &$state, $query = null, $max_depth = 1000 ) {
if ( empty( $state ) ) {
$state['budget'] = 1000;
$state['tag_name'] = $this->get_tag();
$state['balanced_depth'] = 1;
$state['depth'] = 1;

if ( self::is_html_void_element( $this->get_tag() ) ) {
return false;
}
}

$tag_name = $this->get_tag();
$balanced_depth = 1;
$depth = 1;

while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) && $budget-- > 0 ) {
if ( $this->get_tag() === $tag_name && $this->is_tag_closer() && $balanced_depth === 1 ) {
while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) && $state['budget']-- > 0 ) {
if (
$this->get_tag() === $state['tag_name'] &&
$this->is_tag_closer() &&
$state['balanced_depth'] === 1
) {
return false;
}

if ( $depth <= $max_depth ) {
if ( $state['depth'] <= $max_depth ) {
$this->parse_query( $query );
if ( $this->matches() ) {
return true;
}
}

if ( ! self::is_html_void_element( $this->get_tag() ) ) {
$depth += $this->is_tag_closer() ? -1 : 1;
$state['depth'] += $this->is_tag_closer() ? -1 : 1;
}

if ( $this->get_tag() === $tag_name ) {
$balanced_depth += $this->is_tag_closer() ? -1 : 1;
if ( $this->get_tag() === $state['tag_name'] ) {
$state['balanced_depth'] += $this->is_tag_closer() ? -1 : 1;
}
}

Expand Down
20 changes: 10 additions & 10 deletions phpunit/html/wp-html-attribute-sourcer-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@ public function data_sourced_attributes() {
array(
array( 'attributes' => array( 'link' => 'docs.html' ), 'unparsed' => array() ),
<<<EOF
<main>
<section>Just another section</section>
<section><div><a href="blah">blah</a></div></section>
<p>Stuff</p>
<div><a href="blarg">blarg</a></div>
<section>Still another section</section>
<div><img><a href="image">image</a></div>
<section>Still another section</section>
<div><a href="docs.html">docs</a></div>
</main>
<main>
<section>Just another section</section>
<section><div><a href="blah">blah</a></div></section>
<p>Stuff</p>
<div><a href="blarg">blarg</a></div>
<section>Still another section</section>
<div><img><a href="image">image</a></div>
<section>Still another section</section>
<div><a href="docs.html">docs</a></div>
</main>
EOF,
array(
'link' => array(
Expand Down
15 changes: 10 additions & 5 deletions phpunit/html/wp-html-processor-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,35 @@ public function test_find_descendant_tag() {
$tags = new WP_HTML_Processor( '<div>outside</div><section><div><img>inside</div></section>' );

$tags->next_tag( 'div' );
$this->assertFalse( $tags->next_within_balanced_tags( 'img' ) );
$state = [];
$this->assertFalse( $tags->next_within_balanced_tags( $state, 'img' ) );

$this->assertTrue( $tags->next_tag( 'div' ) );
$this->assertTrue( $tags->next_within_balanced_tags( 'img' ) );
$state = [];
$this->assertTrue( $tags->next_within_balanced_tags( $state, 'img' ) );
}

/**
 * With a depth limit of 1, an <img> nested three <div>s deep must not be found.
 */
public function test_find_immediate_child_tag() {
	$tags = new WP_HTML_Processor( '<div><div><div><img></div></div></div>' );

	$tags->next_tag( 'div' );

	// The traversal state is taken by reference, so it has to be a variable; start it empty.
	$state = [];
	$this->assertFalse( $tags->next_within_balanced_tags( $state, 'img', 1 ) );
}

/**
 * With a depth limit of 1, the <img> that is a direct child of the outer <div> must be found,
 * even though another <img> sits deeper inside nested <div>s.
 */
public function test_find_immediate_child_tag2() {
	$tags = new WP_HTML_Processor( '<div><div><div><img></div></div><img></div>' );

	$tags->next_tag( 'div' );

	// The traversal state is taken by reference, so it has to be a variable; start it empty.
	$state = [];
	$this->assertTrue( $tags->next_within_balanced_tags( $state, 'img', 1 ) );
}

/**
 * With a depth limit of 3, the <img> nested three <div>s deep must be found.
 */
public function test_find_child_tag() {
	$tags = new WP_HTML_Processor( '<div><div><div><img></div></div></div>' );

	$tags->next_tag( 'div' );

	// The traversal state is taken by reference, so it has to be a variable; start it empty.
	$state = [];
	$this->assertTrue( $tags->next_within_balanced_tags( $state, 'img', 3 ) );
}
}

0 comments on commit 9e752e5

Please sign in to comment.