' );
* false === $p->next_tag();
@@ -418,6 +418,93 @@ public function next_tag( $query = null ) {
return false;
}
+ /**
+  * Returns the raw HTML found between the matched tag and the point where it is closed.
+  *
+  * "Markup" differs from inner HTML in that it is a slice of the input document as written.
+  * This means that it's possible this returns HTML without matching tags, or with HTML attributes
+  * serialized differently than a DOM API would return.
+  *
+  * If the matched tag is never closed, the markup continues to the end of the document.
+  * The cursor is moved back to the matched tag before returning.
+  *
+  * NOTE(review): for void elements the search appears to stop at the tag following the
+  * element rather than at the element itself, which would include trailing text in the
+  * result — confirm against `step()`.
+  *
+  * Example:
+  *
+  *     $processor = WP_HTML_Processor::createFragment( '<p>Inside <em>P</em> tags</p>' );
+  *     $processor->next_tag( 'P' );
+  *     'Inside <em>P</em> tags' === $processor->get_inner_markup();
+  *
+  * @since 6.4.0
+  *
+  * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
+  *
+  * @return string|null The inner markup if available, else NULL.
+  */
+ public function get_inner_markup() {
+ 	if ( null === $this->get_tag() ) {
+ 		return null;
+ 	}
+
+ 	/*
+ 	 * NOTE(review): the internal bookmarks are named `start` and `end`; presumably they
+ 	 * share a namespace with bookmarks set by calling code — confirm, and consider
+ 	 * reserved names so that a caller's bookmarks aren't overwritten and released here.
+ 	 */
+ 	if ( ! $this->set_bookmark( 'start' ) ) {
+ 		throw new Exception( 'could not allocate bookmark' );
+ 	}
+
+ 	$found_tag = $this->step_until_tag_is_closed();
+
+ 	if ( $found_tag ) {
+ 		// The end bookmark is only needed, and only available, when stepping stopped on a tag.
+ 		if ( ! $this->set_bookmark( 'end' ) ) {
+ 			$this->seek( 'start' );
+ 			$this->release_bookmark( 'start' );
+ 			throw new Exception( 'could not allocate bookmark' );
+ 		}
+
+ 		$inner_markup = $this->substr_bookmarks( 'after', 'start', 'before', 'end' );
+ 		$this->release_bookmark( 'end' );
+ 	} else {
+ 		// If there's no closing tag then the inner markup continues to the end of the document.
+ 		$inner_markup = $this->substr_bookmark( 'after', 'start' );
+ 	}
+
+ 	// Return the cursor to the opening tag so calling code can continue from where it was.
+ 	$this->seek( 'start' );
+ 	$this->release_bookmark( 'start' );
+
+ 	return $inner_markup;
+ }
+
+ /**
+  * Returns the raw HTML content around a matched tag, including the tag itself.
+  *
+  * "Markup" differs from outer HTML in that it is a slice of the input document as written,
+  * starting at the matched tag. This means that it's possible this returns HTML without
+  * matching tags, or with HTML attributes serialized differently than a DOM API would return.
+  *
+  * The closing tag is included only when the element is closed by its own closing tag.
+  * If the matched tag is never closed, the markup continues to the end of the document.
+  * The cursor is moved back to the matched tag before returning.
+  *
+  * NOTE(review): for void elements the search appears to stop at the tag following the
+  * element rather than at the element itself, which would include trailing text in the
+  * result — confirm against `step()`.
+  *
+  * Example:
+  *
+  *     $processor = WP_HTML_Processor::createFragment( '<p>Inside <em>P</em> tags</p>' );
+  *     $processor->next_tag( 'P' );
+  *     '<p>Inside <em>P</em> tags</p>' === $processor->get_outer_markup();
+  *
+  * @since 6.4.0
+  *
+  * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
+  *
+  * @return string|null The outer markup if available, else NULL.
+  */
+ public function get_outer_markup() {
+ 	if ( null === $this->get_tag() ) {
+ 		return null;
+ 	}
+
+ 	/*
+ 	 * NOTE(review): the internal bookmarks are named `start` and `end`; presumably they
+ 	 * share a namespace with bookmarks set by calling code — confirm, and consider
+ 	 * reserved names so that a caller's bookmarks aren't overwritten and released here.
+ 	 */
+ 	if ( ! $this->set_bookmark( 'start' ) ) {
+ 		throw new Exception( 'could not allocate bookmark' );
+ 	}
+
+ 	$start_tag = $this->current_token->node_name;
+ 	$found_tag = $this->step_until_tag_is_closed();
+
+ 	if ( $found_tag ) {
+ 		// The end bookmark is only needed, and only available, when stepping stopped on a tag.
+ 		if ( ! $this->set_bookmark( 'end' ) ) {
+ 			$this->seek( 'start' );
+ 			$this->release_bookmark( 'start' );
+ 			throw new Exception( 'could not allocate bookmark' );
+ 		}
+
+ 		/*
+ 		 * Only a closing tag of the same name belongs to the element. When stepping
+ 		 * stopped on any other tag, the markup ends just before that tag.
+ 		 */
+ 		$did_close    = $this->get_tag() === $start_tag && $this->is_tag_closer();
+ 		$end_position = $did_close ? 'after' : 'before';
+ 		$outer_markup = $this->substr_bookmarks( 'before', 'start', $end_position, 'end' );
+ 		$this->release_bookmark( 'end' );
+ 	} else {
+ 		// If there's no closing tag then the outer markup continues to the end of the document.
+ 		$outer_markup = $this->substr_bookmark( 'before', 'start' );
+ 	}
+
+ 	// Return the cursor to the opening tag so calling code can continue from where it was.
+ 	$this->seek( 'start' );
+ 	$this->release_bookmark( 'start' );
+
+ 	return $outer_markup;
+ }
+
/**
* Steps through the HTML document and stop at the next tag, if any.
*
@@ -438,12 +525,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
$this->state->stack_of_open_elements->pop();
}
- parent::next_tag( self::VISIT_EVERYTHING );
- }
-
- // Finish stepping when there are no more tokens in the document.
- if ( null === $this->get_tag() ) {
- return false;
+ if ( ! parent::next_tag( self::VISIT_EVERYTHING ) ) {
+ return false;
+ }
}
$this->current_token = new WP_HTML_Token(
@@ -474,9 +558,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
/**
* Computes the HTML breadcrumbs for the currently-matched node, if matched.
*
- * Breadcrumbs start at the outer-most parent and descend toward the matched element.
+ * Breadcrumbs start at the outermost parent and descend toward the matched element.
*
- * Example
+ * Example:
*
* $p = WP_HTML_Processor::createFragment( '
' );
* $p->next_tag( 'IMG' );
@@ -723,6 +807,98 @@ private function bookmark_tag() {
return "{$this->bookmark_counter}";
}
+ /**
+  * Steps through the HTML document until the current open tag is closed.
+  *
+  * The starting tag is considered closed once its token is popped from the stack of
+  * open elements. The return value is the result of the last step taken.
+  *
+  * @since 6.4.0
+  *
+  * @throws Exception When unable to allocate bookmark for internal tracking.
+  *
+  * @return bool|null true if stepping stopped on a tag, false if the document ended first,
+  *                   and null if not starting at a matched tag.
+  */
+ private function step_until_tag_is_closed() {
+ 	if ( null === $this->get_tag() ) {
+ 		return null;
+ 	}
+
+ 	/** @var WP_HTML_Token $start Reference to the opening tag when calling this function. */
+ 	$start = $this->current_token;
+
+ 	/** @var bool $keep_searching Whether to continue scanning for a point where the opening tag is closed. */
+ 	$keep_searching = true;
+
+ 	/*
+ 	 * Assigned before the loop so that the return value is defined even if
+ 	 * the listener context below never runs its body.
+ 	 */
+ 	$found_tag = false;
+
+ 	/**
+ 	 * Sets a flag indicating that the starting tag has been closed once
+ 	 * it's popped from the stack of open elements. This is a listener function.
+ 	 *
+ 	 * @since 6.4.0
+ 	 *
+ 	 * @see WP_HTML_Open_Elements::with_pop_listener()
+ 	 *
+ 	 * @param WP_HTML_Token $node Node that was popped.
+ 	 */
+ 	$tag_is_closed = function ( $node ) use ( &$keep_searching, $start ) {
+ 		if ( $node === $start ) {
+ 			$keep_searching = false;
+ 		}
+ 	};
+
+ 	/*
+ 	 * Normally, when stepping into each new element, it would be required to walk up the
+ 	 * stack of open elements to see whether the starting tag is still open, i.e. whether
+ 	 * it's still on the stack. By listening for elements that are popped from the stack,
+ 	 * however, it's possible to know if the starting tag has been closed with nothing more
+ 	 * than a constant boolean access, as the listener is called for each tag that's closed.
+ 	 *
+ 	 * The use of the `foreach` here creates a context which ensures that the listener is
+ 	 * properly removed and cleaned up without having to manually remove it.
+ 	 */
+ 	foreach ( $this->state->stack_of_open_elements->with_pop_listener( $tag_is_closed ) as $_ ) {
+ 		// Find where the tag is closed by stepping forward until it's no longer on the stack of open elements.
+ 		do {
+ 			$found_tag = $this->step();
+ 		} while ( $found_tag && $keep_searching );
+ 	}
+
+ 	return $found_tag;
+ }
+
+ /**
+  * Clips the input HTML document from a bookmark through to the end of the document.
+  *
+  * @since 6.4.0
+  *
+  * @param string $start_position "before" to start clipping at the bookmark's start offset;
+  *                               any other value starts at the byte following its end offset.
+  * @param string $start          Bookmark name at which to start clipping.
+  * @return string Clipped substring of input HTML document.
+  */
+ private function substr_bookmark( $start_position, $start ) {
+ 	$bookmark = $this->bookmarks[ "_{$start}" ];
+
+ 	if ( 'before' === $start_position ) {
+ 		$offset = $bookmark->start;
+ 	} else {
+ 		$offset = $bookmark->end + 1;
+ 	}
+
+ 	return substr( $this->html, $offset );
+ }
+
+ /**
+  * Clips the span of the input HTML document that lies between two bookmarks.
+  *
+  * For either bookmark, "before" selects its start offset and any other
+  * value selects the byte following its end offset.
+  *
+  * @since 6.4.0
+  *
+  * @param string $start_position "before" to clip before bookmark, "after" to clip after.
+  * @param string $start          Bookmark name at which to start clipping.
+  * @param string $end_position   "before" to clip before bookmark, "after" to clip after.
+  * @param string $end            Bookmark name at which to end clipping.
+  * @return string Clipped substring of input HTML document.
+  */
+ private function substr_bookmarks( $start_position, $start, $end_position, $end ) {
+ 	$from = $this->bookmarks[ "_{$start}" ];
+ 	$to   = $this->bookmarks[ "_{$end}" ];
+
+ 	if ( 'before' === $start_position ) {
+ 		$clip_from = $from->start;
+ 	} else {
+ 		$clip_from = $from->end + 1;
+ 	}
+
+ 	if ( 'before' === $end_position ) {
+ 		$clip_to = $to->start;
+ 	} else {
+ 		$clip_to = $to->end + 1;
+ 	}
+
+ 	$length = $clip_to - $clip_from;
+
+ 	return substr( $this->html, $clip_from, $length );
+ }
+
/*
* HTML semantic overrides for Tag Processor
*/
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorGetInnerMarkup.php b/tests/phpunit/tests/html-api/wpHtmlProcessorGetInnerMarkup.php
new file mode 100644
index 0000000000000..e8f67681491bc
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorGetInnerMarkup.php
@@ -0,0 +1,126 @@
+
' );
+
+ $this->assertNull( $p->get_inner_markup() );
+
+ $this->assertFalse( $p->next_tag( 'BUTTON' ), "Should not have found a BUTTON tag but stopped at {$p->get_tag()}." );
+ $this->assertNull( $p->get_inner_markup() );
+ }
+
+ /**
+  * Verifies the inner markup reported for the first tag carrying a `target` attribute.
+  *
+  * @ticket {TICKET_NUMBER}
+  *
+  * @covers WP_HTML_Processor::get_inner_markup
+  *
+  * @dataProvider data_html_with_inner_markup
+  *
+  * @since 6.4.0
+  *
+  * @param string $html_with_target_node HTML containing a node with the `target` attribute set.
+  * @param string $expected_inner_markup Inner markup of target node.
+  */
+ public function test_returns_appropriate_inner_markup( $html_with_target_node, $expected_inner_markup ) {
+ 	$processor = WP_HTML_Processor::createFragment( $html_with_target_node );
+
+ 	// Advance to the first tag with a `target` attribute, or until no tags remain.
+ 	while ( $processor->next_tag() ) {
+ 		if ( null !== $processor->get_attribute( 'target' ) ) {
+ 			break;
+ 		}
+ 	}
+
+ 	$actual_inner_markup = $processor->get_inner_markup();
+
+ 	$this->assertSame( $expected_inner_markup, $actual_inner_markup, 'Failed to return appropriate inner markup.' );
+ }
+
+ /**
+ * Data provider for test_returns_appropriate_inner_markup().
+ *
+ * Each dataset is keyed by a short description and holds the input HTML
+ * followed by the inner markup expected for the target node.
+ *
+ * @return array[]
+ */
+ public function data_html_with_inner_markup() {
+ $data = array(
+ 'Void elements' => array( ' ', '' ),
+ 'Empty elements' => array( '
', '' ),
+ 'Element containing only text' => array( 'inside
', 'inside' ),
+ 'Element with nested tags' => array( 'inside the div
', 'inside the div' ),
+ 'Unclosed element' => array( 'This is
all inside the DIV', 'This is
all inside the DIV' ),
+ 'Unclosed elements' => array( '
Inside P tags
', 'Inside
P tags' ),
+ 'Partially-closed element' => array( ' This is all inside the DIV
all inside the DIV array( '
Outside the P', 'Inside the P' ),
+ );
+
+ $inner_html = <<This is inside the
Match
+
+
+
+
+ Look at the picture photograph.
+
+
+HTML;
+
+ $html = <<
+
This is not in the match.
+
This is another paragraph not in the match.
+
+ {$inner_html}
+
+
This is also note in the match.
+
+HTML;
+
+ $data['Complicated inner nesting'] = array( $html, $inner_html );
+
+ return $data;
+ }
+
+ /**
+ * Ensures that the cursor isn't moved when getting the inner markup.
+ * It should remain at the tag opener from where it was called.
+ *
+ * After reading the inner markup, the processor must still report the SPAN
+ * opening tag, and the following `next_tag()` call must land on the tag
+ * carrying the `inner-target` attribute.
+ *
+ * @ticket {TICKET_NUMBER}
+ *
+ * @covers WP_HTML_Processor::get_inner_markup
+ *
+ * @since 6.4.0
+ */
+ public function test_preserves_cursor() {
+ $p = WP_HTML_Processor::createFragment( 'The cursor
should not move unexpectedly .
' );
+
+ while ( $p->next_tag() && null === $p->get_attribute( 'target' ) ) {
+ continue;
+ }
+
+ $this->assertSame(
+ 'The cursor
should not move unexpectedly .',
+ $p->get_inner_markup(),
+ 'Failed to return appropriate inner markup.'
+ );
+
+ $this->assertSame( 'SPAN', $p->get_tag(), "Should have remained on SPAN, but found {$p->get_tag()} instead." );
+ $this->assertFalse( $p->is_tag_closer(), 'Should have remained on SPAN opening tag, but stopped at closing tag instead.' );
+
+ $p->next_tag();
+ $this->assertNotNull( $p->get_attribute( 'inner-target' ), "Expected to move to inner CODE element, but found {$p->get_tag()} instead." );
+ }
+}
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorGetOuterMarkup.php b/tests/phpunit/tests/html-api/wpHtmlProcessorGetOuterMarkup.php
new file mode 100644
index 0000000000000..f977e5529b8e2
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorGetOuterMarkup.php
@@ -0,0 +1,128 @@
+
' );
+
+ $this->assertNull( $p->get_outer_markup() );
+
+ $this->assertFalse( $p->next_tag( 'BUTTON' ), "Should not have found a BUTTON tag but stopped at {$p->get_tag()}." );
+ $this->assertNull( $p->get_outer_markup() );
+ }
+
+ /**
+  * Verifies the outer markup reported for the first tag carrying a `target` attribute.
+  *
+  * @ticket {TICKET_NUMBER}
+  *
+  * @covers WP_HTML_Processor::get_outer_markup
+  *
+  * @dataProvider data_html_with_outer_markup
+  *
+  * @since 6.4.0
+  *
+  * @param string $html_with_target_node HTML containing a node with the `target` attribute set.
+  * @param string $expected_outer_markup Outer markup of target node.
+  */
+ public function test_returns_appropriate_outer_markup( $html_with_target_node, $expected_outer_markup ) {
+ 	$p = WP_HTML_Processor::createFragment( $html_with_target_node );
+
+ 	// Advance to the first tag with a `target` attribute, or until no tags remain.
+ 	while ( $p->next_tag() && null === $p->get_attribute( 'target' ) ) {
+ 		continue;
+ 	}
+
+ 	// The failure message names the method under test: outer markup, not inner.
+ 	$this->assertSame( $expected_outer_markup, $p->get_outer_markup(), 'Failed to return appropriate outer markup.' );
+ }
+
+ /**
+ * Data provider for test_returns_appropriate_outer_markup().
+ *
+ * Each dataset is keyed by a short description and holds the input HTML
+ * followed by the outer markup expected for the target node.
+ *
+ * @return array[]
+ */
+ public function data_html_with_outer_markup() {
+ $data = array(
+ 'Void elements' => array( ' ', ' ' ),
+ 'Empty elements' => array( '
', '
' ),
+ 'Element containing only text' => array( 'inside
', 'inside
' ),
+ 'Element with nested tags' => array( 'inside the div
', 'inside the div
' ),
+ 'Unclosed element' => array( 'This is
all inside the DIV', '
This is
all inside the DIV' ),
+ 'Unclosed elements' => array( '
', '
Inside P tags' ),
+ 'Partially-closed element' => array( '
This is all inside the DIV
This is all inside the DIV array( '
Outside the P', '
Inside the P' ),
+ );
+
+ $inner_html = <<This is inside the Match
+
+
+
+
+ Look at the picture photograph.
+
+
+HTML;
+
+ $html = <<
+
This is not in the match.
+
This is another paragraph not in the match.
+
+ {$inner_html}
+
+
This is also note in the match.
+
+HTML;
+
+ $data['Complicated inner nesting'] = array( $html, "{$inner_html}
" );
+
+ return $data;
+ }
+
+ /**
+ * Ensures that the cursor isn't moved when getting the outer markup.
+ * It should remain at the tag opener from where it was called.
+ *
+ * After reading the outer markup, the processor must still report the SPAN
+ * opening tag, and the following `next_tag()` call must land on the tag
+ * carrying the `inner-target` attribute.
+ *
+ * @ticket {TICKET_NUMBER}
+ *
+ * @covers WP_HTML_Processor::get_outer_markup
+ *
+ * @since 6.4.0
+ */
+ public function test_preserves_cursor() {
+ $p = WP_HTML_Processor::createFragment( 'The cursor
should not move unexpectedly .
' );
+
+ while ( $p->next_tag() && null === $p->get_attribute( 'target' ) ) {
+ continue;
+ }
+
+ $this->assertSame(
+ 'The cursor
should not move unexpectedly . ',
+ $p->get_outer_markup(),
+ 'Failed to return appropriate outer markup.'
+ );
+
+ $this->assertSame( 'SPAN', $p->get_tag(), "Should have remained on SPAN, but found {$p->get_tag()} instead." );
+ $this->assertFalse( $p->is_tag_closer(), 'Should have remained on SPAN opening tag, but stopped at closing tag instead.' );
+
+ $p->next_tag();
+ $this->assertNotNull( $p->get_attribute( 'inner-target' ), "Expected to move to inner CODE element, but found {$p->get_tag()} instead." );
+ }
+}