WordPress · dmsnell · Aug 1, 2024 · Aug 5, 2024 · Aug 6, 2024 · Aug 6, 2024
diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -281,24 +281,41 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 *
 	 * ## Current HTML Support
 	 *
-	 *  - The only supported context is `<body>`, which is the default value.
 	 *  - The only supported document encoding is `UTF-8`, which is the default value.
 	 *
+	 * @todo Verify that creating a fragment in self-contained elements works.
+	 *
 	 * @since 6.4.0
 	 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances.
+	 * @since 6.7.0 Can create fragment in any context.
 	 *
 	 * @param string $html     Input HTML fragment to process.
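-	 * @param string $context  Context element for the fragment, must be default of `<body>`.
+	 * @param string $context  Context element for the fragment as an HTML tag, e.g. `<body>`; must contain exactly one tag and nothing else.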
 	 * @param string $encoding Text encoding of the document; must be default of 'UTF-8'.
 	 * @return static|null The created processor if successful, otherwise null.
 	 */
 	public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) {
-		if ( '<body>' !== $context || 'UTF-8' !== $encoding ) {
+		if ( 'UTF-8' !== $encoding ) {
+			return null;
+		}
+
+		$context_processor = new WP_HTML_Tag_Processor( $context );
+		if ( ! $context_processor->next_token() || '#tag' !== $context_processor->get_token_type() ) {
+			return null;
+		}
+
+		$context_tag        = $context_processor->get_tag();
+		$context_attributes = array();
+		foreach ( $context_processor->get_attribute_names_with_prefix( '' ) as $name ) {
+			$context_attributes[ $name ] = $context_processor->get_attribute( $name );
+		}
+
+		if ( $context_processor->next_token() ) {
 			return null;
 		}
 
 		$processor                             = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );
-		$processor->state->context_node        = array( 'BODY', array() );
+		$processor->state->context_node        = array( $context_tag, $context_attributes );
 		$processor->state->insertion_mode      = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
 		$processor->state->encoding            = $encoding;
 		$processor->state->encoding_confidence = 'certain';

diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php
@@ -0,0 +1,114 @@
+<?php
+/**
+ * Unit tests covering WP_HTML_Processor fragment parsing functionality.
+ *
+ * @package WordPress
+ * @subpackage HTML-API
+ *
+ * @since 6.7.0
+ *
+ * @group html-api
+ *
+ * @coversDefaultClass WP_HTML_Processor
+ */
+class Tests_HtmlApi_WpHtmlProcessorFragmentParsing extends WP_UnitTestCase {
+	/**
+	 * Verifies that SCRIPT fragment parses behave as they should.
+	 *
+	 * @dataProvider data_script_fragments
+	 *
+	 * @param string      $inner_html    HTML to parse in SCRIPT fragment.
+	 * @param string|null $expected_html Expected output of the parse, or `null` if unsupported.
+	 */
+	public function test_script_tag( string $inner_html, ?string $expected_html ) {
+		$processor = WP_HTML_Processor::create_fragment( $inner_html, '<script></script>' );
+
+		// The normalizer only accepts a processor instance, so report a failed
+		// creation as a test failure here rather than as a TypeError later on.
+		$this->assertNotNull( $processor, 'Failed to create a fragment parser in the SCRIPT context.' );
+
+		$normalized = self::normalize_html( $processor );
+
+		if ( null === $expected_html ) {
+			// A `null` expectation means the processor should have reported an error.
+			$this->assertNull( $normalized, "Should have bailed when parsing but didn't." );
+			return;
+		}
+
+		$this->assertSame( $expected_html, $normalized, 'Failed to properly parse SCRIPT fragment.' );
+	}
+
+	/**
+	 * Data provider: SCRIPT inner HTML paired with the expected normalized output.
+	 *
+	 * @ticket 61576
+	 *
+	 * @return array[] Arguments for `test_script_tag()`; a `null` expectation means the parse should bail.
+	 */
+	public static function data_script_fragments() {
+		return array(
+			'Basic SCRIPT'      => array( 'const x = 5 < y;', 'const x = 5 < y;' ),
+			'Text after SCRIPT' => array( 'const x = 5 < y;</script>test', null ),
+			'Tag after SCRIPT'  => array( 'end</script><img>', null ),
+			'Double escape'     => array( "<!--<script>\nconsole.log('</script>');\n-->\nconsole.log('<img>');", "<!--<script>\nconsole.log('</script>');\n-->\nconsole.log('<img>');" ),
+		);
+	}
+
+	/**
+	 * Produces normalized HTML output given a processor as input, which has not
+	 * yet started to proceed through its document.
+	 *
+	 * This can be used with a full or a fragment parser.
+	 *
+	 * @param WP_HTML_Processor $processor HTML Processor in READY state at the beginning of its input.
+	 * @return string|null Normalized HTML from input processor, or `null` if the processor reported an error.
+	 */
+	private static function normalize_html( WP_HTML_Processor $processor ): ?string {
+		$html = '';
+
+		while ( $processor->next_token() ) {
+			$token_name = $processor->get_token_name();
+
+			switch ( $processor->get_token_type() ) {
+				case '#text':
+					$html .= $processor->get_modifiable_text();
+					break;
+
+				case '#tag':
+					if ( $processor->is_tag_closer() ) {
+						$html .= "</{$token_name}>";
+						break;
+					}
+
+					$html .= "<{$token_name}";
+					$names = $processor->get_attribute_names_with_prefix( '' ) ?? array();
+					foreach ( $names as $name ) {
+						$value = $processor->get_attribute( $name );
+						if ( true === $value ) {
+							$html .= " {$name}";
+						} else {
+							// Three-argument `strtr()` maps byte-for-byte and would turn `"` into `&`.
+							$value = str_replace( '"', '&quot;', $value );
+							$html .= " {$name}=\"{$value}\"";
+						}
+					}
+					// Close the opening tag whether or not it carried attributes.
+					$html .= '>';
+
+					// Tokens that carry their own text get that text plus a closing tag.
+					$text = $processor->get_modifiable_text();
+					if ( '' !== $text ) {
+						$html .= "{$text}</{$token_name}>";
+					}
+					break;
+			}
+		}
+
+		// A parse error invalidates whatever output was collected.
+		if ( null !== $processor->get_last_error() ) {
+			return null;
+		}
+
+		return $html;
+	}
+}