diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9f3249db74f78..4c779a4dec32d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -281,24 +281,62 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * ## Current HTML Support * - * - The only supported context is ``, which is the default value. * - The only supported document encoding is `UTF-8`, which is the default value. * + * Example: + * + * // Usually, snippets of HTML ought to be processed in the default `` context. + * $processor = WP_HTML_Processor::create_fragment( '

Hi

' ); + * + * // Prevent inner closing tags from closing the containing element and leaking out. + * $processor = WP_HTML_Processor::create_fragment( 'No escape.', '
' ); + * + * @todo Set the SVG or MathML namespace when creating with context node SVG or MATH. + * * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. + * @since 6.7.0 Can create fragment in any context. * * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment, must be default of ``. + * @param string $context Context element for the fragment, an HTML start tag like ``. * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. * @return static|null The created processor if successful, otherwise null. */ public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { - if ( '' !== $context || 'UTF-8' !== $encoding ) { + if ( 'UTF-8' !== $encoding ) { + return null; + } + + $context_node = self::parse_context_element( $context ); + if ( null === $context_node ) { + _doing_it_wrong( + __METHOD__, + __( 'The context argument must be an HTML start tag.' ), + '6.7.0' + ); + return null; + } + + if ( self::is_void( $context_node[0] ) ) { + _doing_it_wrong( + __METHOD__, + __( 'The context argument may not specify a void element.' ), + '6.7.0' + ); + return null; + } + + if ( in_array( $context_node[0], array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { + _doing_it_wrong( + __METHOD__, + __( 'The context argument may not specify a self-contained element.' ), + '6.7.0' + ); return null; } $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); - $processor->state->context_node = array( 'BODY', array() ); + $processor->state->context_node = $context_node; $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; $processor->state->encoding = $encoding; $processor->state->encoding_confidence = 'certain'; @@ -327,6 +365,62 @@ public static function create_fragment( $html, $context = '', $encoding = return $processor; } + /** + * Parses an HTML span containing a context element for the fragment parser. + * + * Effectively this extracts the first token from an HTML input and if it's a + * starting tag, will return the tag name and any attributes on the tag. + * + * Example: + * + * array( 'BODY', array() ) === self::parse_context_element( '' ); + * array( 'SCRIPT', array( 'type' => 'javascript' ) ) === self::parse_context_element( ''; + } + + // Parse out the context element as well as the attributes. + $context_processor = new WP_HTML_Tag_Processor( $context ); + if ( + ! $context_processor->next_token() || + '#tag' !== $context_processor->get_token_type() || + $context_processor->is_tag_closer() + ) { + return null; + } + + $attributes = array(); + $attribute_names = $context_processor->get_attribute_names_with_prefix( '' ); + if ( isset( $attribute_names ) ) { + foreach ( $attribute_names as $name ) { + $attributes[ $name ] = $context_processor->get_attribute( $name ); + } + } + + return array( $context_processor->get_tag(), $attributes ); + } + /** * Creates an HTML processor in the full parsing mode. * diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php new file mode 100644 index 0000000000000..cd7a6ee2badc1 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -0,0 +1,84 @@ +assertNull( + WP_HTML_Processor::create_fragment( 'just a test', $context ), + "Should not have been able to create a fragment parser with context node {$context}" + ); + } + + /** + * Data provider. + * + * @ticket 61576 + * + * @return array[] + */ + public static function data_invalid_fragment_contexts() { + return array( + // Invalid contexts. + 'Invalid text' => array( 'just some text' ), + 'Invalid comment' => array( '' ), + 'Invalid closing' => array( '
' ), + 'Invalid DOCTYPE' => array( '' ), + + // Void elements. + 'AREA' => array( '' ), + 'BASE' => array( '' ), + 'BASEFONT' => array( '' ), + 'BGSOUND' => array( '' ), + 'BR' => array( '
' ), + 'COL' => array( '' ), + 'EMBED' => array( '' ), + 'FRAME' => array( '' ), + 'HR' => array( '
' ), + 'IMG' => array( '' ), + 'INPUT' => array( '' ), + 'KEYGEN' => array( '' ), + 'LINK' => array( '' ), + 'META' => array( '' ), + 'PARAM' => array( '' ), + 'SOURCE' => array( '' ), + 'TRACK' => array( '' ), + 'WBR' => array( '' ), + + // Self-contained elements. + 'IFRAME' => array( '