WordPress · sirreal · Aug 22, 2024 · Aug 22, 2024 · Aug 22, 2024 · Aug 23, 2024
diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -843,10 +843,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
 
 		if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
 			parent::next_token();
-			if (
-				WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state ||
-				WP_HTML_Tag_Processor::STATE_CDATA_NODE === $this->parser_state
-			) {
+			if ( WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state ) {
 				parent::subdivide_text_appropriately();
 			}
 		}
@@ -4375,7 +4372,6 @@ private function step_in_foreign_content(): bool {
 		}
 
 		switch ( $op ) {
-			case '#cdata-section':
 			case '#text':
 				/*
 				 * > A character token that is U+0000 NULL
@@ -4395,6 +4391,24 @@ private function step_in_foreign_content(): bool {
 				$this->insert_foreign_element( $this->state->current_token, false );
 				return true;
 
+			/*
+			 * CDATA sections are alternate wrappers for text content and therefore
+			 * ought to follow the same rules as text nodes.
+			 */
+			case '#cdata-section':
+				/*
+				 * NULL bytes and whitespace do not change the frameset-ok flag.
+				 */
+				$current_token        = $this->bookmarks[ $this->state->current_token->bookmark_name ];
+				$cdata_content_start  = $current_token->start + 9;
+				$cdata_content_length = $current_token->length - 12;
+				if ( strspn( $this->html, "\0 \t\n\f\r", $cdata_content_start, $cdata_content_length ) !== $cdata_content_length ) {
+					$this->state->frameset_ok = false;
+				}
+
+				$this->insert_foreign_element( $this->state->current_token, false );
+				return true;
+
 			/*
 			 * > A comment token
 			 */

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -3337,8 +3337,8 @@ public function get_comment_type(): ?string {
 	}
 
 	/**
-	 * Subdivides a matched text node or CDATA text node, splitting NULL byte sequences
-	 * and decoded whitespace as distinct prefixes.
+	 * Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as
+	 * distinct prefixes.
 	 *
 	 * Note that once anything that's neither a NULL byte nor decoded whitespace is
 	 * encountered, then the remainder of the text node is left intact as generic text.
@@ -3368,70 +3368,55 @@ public function get_comment_type(): ?string {
 	 * @return bool Whether the text node was subdivided.
 	 */
 	public function subdivide_text_appropriately(): bool {
+		if ( self::STATE_TEXT_NODE !== $this->parser_state ) {
+			return false;
+		}
+
 		$this->text_node_classification = self::TEXT_IS_GENERIC;
 
-		if ( self::STATE_TEXT_NODE === $this->parser_state ) {
-			/*
-			 * NULL bytes are treated categorically different than numeric character
-			 * references whose number is zero. `&#x00;` is not the same as `"\x00"`.
-			 */
-			$leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
-			if ( $leading_nulls > 0 ) {
-				$this->token_length             = $leading_nulls;
-				$this->text_length              = $leading_nulls;
-				$this->bytes_already_parsed     = $this->token_starts_at + $leading_nulls;
-				$this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
-				return true;
-			}
+		/*
+		 * NULL bytes are treated categorically different than numeric character
+		 * references whose number is zero. `&#x00;` is not the same as `"\x00"`.
+		 */
+		$leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
+		if ( $leading_nulls > 0 ) {
+			$this->token_length             = $leading_nulls;
+			$this->text_length              = $leading_nulls;
+			$this->bytes_already_parsed     = $this->token_starts_at + $leading_nulls;
+			$this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
+			return true;
+		}
 
-			/*
-			 * Start a decoding loop to determine the point at which the
-			 * text subdivides. This entails raw whitespace bytes and any
-			 * character reference that decodes to the same.
-			 */
-			$at  = $this->text_starts_at;
-			$end = $this->text_starts_at + $this->text_length;
-			while ( $at < $end ) {
-				$skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at );
-				$at     += $skipped;
-
-				if ( $at < $end && '&' === $this->html[ $at ] ) {
-					$matched_byte_length = null;
-					$replacement         = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length );
-					if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) {
-						$at += $matched_byte_length;
-						continue;
-					}
+		/*
+		 * Start a decoding loop to determine the point at which the
+		 * text subdivides. This entails raw whitespace bytes and any
+		 * character reference that decodes to the same.
+		 */
+		$at  = $this->text_starts_at;
+		$end = $this->text_starts_at + $this->text_length;
+		while ( $at < $end ) {
+			$skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at );
+			$at     += $skipped;
+
+			if ( $at < $end && '&' === $this->html[ $at ] ) {
+				$matched_byte_length = null;
+				$replacement         = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length );
+				if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) {
+					$at += $matched_byte_length;
+					continue;
 				}
-
-				break;
-			}
-
-			if ( $at > $this->text_starts_at ) {
-				$new_length                     = $at - $this->text_starts_at;
-				$this->text_length              = $new_length;
-				$this->token_length             = $new_length;
-				$this->bytes_already_parsed     = $at;
-				$this->text_node_classification = self::TEXT_IS_WHITESPACE;
-				return true;
 			}
 
-			return false;
+			break;
 		}
 
-		// Unlike text nodes, there are no character references within CDATA sections.
-		if ( self::STATE_CDATA_NODE === $this->parser_state ) {
-			$leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length );
-			if ( $leading_nulls === $this->text_length ) {
-				$this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE;
-				return true;
-			}
-
-			$leading_ws = strspn( $this->html, " \t\f\r\n", $this->text_starts_at, $this->text_length );
-			if ( $leading_ws === $this->text_length ) {
-				$this->text_node_classification = self::TEXT_IS_WHITESPACE;
-				return true;
-			}
+		if ( $at > $this->text_starts_at ) {
+			$new_length                     = $at - $this->text_starts_at;
+			$this->text_length              = $new_length;
+			$this->token_length             = $new_length;
+			$this->bytes_already_parsed     = $at;
+			$this->text_node_classification = self::TEXT_IS_WHITESPACE;
+			return true;
 		}
 
 		return false;