Skip to content

Commit

Permalink
Feedback response
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Dec 10, 2023
1 parent 713e230 commit 603db00
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
* - Prune the whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c".
* This would increase the size of the changes for some operations but leave more
* natural-looking output HTML.
* - Decode HTML character references within class names when matching. E.g. match having
* class `1<"2` needs to recognize `class="1&lt;&quot;2"`. Currently the Tag Processor
* will fail to find the right tag if the class name is encoded as such.
* - Properly decode HTML character references in `get_attribute()`. PHP's
* `html_entity_decode()` is wrong in a couple ways: it doesn't account for the
* no-ambiguous-ampersand rule, and it improperly handles the way semicolons may
Expand Down Expand Up @@ -375,14 +372,14 @@ class WP_HTML_Tag_Processor {
*
* @since {WP_VERSION}
*
* @see WP_HTML_Tag_Processor::STATE_UNKNOWN
* @see WP_HTML_Tag_Processor::STATE_READY
* @see WP_HTML_Tag_Processor::STATE_COMPLETE
* @see WP_HTML_Tag_Processor::STATE_INCOMPLETE
* @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG
*
* @var string
*/
private $parser_state = self::STATE_UNKNOWN;
private $parser_state = self::STATE_READY;

/**
* How many bytes from the original HTML document have been read and parsed.
Expand Down Expand Up @@ -663,7 +660,7 @@ public function next_token() {
* The next step in the parsing loop determines the parsing state;
* clear it so that state doesn't linger from the previous step.
*/
$this->parser_state = self::STATE_UNKNOWN;
$this->parser_state = self::STATE_READY;

if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
$this->parser_state = self::STATE_COMPLETE;
Expand Down Expand Up @@ -721,7 +718,8 @@ public function next_token() {
's' === $t || 'S' === $t ||
't' === $t || 'T' === $t ||
'x' === $t || 'X' === $t
) ) {
)
) {
$tag_name = $this->get_tag();

if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
Expand Down Expand Up @@ -2647,7 +2645,7 @@ private function matches() {
return true;
}

const STATE_UNKNOWN = 'UNKNOWN: The parser is waiting for a state transition; it may not have started, or it may have been interrupted, or it may be waiting to restart after pausing.';
const STATE_READY = 'READY: The parser is waiting for a state transition; it may not have started, or it may have been interrupted, or it may be waiting to restart after pausing.';
const STATE_COMPLETE = 'COMPLETE: The parser has reached the end of the document without truncating any possible tokens. There is nothing left to scan.';
const STATE_INCOMPLETE = 'INCOMPLETE: The parser has reached the end of the document but it appears as though the HTML is truncated inside a token. It has backed up to the last-known complete state and will not continue parsing.';
const STATE_MATCHED_TAG = 'MATCHED_TAG: The parser has found a tag and paused to allow reading from and modifying its attributes.';
Expand Down

0 comments on commit 603db00

Please sign in to comment.