Skip to content

Commit

Permalink
parser: fix a bug that &#0x...; is accepted as a character reference
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Oct 24, 2024
1 parent a09646d commit ce59f2e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
10 changes: 7 additions & 3 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ module Private
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
DEFAULT_ENTITIES_PATTERNS = {}
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
default_entities.each do |term|
Expand Down Expand Up @@ -570,8 +570,12 @@ def unnormalize( string, entities=nil, filter=nil )
return rv if matches.size == 0
rv.gsub!( Private::CHARACTER_REFERENCES ) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
if m.start_with?("x")
code_point = Integer(m[1..-1], 16)
else
code_point = Integer(m, 10)
end
[code_point].pack('U*')
}
matches.collect!{|x|x[0]}.compact!
if filter
Expand Down
6 changes: 6 additions & 0 deletions test/parse/test_character_reference.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,11 @@ def test_linear_performance_many_preceding_zeros
REXML::Document.new('<test testing="&#' + "0" * n + '97;"/>')
end
end

# A hexadecimal character reference is only recognized in the form
# "&#x...;". The input holds one well-formed reference ("&#x61;") followed
# by one with a zero between "&#" and "x" ("&#0x61;"); the text event is
# expected to carry "a" for the former and the latter verbatim.
def test_hex_precedding_zero
  xml = "<root>&#x61;&#0x61;</root>"
  pull_parser = REXML::Parsers::PullParser.new(xml)
  pull_parser.pull # discard the :start_element event
  text_event = pull_parser.pull # :text
  assert_equal("a&#0x61;", text_event[1])
end
end
end

0 comments on commit ce59f2e

Please sign in to comment.