Skip to content

Commit

Permalink
Fix calculation of Security.entity_expansion_text_limit in SAX/pull p…
Browse files Browse the repository at this point in the history
…arsers (#195)

GitHub: fix #193

## [Why?]
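In the SAX and pull parsers, the limit check compared a running total of
`rv.bytesize` (added up after every substitution) against
`Security.entity_expansion_text_limit`. The summing was unnecessary: `rv`
already holds the whole expanded text, so its own bytesize is the value to check.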

- Add Log
```patch
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 28810bf..5cfc089 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -556,6 +556,7 @@ module REXML
                 re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
                 rv.gsub!( re, entity_value )
                 sum += rv.bytesize
+puts " rv.bytesize: #{rv.bytesize} sum: #{sum} > Security.entity_expansion_text_limit: #{Security.entity_expansion_text_limit} : #{rv}"
                 if sum > Security.entity_expansion_text_limit
                   raise "entity expansion has grown too large"
                 end
diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb
index 7e0befe..cc68dbf 100644
--- a/lib/rexml/text.rb
+++ b/lib/rexml/text.rb
@@ -415,6 +415,7 @@ module REXML
       sum = 0
       string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
         s = Text.expand($&, doctype, filter)
+puts " s.bytesize: #{s.bytesize} sum + s.bytesize : #{sum + s.bytesize } > Security.entity_expansion_text_limit: #{Security.entity_expansion_text_limit} : #{s}"
         if sum + s.bytesize > Security.entity_expansion_text_limit
           raise "entity expansion has grown too large"
         else
```

- entity_expansion_text_limit.rb
```ruby
$LOAD_PATH.unshift(File.expand_path("lib"))

require 'rexml'
require 'rexml/parsers/sax2parser'
require 'rexml/parsers/pullparser'

# Parses +xml+ through the DOM API and prints the document's
# entity_expansion_count.
#
# xml - XML source handed to REXML::Document.new.
def dom_entity_expansion_count_check(xml)
  document = REXML::Document.new(xml)
  first_child = document.root.children.first
  # The value is read and discarded; presumably this is what forces the
  # entity references to be expanded before the count is reported.
  first_child.value
  expansions = document.entity_expansion_count
  puts "DOM: entity_expansion_count: #{expansions}"
end

# Runs +xml+ through the SAX2 parser and prints the parser's
# entity_expansion_count once parsing has finished.
#
# xml - XML source handed to REXML::Parsers::SAX2Parser.new.
def sax_entity_expansion_count_check(xml)
  sax_parser = REXML::Parsers::SAX2Parser.new(xml)
  sax_parser.parse
  expansions = sax_parser.entity_expansion_count
  puts "SAX: entity_expansion_count: #{expansions}"
end

# Drains every event from a pull parser over +xml+ and prints the parser's
# entity_expansion_count afterwards.
#
# xml - XML source handed to REXML::Parsers::PullParser.new.
def pull_entity_expansion_count_check(xml)
  pull_parser = REXML::Parsers::PullParser.new(xml)
  # Events are consumed only for their side effects; the results are dropped.
  pull_parser.pull while pull_parser.has_next?
  expansions = pull_parser.entity_expansion_count
  puts "Pull: entity_expansion_count: #{expansions}"
end

# Nested entity definitions: &a; is three &b;, and each &b; is &c;&d;&e;
# (10 bytes apiece), so the fully expanded text of <member> is 90 bytes.
xml = <<XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
  <!ENTITY a "&b;&b;&b;">
  <!ENTITY b "&c;&d;&e;">
  <!ENTITY c "xxxxxxxxxx">
  <!ENTITY d "yyyyyyyyyy">
  <!ENTITY e "zzzzzzzzzz">
]>
<member>&a;</member>
XML

# Feed the same document to each parser front end, in DOM, SAX, pull order.
%i[
  dom_entity_expansion_count_check
  sax_entity_expansion_count_check
  pull_entity_expansion_count_check
].each { |check| send(check, xml) }
```

```
$ ruby entity_expansion_text_limit.rb
 s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx
 s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy
 s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz
 s.bytesize: 30 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx
 s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy
 s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz
 s.bytesize: 30 sum + s.bytesize : 60 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 s.bytesize: 10 sum + s.bytesize : 10 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx
 s.bytesize: 10 sum + s.bytesize : 20 > Security.entity_expansion_text_limit: 10240 : yyyyyyyyyy
 s.bytesize: 10 sum + s.bytesize : 30 > Security.entity_expansion_text_limit: 10240 : zzzzzzzzzz
 s.bytesize: 30 sum + s.bytesize : 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 s.bytesize: 90 sum + s.bytesize : 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
DOM: entity_expansion_count: 13
 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e;
 rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e;
 rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e;
 rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e;
 rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 180 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e;
 rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e;
 rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 270 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
SAX: entity_expansion_count: 13
 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e;
 rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e;
 rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e;
 rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e;
 rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 180 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 16 sum: 16 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxx&d;&e;
 rv.bytesize: 23 sum: 39 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyy&e;
 rv.bytesize: 30 sum: 69 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 270 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
 rv.bytesize: 90 sum: 90 > Security.entity_expansion_text_limit: 10240 : xxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzzxxxxxxxxxxyyyyyyyyyyzzzzzzzzzz
Pull: entity_expansion_count: 13
```

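The expected size is 90 bytes (the length of the fully expanded text), but in
the SAX and pull parsers the checked value climbs to 270 because `rv.bytesize`
is needlessly added to a running total after every substitution.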
  • Loading branch information
naitoh authored Aug 12, 2024
1 parent e3f747f commit 1892770
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 3 deletions.
4 changes: 1 addition & 3 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -548,15 +548,13 @@ def unnormalize( string, entities=nil, filter=nil )
}
matches.collect!{|x|x[0]}.compact!
if matches.size > 0
sum = 0
matches.each do |entity_reference|
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
sum += rv.bytesize
if sum > Security.entity_expansion_text_limit
if rv.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
end
else
Expand Down
20 changes: 20 additions & 0 deletions test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ def test_new
class EntityExpansionLimitTest < Test::Unit::TestCase
# Remembers the current global REXML::Security limits in instance variables
# before each test.
def setup
  security = REXML::Security
  @default_entity_expansion_limit = security.entity_expansion_limit
  @default_entity_expansion_text_limit = security.entity_expansion_text_limit
end

# Writes the remembered limits back to REXML::Security after each test.
def teardown
  security = REXML::Security
  security.entity_expansion_limit = @default_entity_expansion_limit
  security.entity_expansion_text_limit = @default_entity_expansion_text_limit
end

class GeneralEntityTest < self
Expand Down Expand Up @@ -126,6 +128,24 @@ def test_with_default_entity
doc.root.children.first.value
end
end

# With the text limit set to exactly 90, the DOM parser must expand &a;
# without raising and yield a 90-byte value for the first child of <member>.
def test_entity_expansion_text_limit
  REXML::Security.entity_expansion_text_limit = 90

  source = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;">
<!ENTITY b "&c;&d;&e;">
<!ENTITY c "xxxxxxxxxx">
<!ENTITY d "yyyyyyyyyy">
<!ENTITY e "zzzzzzzzzz">
]>
<member>&a;</member>
XML

  first_child = REXML::Document.new(source).root.children.first
  assert_equal(90, first_child.value.bytesize)
end
end

class ParameterEntityTest < self
Expand Down
30 changes: 30 additions & 0 deletions test/test_pullparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,12 @@ def test_peek
class EntityExpansionLimitTest < Test::Unit::TestCase
# Remembers the current global REXML::Security limits in instance variables
# before each test.
def setup
  security = REXML::Security
  @default_entity_expansion_limit = security.entity_expansion_limit
  @default_entity_expansion_text_limit = security.entity_expansion_text_limit
end

# Writes the remembered limits back to REXML::Security after each test.
def teardown
  security = REXML::Security
  security.entity_expansion_limit = @default_entity_expansion_limit
  security.entity_expansion_text_limit = @default_entity_expansion_text_limit
end

class GeneralEntityTest < self
Expand Down Expand Up @@ -249,6 +251,34 @@ def test_with_default_entity
end
end
end

# With the text limit set to exactly 90, the pull parser must expand &a;
# without raising and deliver 90 bytes of text for <member>.
def test_entity_expansion_text_limit
  source = <<-XML
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;">
<!ENTITY b "&c;&d;&e;">
<!ENTITY c "xxxxxxxxxx">
<!ENTITY d "yyyyyyyyyy">
<!ENTITY e "zzzzzzzzzz">
]>
<member>&a;</member>
XML

  REXML::Security.entity_expansion_text_limit = 90
  parser = REXML::Parsers::PullParser.new(source)
  events = {}
  element_name = ''
  while parser.has_next?
    event = parser.pull
    case event.event_type
    when :start_element
      # Remember which element the following text events belong to.
      element_name = event[0]
    when :text
      events[element_name] = event[1]
    end
  end
  # Compare bytes, not characters: the limit is checked against bytesize.
  assert_equal(90, events['member'].bytesize)
end
end
end
end
Expand Down
24 changes: 24 additions & 0 deletions test/test_sax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,12 @@ def test_sax2
class EntityExpansionLimitTest < Test::Unit::TestCase
# Remembers the current global REXML::Security limits in instance variables
# before each test.
def setup
  security = REXML::Security
  @default_entity_expansion_limit = security.entity_expansion_limit
  @default_entity_expansion_text_limit = security.entity_expansion_text_limit
end

# Writes the remembered limits back to REXML::Security after each test.
def teardown
  security = REXML::Security
  security.entity_expansion_limit = @default_entity_expansion_limit
  security.entity_expansion_text_limit = @default_entity_expansion_text_limit
end

class GeneralEntityTest < self
Expand Down Expand Up @@ -182,6 +184,28 @@ def test_with_default_entity
sax.parse
end
end

# With the text limit set to exactly 90, the SAX2 parser must expand &a;
# without raising and report 90 bytes of character data for <member>.
def test_entity_expansion_text_limit
  source = <<-XML
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;">
<!ENTITY b "&c;&d;&e;">
<!ENTITY c "xxxxxxxxxx">
<!ENTITY d "yyyyyyyyyy">
<!ENTITY e "zzzzzzzzzz">
]>
<member>&a;</member>
XML

  REXML::Security.entity_expansion_text_limit = 90
  sax = REXML::Parsers::SAX2Parser.new(source)
  text_size = nil
  sax.listen(:characters, ["member"]) do |text|
    # Measure bytes, not characters: the limit is checked against bytesize.
    text_size = text.bytesize
  end
  sax.parse
  assert_equal(90, text_size)
end
end
end

Expand Down

0 comments on commit 1892770

Please sign in to comment.