Skip to content

Commit

Permalink
Performance and memory optimizations (#94)
Browse files Browse the repository at this point in the history
Originally, the inefficiency was discovered when working through the bug
report in the `rubocop` repository -
rubocop/rubocop#11657.

Tested on the `rubocop` repository. `git clone` it, point `rexml` to the
local repository, `bundle install` etc and run inside it:
```
bundle exec rubocop --profile --memory --format junit --out results/rubocop.xml lib/rubocop/cop/layout
```

### Memory
#### Before
```
Total allocated: 630.15 MB (8838482 objects)
Total retained:  53.50 MB (445069 objects)

allocated memory by gem
-----------------------------------
 294.26 MB  rexml/lib
 214.78 MB  rubocop/lib
  38.60 MB  rubocop-ast/lib
  31.62 MB  parser-3.2.1.0
  31.43 MB  other
  10.02 MB  lib
   3.11 MB  rubocop-rspec-2.18.1
   1.95 MB  rubocop-performance-1.16.0
   1.83 MB  regexp_parser-2.7.0
   1.61 MB  ast-2.4.2
 405.71 kB  unicode-display_width-2.4.2
 287.16 kB  rubocop-capybara-2.17.1
 244.96 kB  rubocop-rake-0.6.0
   5.00 kB  rubygems

allocated memory by file
-----------------------------------
 123.30 MB  rexml/lib/rexml/text.rb
 101.92 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb
  61.42 MB  rexml/lib/rexml/namespace.rb
  31.07 MB  rexml/lib/rexml/attribute.rb
  28.89 MB  rubocop/lib/rubocop/config.rb
  27.30 MB  rexml/lib/rexml/element.rb
  22.75 MB  rexml/lib/rexml/formatters/pretty.rb
  22.75 MB  rexml/lib/rexml/entity.rb
  22.75 MB  <internal:kernel>
  15.11 MB  parser-3.2.1.0/lib/parser/source/buffer.rb
  12.59 MB  rubocop-ast/lib/rubocop/ast/node.rb
  12.03 MB  rubocop/lib/rubocop/cop/registry.rb
  11.88 MB  rubocop/lib/rubocop/cop/team.rb
   5.90 MB  rubocop/lib/rubocop/cop/commissioner.rb
   5.87 MB  parser-3.2.1.0/lib/parser/lexer-F1.rb
   5.69 MB  rexml/lib/rexml/parent.rb
   5.44 MB  rubocop/lib/rubocop/cop/base.rb
   5.17 MB  rubocop-ast/lib/rubocop/ast/builder.rb
   4.56 MB  (eval)
   4.25 MB  parser-3.2.1.0/lib/parser/builders/default.rb
   3.75 MB  <internal:pack>
   3.59 MB  ruby/3.2.0/lib/ruby/3.2.0/psych/tree_builder.rb
   3.53 MB  rubocop/lib/rubocop/path_util.rb
   3.21 MB  rubocop/lib/rubocop/cli.rb
   2.45 MB  parser-3.2.1.0/lib/parser/ruby26.rb
   2.27 MB  rubocop-ast/lib/rubocop/ast/node_pattern/compiler/sequence_subcompiler.rb
   2.23 MB  rubocop-ast/lib/rubocop/ast/processed_source.rb
   2.05 MB  rubocop-ast/lib/rubocop/ast/node/if_node.rb
   2.00 MB  rubocop-ast/lib/rubocop/ast/token.rb
   1.73 MB  rubocop-ast/lib/rubocop/ast/node_pattern/method_definer.rb
   1.73 MB  ruby/3.2.0/lib/ruby/3.2.0/erb/compiler.rb
   1.61 MB  ast-2.4.2/lib/ast/node.rb
   1.54 MB  rubocop/lib/rubocop/cop/variable_force.rb
   1.53 MB  rubocop/lib/rubocop/cop/internal_affairs/cop_description.rb
   1.49 MB  rubocop/lib/rubocop/cop/naming/inclusive_language.rb
   1.47 MB  rubocop-ast/lib/rubocop/ast/node/mixin/parameterized_node.rb
   1.42 MB  rubocop-ast/lib/rubocop/ast/node_pattern/compiler.rb
   1.42 MB  rubocop-ast/lib/rubocop/ast/node_pattern/compiler/node_pattern_subcompiler.rb
   1.39 MB  rubocop/lib/rubocop/cop/layout/redundant_line_break.rb
   1.35 MB  rubocop/lib/rubocop/cop/util.rb
   1.29 MB  regexp_parser-2.7.0/lib/regexp_parser/scanner.rb
   1.29 MB  rubocop/lib/rubocop/cop/mixin/range_help.rb
   1.27 MB  ruby/3.2.0/lib/ruby/3.2.0/psych/parser.rb
   1.18 MB  rubocop/lib/rubocop/cop/layout/comment_indentation.rb
   1.17 MB  rubocop-ast/lib/rubocop/ast/node/mixin/descendence.rb
   1.10 MB  ruby/3.2.0/lib/ruby/3.2.0/erb.rb
   1.07 MB  rubocop/lib/rubocop/cop/variable_force/variable_table.rb
   1.04 MB  rubocop/lib/rubocop/cop/layout/end_of_line.rb
   1.01 MB  rubocop/lib/rubocop/cop/mixin/end_keyword_alignment.rb
 996.49 kB  rubocop/lib/rubocop/cop/metrics/utils/abc_size_calculator.rb

allocated memory by location
-----------------------------------
  87.70 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb:65
  61.19 MB  rexml/lib/rexml/text.rb:385
  36.04 MB  rexml/lib/rexml/text.rb:134
  35.83 MB  rexml/lib/rexml/namespace.rb:19
  26.06 MB  rexml/lib/rexml/text.rb:374
  22.75 MB  rexml/lib/rexml/entity.rb:136
  22.75 MB  <internal:kernel>:49
  17.16 MB  rubocop/lib/rubocop/config.rb:37
  15.77 MB  rexml/lib/rexml/attribute.rb:127
  15.30 MB  rexml/lib/rexml/attribute.rb:125
  13.08 MB  rexml/lib/rexml/element.rb:331
  11.37 MB  rexml/lib/rexml/element.rb:2382
  11.37 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb:56
   9.89 MB  parser-3.2.1.0/lib/parser/source/buffer.rb:205
   9.86 MB  rubocop/lib/rubocop/cop/team.rb:32
   8.53 MB  rexml/lib/rexml/namespace.rb:23
   8.53 MB  rexml/lib/rexml/namespace.rb:24
   8.53 MB  rexml/lib/rexml/namespace.rb:26
   5.86 MB  rubocop/lib/rubocop/cop/registry.rb:54
   5.69 MB  rexml/lib/rexml/formatters/pretty.rb:40
   5.69 MB  rexml/lib/rexml/formatters/pretty.rb:44
   5.39 MB  rubocop/lib/rubocop/config.rb:319
   4.55 MB  (eval):3
   4.20 MB  rubocop/lib/rubocop/config.rb:34
   3.84 MB  rubocop-ast/lib/rubocop/ast/node.rb:93
   3.73 MB  <internal:pack>:21
   3.71 MB  rubocop/lib/rubocop/cop/base.rb:346
   3.58 MB  ruby/3.2.0/lib/ruby/3.2.0/psych/tree_builder.rb:97
   3.52 MB  rubocop/lib/rubocop/path_util.rb:55
   3.50 MB  rubocop-ast/lib/rubocop/ast/builder.rb:99
   3.21 MB  rubocop/lib/rubocop/cli.rb:92
   3.00 MB  parser-3.2.1.0/lib/parser/lexer-F1.rb:14606
   2.91 MB  rubocop/lib/rubocop/cop/registry.rb:52
   2.84 MB  rexml/lib/rexml/parent.rb:116
   2.84 MB  rexml/lib/rexml/element.rb:330
   2.84 MB  rexml/lib/rexml/parent.rb:15
   2.84 MB  rexml/lib/rexml/formatters/pretty.rb:41
   2.84 MB  rexml/lib/rexml/formatters/pretty.rb:85
   2.84 MB  rexml/lib/rexml/formatters/pretty.rb:78
   2.84 MB  rexml/lib/rexml/formatters/pretty.rb:52
   2.84 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb:52
   2.84 MB  rubocop-ast/lib/rubocop/ast/node.rb:236
   1.89 MB  parser-3.2.1.0/lib/parser/lexer-F1.rb:14602
   1.86 MB  parser-3.2.1.0/lib/parser/source/buffer.rb:117
   1.74 MB  rubocop-ast/lib/rubocop/ast/processed_source.rb:185
   1.69 MB  rubocop-ast/lib/rubocop/ast/token.rb:14
   1.67 MB  rubocop-ast/lib/rubocop/ast/builder.rb:98
   1.66 MB  rubocop/lib/rubocop/cop/commissioner.rb:125
   1.52 MB  rubocop/lib/rubocop/cop/base.rb:286
   1.49 MB  rubocop/lib/rubocop/cop/internal_affairs/cop_description.rb:80
```

#### After
```
Total allocated: 367.43 MB (4224322 objects) 🔥 🔥 🔥 
Total retained:  53.50 MB (445067 objects)

allocated memory by gem
-----------------------------------
 214.62 MB  rubocop/lib
  54.44 MB  rexml/lib
  38.60 MB  rubocop-ast/lib
  31.62 MB  parser-3.2.1.0
  10.02 MB  lib
   8.69 MB  other
   3.11 MB  rubocop-rspec-2.18.1
   1.95 MB  rubocop-performance-1.16.0
   1.83 MB  regexp_parser-2.7.0
   1.61 MB  ast-2.4.2
 405.71 kB  unicode-display_width-2.4.2
 287.16 kB  rubocop-capybara-2.17.1
 244.96 kB  rubocop-rake-0.6.0
   5.00 kB  rubygems

allocated memory by file
-----------------------------------
 101.92 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb
  28.89 MB  rubocop/lib/rubocop/config.rb
  27.30 MB  rexml/lib/rexml/element.rb
  15.77 MB  rexml/lib/rexml/attribute.rb
  15.11 MB  parser-3.2.1.0/lib/parser/source/buffer.rb
  12.59 MB  rubocop-ast/lib/rubocop/ast/node.rb
  12.03 MB  rubocop/lib/rubocop/cop/registry.rb
  11.88 MB  rubocop/lib/rubocop/cop/team.rb
   5.90 MB  rubocop/lib/rubocop/cop/commissioner.rb
   5.87 MB  parser-3.2.1.0/lib/parser/lexer-F1.rb
   5.69 MB  rexml/lib/rexml/parent.rb
   5.69 MB  rexml/lib/rexml/formatters/pretty.rb
   5.44 MB  rubocop/lib/rubocop/cop/base.rb
   5.17 MB  rubocop-ast/lib/rubocop/ast/builder.rb
   4.56 MB  (eval)
   4.25 MB  parser-3.2.1.0/lib/parser/builders/default.rb
   3.75 MB  <internal:pack>
   3.59 MB  ruby/3.2.0/lib/ruby/3.2.0/psych/tree_builder.rb
   3.53 MB  rubocop/lib/rubocop/path_util.rb
   3.05 MB  rubocop/lib/rubocop/cli.rb
   2.45 MB  parser-3.2.1.0/lib/parser/ruby26.rb
   2.27 MB  rubocop-ast/lib/rubocop/ast/node_pattern/compiler/sequence_subcompiler.rb
   2.23 MB  rubocop-ast/lib/rubocop/ast/processed_source.rb
   2.05 MB  rubocop-ast/lib/rubocop/ast/node/if_node.rb
   2.00 MB  rubocop-ast/lib/rubocop/ast/token.rb
   1.73 MB  rubocop-ast/lib/rubocop/ast/node_pattern/method_definer.rb
   1.73 MB  ruby/3.2.0/lib/ruby/3.2.0/erb/compiler.rb
   1.61 MB  ast-2.4.2/lib/ast/node.rb
   1.54 MB  rubocop/lib/rubocop/cop/variable_force.rb
   1.53 MB  rubocop/lib/rubocop/cop/internal_affairs/cop_description.rb
   1.49 MB  rubocop/lib/rubocop/cop/naming/inclusive_language.rb
   1.47 MB  rubocop-ast/lib/rubocop/ast/node/mixin/parameterized_node.rb
   1.42 MB  rubocop-ast/lib/rubocop/ast/node_pattern/compiler.rb
   1.42 MB  rubocop-ast/lib/rubocop/ast/node_pattern/compiler/node_pattern_subcompiler.rb
   1.39 MB  rubocop/lib/rubocop/cop/layout/redundant_line_break.rb
   1.35 MB  rubocop/lib/rubocop/cop/util.rb
   1.29 MB  regexp_parser-2.7.0/lib/regexp_parser/scanner.rb
   1.29 MB  rubocop/lib/rubocop/cop/mixin/range_help.rb
   1.27 MB  ruby/3.2.0/lib/ruby/3.2.0/psych/parser.rb
   1.18 MB  rubocop/lib/rubocop/cop/layout/comment_indentation.rb
   1.17 MB  rubocop-ast/lib/rubocop/ast/node/mixin/descendence.rb
   1.10 MB  ruby/3.2.0/lib/ruby/3.2.0/erb.rb
   1.07 MB  rubocop/lib/rubocop/cop/variable_force/variable_table.rb
   1.04 MB  rubocop/lib/rubocop/cop/layout/end_of_line.rb
   1.01 MB  rubocop/lib/rubocop/cop/mixin/end_keyword_alignment.rb
 996.49 kB  rubocop/lib/rubocop/cop/metrics/utils/abc_size_calculator.rb
 970.58 kB  rubocop/lib/rubocop/cop/style/redundant_self.rb
 947.97 kB  rubocop/lib/rubocop/cop/layout/empty_comment.rb
 938.93 kB  rubocop/lib/rubocop/cop/mixin/empty_lines_around_body.rb
 871.31 kB  rubocop/lib/rubocop/cop/variable_force/variable.rb

allocated memory by location
-----------------------------------
  87.70 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb:65
  17.16 MB  rubocop/lib/rubocop/config.rb:37
  15.77 MB  rexml/lib/rexml/attribute.rb:127
  13.08 MB  rexml/lib/rexml/element.rb:331
  11.37 MB  rexml/lib/rexml/element.rb:2382
  11.37 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb:56
   9.89 MB  parser-3.2.1.0/lib/parser/source/buffer.rb:205
   9.86 MB  rubocop/lib/rubocop/cop/team.rb:32
   5.86 MB  rubocop/lib/rubocop/cop/registry.rb:54
   5.39 MB  rubocop/lib/rubocop/config.rb:319
   4.55 MB  (eval):3
   4.20 MB  rubocop/lib/rubocop/config.rb:34
   3.84 MB  rubocop-ast/lib/rubocop/ast/node.rb:93
   3.73 MB  <internal:pack>:21
   3.71 MB  rubocop/lib/rubocop/cop/base.rb:346
   3.58 MB  ruby/3.2.0/lib/ruby/3.2.0/psych/tree_builder.rb:97
   3.52 MB  rubocop/lib/rubocop/path_util.rb:55
   3.50 MB  rubocop-ast/lib/rubocop/ast/builder.rb:99
   3.05 MB  rubocop/lib/rubocop/cli.rb:92
   3.00 MB  parser-3.2.1.0/lib/parser/lexer-F1.rb:14606
   2.91 MB  rubocop/lib/rubocop/cop/registry.rb:52
   2.84 MB  rexml/lib/rexml/parent.rb:116
   2.84 MB  rexml/lib/rexml/element.rb:330
   2.84 MB  rexml/lib/rexml/parent.rb:15
   2.84 MB  rexml/lib/rexml/formatters/pretty.rb:40
   2.84 MB  rexml/lib/rexml/formatters/pretty.rb:41
   2.84 MB  rubocop/lib/rubocop/formatter/junit_formatter.rb:52
   2.84 MB  rubocop-ast/lib/rubocop/ast/node.rb:236
   1.89 MB  parser-3.2.1.0/lib/parser/lexer-F1.rb:14602
   1.86 MB  parser-3.2.1.0/lib/parser/source/buffer.rb:117
   1.74 MB  rubocop-ast/lib/rubocop/ast/processed_source.rb:185
   1.69 MB  rubocop-ast/lib/rubocop/ast/token.rb:14
   1.67 MB  rubocop-ast/lib/rubocop/ast/builder.rb:98
   1.66 MB  rubocop/lib/rubocop/cop/commissioner.rb:125
   1.52 MB  rubocop/lib/rubocop/cop/base.rb:286
   1.49 MB  rubocop/lib/rubocop/cop/internal_affairs/cop_description.rb:80
   1.47 MB  parser-3.2.1.0/lib/parser/source/buffer.rb:274
   1.41 MB  ast-2.4.2/lib/ast/node.rb:77
   1.35 MB  parser-3.2.1.0/lib/parser/ruby26.rb:0
   1.30 MB  rubocop/lib/rubocop/cop/commissioner.rb:153
   1.27 MB  ruby/3.2.0/lib/ruby/3.2.0/psych/parser.rb:62
   1.25 MB  rubocop-ast/lib/rubocop/ast/node.rb:106
   1.24 MB  rubocop/lib/rubocop/cop/registry.rb:181
   1.16 MB  parser-3.2.1.0/lib/parser/source/buffer.rb:254
   1.10 MB  ruby/3.2.0/lib/ruby/3.2.0/erb.rb:429
   1.07 MB  rubocop-ast/lib/rubocop/ast/node_pattern/method_definer.rb:58
   1.04 MB  rubocop/lib/rubocop/cop/layout/end_of_line.rb:50
 988.72 kB  rubocop/lib/rubocop/config.rb:322
 982.96 kB  rubocop-ast/lib/rubocop/ast/node/mixin/parameterized_node.rb:91
 975.88 kB  rubocop-ast/lib/rubocop/ast/node/if_node.rb:141
```

So, `-42%` of allocated memory and `-52%` of allocated objects.

### CPU
#### Before
```
     TOTAL    (pct)     SAMPLES    (pct)     FRAME
      2620  (10.0%)        2620  (10.0%)     Dir.pwd
  ==> 2314   (8.9%)        2314   (8.9%)     String#gsub
  ==> 1538   (5.9%)        1531   (5.9%)     String#scan
  ==> 4376  (16.8%)         960   (3.7%)     REXML::Text.normalize
      5223  (20.0%)         907   (3.5%)     Class#new
   ==> 895   (3.4%)         895   (3.4%)     Regexp#===
       879   (3.4%)         740   (2.8%)     Enumerable#find
       660   (2.5%)         660   (2.5%)     IO#write
   ==> 732   (2.8%)         641   (2.5%)     Kernel#clone
   ==> 618   (2.4%)         618   (2.4%)     String#=~
  ==> 2244   (8.6%)         579   (2.2%)     REXML::Formatters::Pretty#write_element
  ==> 1086   (4.2%)         484   (1.9%)     REXML::Namespace#name=
       795   (3.0%)         381   (1.5%)     Parser::Lexer#advance
       362   (1.4%)         362   (1.4%)     String#[]
       677   (2.6%)         308   (1.2%)     REXML::Attribute#to_string
       574   (2.2%)         286   (1.1%)     REXML::Namespace#name=
       286   (1.1%)         268   (1.0%)     REXML::Element#root
      1844   (7.1%)         256   (1.0%)     Racc::Parser#_racc_do_parse_c
       556   (2.1%)         236   (0.9%)     Kernel#require_relative
      8190  (31.3%)         233   (0.9%)     REXML::Attributes#[]=
      3913  (15.0%)         230   (0.9%)     RuboCop::Cop::Commissioner#trigger_responding_cops
     26099  (99.9%)         224   (0.9%)     Array#each
       820   (3.1%)         223   (0.9%)     RuboCop::Config#initialize
       273   (1.0%)         222   (0.8%)     Kernel#dup
      6009  (23.0%)         200   (0.8%)     Kernel#public_send
      4961  (19.0%)         189   (0.7%)     Hash#each_value
      3749  (14.4%)         173   (0.7%)     RuboCop::Formatter::JUnitFormatter#classname_attribute_value
     13301  (50.9%)         165   (0.6%)     RuboCop::Formatter::JUnitFormatter#add_testcase_element_to_testsuite_element
       325   (1.2%)         139   (0.5%)     RuboCop::Cop::Registry#clear_enrollment_queue
      1554   (5.9%)         134   (0.5%)     Array#select
```

#### After
```
     TOTAL    (pct)     SAMPLES    (pct)     FRAME
      1878  (12.1%)        1878  (12.1%)     Dir.pwd
       783   (5.1%)         783   (5.1%)     String#gsub
      3091  (20.0%)         739   (4.8%)     Class#new
       692   (4.5%)         607   (3.9%)     Enumerable#find
       702   (4.5%)         339   (2.2%)     Parser::Lexer#advance
       317   (2.0%)         317   (2.0%)     IO#write
       283   (1.8%)         283   (1.8%)     String#[]
       275   (1.8%)         275   (1.8%)     String#match?
       267   (1.7%)         262   (1.7%)     String#scan
       244   (1.6%)         230   (1.5%)     REXML::Element#root
      1551  (10.0%)         205   (1.3%)     Racc::Parser#_racc_do_parse_c
       236   (1.5%)         201   (1.3%)     Kernel#dup
       196   (1.3%)         179   (1.2%)     REXML::Attribute#to_string
      4037  (26.1%)         177   (1.1%)     Kernel#public_send
      3286  (21.2%)         176   (1.1%)     RuboCop::Cop::Commissioner#trigger_responding_cops
     15481 (100.0%)         176   (1.1%)     Array#each
       460   (3.0%)         166   (1.1%)     Kernel#require_relative
       661   (4.3%)         141   (0.9%)     RuboCop::Config#initialize
      2099  (13.6%)         141   (0.9%)     REXML::Attributes#[]=
      2866  (18.5%)         139   (0.9%)     RuboCop::Formatter::JUnitFormatter#classname_attribute_value
       292   (1.9%)         132   (0.9%)     RuboCop::Cop::Registry#clear_enrollment_queue
       126   (0.8%)         126   (0.8%)     File.fnmatch?
       874   (5.6%)         123   (0.8%)     REXML::Formatters::Pretty#write_element
       113   (0.7%)         113   (0.7%)     Symbol#to_s
      1348   (8.7%)         107   (0.7%)     Array#select
       103   (0.7%)         101   (0.7%)     RuboCop::Cop::Registry#initialize
      5611  (36.2%)          91   (0.6%)     RuboCop::Formatter::JUnitFormatter#add_testcase_element_to_testsuite_element
       269   (1.7%)          91   (0.6%)     REXML::Text.normalize
        89   (0.6%)          89   (0.6%)     String#tr
       161   (1.0%)          85   (0.5%)     Parser::Lexer#emit
```

### Time
#### Before
```
$ time bundle exec rubocop --cache false --format junit --out results/rubocop.xml lib/rubocop/cop/layout
bundle exec rubocop --cache false --format junit --out results/rubocop.xml   12.28s user 2.02s system 99% cpu 14.313 total
```

#### After
```
$ time bundle exec rubocop --cache false --format junit --out results/rubocop.xml lib/rubocop/cop/layout
bundle exec rubocop --cache false --format junit --out results/rubocop.xml   10.17s user 1.97s system 99% cpu 12.150 total
```

**Note**: There is also a difference in time needed to run this gem's
tests after this PR changes.

Feel free to ask clarifying questions if some changes are not clear.

Co-authored-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
fatkodima and kou authored Mar 21, 2023
1 parent cbb9c1f commit f44e88d
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 35 deletions.
11 changes: 7 additions & 4 deletions lib/rexml/attribute.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true
require_relative "namespace"
require_relative 'text'

Expand Down Expand Up @@ -119,10 +119,13 @@ def hash
# b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'"
def to_string
value = to_s
if @element and @element.context and @element.context[:attribute_quote] == :quote
%Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
value = value.gsub('"', '&quot;') if value.include?('"')
%Q^#@expanded_name="#{value}"^
else
"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
value = value.gsub("'", '&apos;') if value.include?("'")
"#@expanded_name='#{value}'"
end
end

Expand Down Expand Up @@ -192,7 +195,7 @@ def node_type
end

def inspect
rv = ""
rv = +""
write( rv )
rv
end
Expand Down
40 changes: 25 additions & 15 deletions lib/rexml/entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,34 @@ def to_s
# then:
# doctype.entity('yada').value #-> "nanoo bar nanoo"
def value
if @value
matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
@resolved_value ||= resolve_value
end

def parent=(other)
@resolved_value = nil
super
end

private
def resolve_value
return nil if @value.nil?
return @value unless @value.match?(PEREFERENCE_RE)

matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
end
return rv
end
nil
rv
end
end

Expand Down
4 changes: 2 additions & 2 deletions lib/rexml/formatters/pretty.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true
require_relative 'default'

module REXML
Expand Down Expand Up @@ -58,7 +58,7 @@ def write_element(node, output)
skip = false
if compact
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
string = ""
string = +""
old_level = @level
@level = 0
node.children.each { |child| write( child, string ) }
Expand Down
12 changes: 8 additions & 4 deletions lib/rexml/namespace.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true

require_relative 'xmltokens'

Expand All @@ -10,21 +10,25 @@ module Namespace
# The expanded name of the object, valid if name is set
attr_accessor :prefix
include XMLTokens
NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u

# Sets the name and the expanded name
def name=( name )
@expanded_name = name
case name
when NAMESPLIT
if name.match?(NAME_WITHOUT_NAMESPACE)
@prefix = ""
@namespace = ""
@name = name
elsif name =~ NAMESPLIT
if $1
@prefix = $1
else
@prefix = ""
@namespace = ""
end
@name = $2
when ""
elsif name == ""
@prefix = nil
@namespace = nil
@name = nil
Expand Down
10 changes: 6 additions & 4 deletions lib/rexml/text.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true
require_relative 'security'
require_relative 'entity'
require_relative 'doctype'
Expand Down Expand Up @@ -131,7 +131,7 @@ def parent= parent
def Text.check string, pattern, doctype

# illegal anywhere
if string !~ VALID_XML_CHARS
if !string.match?(VALID_XML_CHARS)
if String.method_defined? :encode
string.chars.each do |c|
case c.ord
Expand Down Expand Up @@ -371,7 +371,7 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
copy = input.to_s
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&amp;' )
copy = copy.gsub( "&", "&amp;" )
copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
if doctype
# Replace all ampersands that aren't part of an entity
doctype.entities.each_value do |entity|
Expand All @@ -382,7 +382,9 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
else
# Replace all ampersands that aren't part of an entity
DocType::DEFAULT_ENTITIES.each_value do |entity|
copy = copy.gsub(entity.value, "&#{entity.name};" )
if copy.include?(entity.value)
copy = copy.gsub(entity.value, "&#{entity.name};" )
end
end
end
copy
Expand Down
2 changes: 1 addition & 1 deletion test/test_core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1423,7 +1423,7 @@ def test_ticket_91
d.root.add_element( "bah" )
p=REXML::Formatters::Pretty.new(2)
p.compact = true # Don't add whitespace to text nodes unless necessary
p.write(d,out="")
p.write(d,out=+"")
assert_equal( expected, out )
end

Expand Down
8 changes: 3 additions & 5 deletions test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,9 @@ def test_empty_value
<cd></cd>
EOF

assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
REXML::Security.entity_expansion_limit = 100
assert_equal(100, REXML::Security.entity_expansion_limit)
REXML::Document.new(xml)
REXML::Security.entity_expansion_limit = 90
assert_equal(90, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
Expand Down

0 comments on commit f44e88d

Please sign in to comment.