diff --git a/benchmark/benchmark.rb b/benchmark/benchmark.rb index d9142ca2..731fa371 100755 --- a/benchmark/benchmark.rb +++ b/benchmark/benchmark.rb @@ -23,7 +23,7 @@ def compare_scrub_methods end module TestSet - def test_set options={} + def test_set(options = {}) scale = options[:rehearse] ? 10 : 1 puts self.class.name @@ -49,6 +49,7 @@ class HeadToHead < Measure class HeadToHeadRailsSanitize < Measure include TestSet + def bench(content, ntimes, fragment_p) clear_measure @@ -65,6 +66,7 @@ def bench(content, ntimes, fragment_p) class HeadToHeadRailsStripTags < Measure include TestSet + def bench(content, ntimes, fragment_p) clear_measure @@ -81,6 +83,7 @@ def bench(content, ntimes, fragment_p) class HeadToHeadSanitizerSanitize < Measure include TestSet + def bench(content, ntimes, fragment_p) clear_measure @@ -100,6 +103,7 @@ def bench(content, ntimes, fragment_p) class HeadToHeadHtml5LibSanitize < Measure include TestSet + def bench(content, ntimes, fragment_p) clear_measure @@ -120,6 +124,7 @@ def bench(content, ntimes, fragment_p) class HeadToHeadHTMLFilter < Measure include TestSet + def bench(content, ntimes, fragment_p) clear_measure diff --git a/benchmark/helper.rb b/benchmark/helper.rb index 6621e281..a1a52c78 100644 --- a/benchmark/helper.rb +++ b/benchmark/helper.rb @@ -1,13 +1,13 @@ -require 'rubygems' -require 'open-uri' -require 'hpricot' +require "rubygems" +require "open-uri" +require "hpricot" require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah") -require 'benchmark' +require "benchmark" require "action_view" require "action_controller/vendor/html-scanner" require "sanitize" -require 'hitimes' -require 'htmlfilter' +require "hitimes" +require "htmlfilter" unless defined?(HTMLFilter) HTMLFilter = HtmlFilter @@ -19,20 +19,20 @@ class RailsSanitize end class HTML5libSanitize - require 'html5/html5parser' - require 'html5/liberalxmlparser' - require 'html5/treewalkers' - require 'html5/treebuilders' - require 'html5/serializer' - require 'html5/sanitizer' + require "html5/html5parser" + require "html5/liberalxmlparser" + require "html5/treewalkers" + require "html5/treebuilders" + require "html5/serializer" + require "html5/sanitizer" include HTML5 def sanitize(html) HTMLParser.parse_fragment(html, { - :tokenizer => HTMLSanitizer, - :encoding => 'utf-8', - :tree => TreeBuilders::REXML::TreeBuilder + :tokenizer => HTMLSanitizer, + :encoding => "utf-8", + :tree => TreeBuilders::REXML::TreeBuilder, }).to_s end end diff --git a/test/helper.rb b/test/helper.rb index 45fee19d..092e7f9c 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -1,8 +1,8 @@ -require 'rubygems' -require 'minitest/unit' -require 'minitest/spec' -require 'minitest/autorun' -require 'rr' +require "rubygems" +require "minitest/unit" +require "minitest/spec" +require "minitest/autorun" +require "rr" require File.expand_path(File.join(File.dirname(__FILE__), "..", "lib", "loofah")) diff --git a/test/html5/test_sanitizer.rb b/test/html5/test_sanitizer.rb index aefc2a15..21fbc9a6 100755 --- a/test/html5/test_sanitizer.rb +++ b/test/html5/test_sanitizer.rb @@ -9,29 +9,29 @@ class Html5TestSanitizer < Loofah::TestCase include Loofah - def sanitize_xhtml stream + def sanitize_xhtml(stream) Loofah.fragment(stream).scrub!(:escape).to_xhtml end - def sanitize_html stream + def sanitize_html(stream) Loofah.fragment(stream).scrub!(:escape).to_html end def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput) ## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing. - sane = sanitize_html(input).gsub('"',"'") - htmloutput = htmloutput.gsub('"',"'") - xhtmloutput = xhtmloutput.gsub('"',"'") - rexmloutput = rexmloutput.gsub('"',"'") + sane = sanitize_html(input).gsub('"', "'") + htmloutput = htmloutput.gsub('"', "'") + xhtmloutput = xhtmloutput.gsub('"', "'") + rexmloutput = rexmloutput.gsub('"', "'") ## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that ## it would require a lot of manual hacking to make the tests match libxml's output. ## instead, I'm taking the shotgun approach, and trying to match any of the described outputs. assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane), - %Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"}) + %Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"}) end - def assert_completes_in_reasonable_time &block + def assert_completes_in_reasonable_time(&block) t0 = Time.now block.call assert_in_delta t0, Time.now, 0.1 # arbitrary seconds @@ -39,30 +39,30 @@ def assert_completes_in_reasonable_time &block (HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name| define_method "test_should_allow_#{tag_name}_tag" do - input = "<#{tag_name} title='1'>foo bar baz" - htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz" + input = "<#{tag_name} title='1'>foo bar baz" + htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz" xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz" rexmloutput = xhtmloutput if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name) htmloutput = "foo <bad>bar</bad> baz" xhtmloutput = htmloutput - elsif tag_name == 'col' + elsif tag_name == "col" htmloutput = "foo <bad>bar</bad> baz" xhtmloutput = htmloutput rexmloutput = "" - elsif tag_name == 'table' + elsif tag_name == "table" htmloutput = "foo <bad>bar</bad>baz
" xhtmloutput = htmloutput - elsif tag_name == 'image' + elsif tag_name == "image" htmloutput = "foo <bad>bar</bad> baz" xhtmloutput = htmloutput rexmloutput = "foo <bad>bar</bad> baz" elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name) htmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz" xhtmloutput = htmloutput - htmloutput += '
' if tag_name == 'br' - rexmloutput = "<#{tag_name} title='1' />" + htmloutput += "
" if tag_name == "br" + rexmloutput = "<#{tag_name} title='1' />" end check_sanitization(input, htmloutput, xhtmloutput, rexmloutput) end @@ -80,9 +80,9 @@ def assert_completes_in_reasonable_time &block # end HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name| - next if attribute_name == 'style' + next if attribute_name == "style" define_method "test_should_allow_#{attribute_name}_attribute" do - input = "

foo bar baz

" + input = "

foo bar baz

" if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name) output = "

foo <bad>bar</bad> baz

" htmloutput = "

foo <bad>bar</bad> baz

" @@ -178,7 +178,6 @@ def test_should_disallow_other_uri_mediatypes check_sanitization(input, output, output, output) end - HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name| next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name) define_method "test_#{tag_name}_should_allow_local_href" do @@ -228,12 +227,12 @@ def test_figure_element_is_valid # check_sanitization(input, output, output, output) # end -# This affects only NS4. Is it worth fixing? -# def test_javascript_includes -# input = %(
foo
) -# output = "
foo
" -# check_sanitization(input, output, output, output) -# end + # This affects only NS4. Is it worth fixing? + # def test_javascript_includes + # input = %(
foo
) + # output = "
foo
" + # check_sanitization(input, output, output, output) + # end ## ## these tests primarily test the parser logic, not the sanitizer @@ -241,15 +240,15 @@ def test_figure_element_is_valid ## libxml2 here, so let's rely on the unit tests above to take care ## of our valid elements and attributes. ## - require 'json' - Dir[File.join(File.dirname(__FILE__), '..', 'assets', 'testdata_sanitizer_tests1.dat')].each do |filename| + require "json" + Dir[File.join(File.dirname(__FILE__), "..", "assets", "testdata_sanitizer_tests1.dat")].each do |filename| JSON::parse(open(filename).read).each do |test| - it "testdata sanitizer #{test['name']}" do + it "testdata sanitizer #{test["name"]}" do check_sanitization( - test['input'], - test['output'], - test['xhtml'] || test['output'], - test['rexml'] || test['output'] + test["input"], + test["output"], + test["xhtml"] || test["output"], + test["rexml"] || test["output"] ) end end @@ -338,7 +337,6 @@ def test_css_max_width assert_match %r/max-width/, sane.inner_html end - def test_issue_90_slow_regex skip("timing tests are hard to make pass and have little regression-testing value") diff --git a/test/integration/test_html.rb b/test/integration/test_html.rb index b00d5ac8..ab9746eb 100644 --- a/test/integration/test_html.rb +++ b/test/integration/test_html.rb @@ -35,10 +35,10 @@ class IntegrationTestHtml < Loofah::TestCase end end - context 'with an `encoding` arg' do + context "with an `encoding` arg" do it "sets the parent document's encoding to accordingly" do - html = Loofah.fragment "
bar
", 'US-ASCII' - assert_equal 'US-ASCII', html.document.encoding + html = Loofah.fragment "
bar
", "US-ASCII" + assert_equal "US-ASCII", html.document.encoding end end end @@ -69,4 +69,3 @@ class IntegrationTestHtml < Loofah::TestCase end end end - diff --git a/test/integration/test_scrubbers.rb b/test/integration/test_scrubbers.rb index 7d801fcb..ed8604f2 100644 --- a/test/integration/test_scrubbers.rb +++ b/test/integration/test_scrubbers.rb @@ -1,34 +1,33 @@ require "helper" class IntegrationTestScrubbers < Loofah::TestCase - INVALID_FRAGMENT = "foo

bar

bazz
quux
" - INVALID_ESCAPED = "<invalid>foo<p>bar</p>bazz</invalid>
quux
" - INVALID_PRUNED = "
quux
" + INVALID_ESCAPED = "<invalid>foo<p>bar</p>bazz</invalid>
quux
" + INVALID_PRUNED = "
quux
" INVALID_STRIPPED = "foo

bar

bazz
quux
" WHITEWASH_FRAGMENT = "no
foo
bar" - WHITEWASH_RESULT = "
foo
" + WHITEWASH_RESULT = "
foo
" NOFOLLOW_FRAGMENT = 'Click here' - NOFOLLOW_RESULT = 'Click here' + NOFOLLOW_RESULT = 'Click here' NOFOLLOW_WITH_REL_FRAGMENT = 'Click here' - NOFOLLOW_WITH_REL_RESULT = 'Click here' + NOFOLLOW_WITH_REL_RESULT = 'Click here' NOOPENER_FRAGMENT = 'Click here' - NOOPENER_RESULT = 'Click here' + NOOPENER_RESULT = 'Click here' NOOPENER_WITH_REL_FRAGMENT = 'Click here' - NOOPENER_WITH_REL_RESULT = 'Click here' + NOOPENER_WITH_REL_RESULT = 'Click here' UNPRINTABLE_FRAGMENT = "Lo\u2029ofah ro\u2028cks!" UNPRINTABLE_RESULT = "Loofah rocks!" - ENTITY_FRAGMENT = "

this is < that "&" the other > boo'ya

w00t
" - ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t) + ENTITY_FRAGMENT = "

this is < that "&" the other > boo'ya

w00t
" + ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t) - ENTITY_HACK_ATTACK = "
Hack attack!
<script>alert('evil')</script>
" + ENTITY_HACK_ATTACK = "
Hack attack!
<script>alert('evil')</script>
" ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!<script>alert('evil')</script>" ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!" @@ -39,7 +38,7 @@ class IntegrationTestScrubbers < Loofah::TestCase doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}" result = doc.scrub! :escape - assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html + assert_equal INVALID_ESCAPED, doc.xpath("/html/body").inner_html assert_equal doc, result end end @@ -49,7 +48,7 @@ class IntegrationTestScrubbers < Loofah::TestCase doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}" result = doc.scrub! :prune - assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html + assert_equal INVALID_PRUNED, doc.xpath("/html/body").inner_html assert_equal doc, result end end @@ -59,7 +58,7 @@ class IntegrationTestScrubbers < Loofah::TestCase doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}" result = doc.scrub! :strip - assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html + assert_equal INVALID_STRIPPED, doc.xpath("/html/body").inner_html assert_equal doc, result end end @@ -69,7 +68,7 @@ class IntegrationTestScrubbers < Loofah::TestCase doc = Loofah::HTML::Document.parse "#{WHITEWASH_FRAGMENT}" result = doc.scrub! :whitewash - assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html + assert_equal WHITEWASH_RESULT, doc.xpath("/html/body").inner_html assert_equal doc, result end end @@ -79,7 +78,7 @@ class IntegrationTestScrubbers < Loofah::TestCase doc = Loofah::HTML::Document.parse "#{NOFOLLOW_FRAGMENT}" result = doc.scrub! :nofollow - assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html + assert_equal NOFOLLOW_RESULT, doc.xpath("/html/body").inner_html assert_equal doc, result end end @@ -177,7 +176,7 @@ class IntegrationTestScrubbers < Loofah::TestCase EOHTML node = xml.at_css "div.scrub" node.scrub!(:prune) - assert_match %r/I should remain/, xml.to_s + assert_match %r/I should remain/, xml.to_s refute_match %r/I should be removed/, xml.to_s end end @@ -202,8 +201,8 @@ class IntegrationTestScrubbers < Loofah::TestCase node_set = xml.css "div.scrub" assert_equal 2, node_set.length node_set.scrub!(:prune) - assert_match %r/I should remain/, xml.to_s - refute_match %r/I should be removed/, xml.to_s + assert_match %r/I should remain/, xml.to_s + refute_match %r/I should be removed/, xml.to_s refute_match %r/I should also be removed/, xml.to_s end end @@ -253,7 +252,6 @@ class IntegrationTestScrubbers < Loofah::TestCase end context ":nofollow" do - context "for a hyperlink that does not have a rel attribute" do it "add a 'nofollow' attribute to hyperlinks" do doc = Loofah::HTML::DocumentFragment.parse "
#{NOFOLLOW_FRAGMENT}
" @@ -266,15 +264,13 @@ class IntegrationTestScrubbers < Loofah::TestCase context "for a hyperlink that does have a rel attribute" do it "appends nofollow to rel attribute" do - doc = Loofah::HTML::DocumentFragment.parse "
#{NOFOLLOW_WITH_REL_FRAGMENT}
" - result = doc.scrub! :nofollow + doc = Loofah::HTML::DocumentFragment.parse "
#{NOFOLLOW_WITH_REL_FRAGMENT}
" + result = doc.scrub! :nofollow - assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html - assert_equal doc, result + assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html + assert_equal doc, result end end - - end context ":noopener" do @@ -367,7 +363,7 @@ class IntegrationTestScrubbers < Loofah::TestCase EOHTML node = xml.at_css "div.scrub" node.scrub!(:prune) - assert_match %r(I should remain), xml.to_s + assert_match %r(I should remain), xml.to_s refute_match %r(I should be removed), xml.to_s end end @@ -390,8 +386,8 @@ class IntegrationTestScrubbers < Loofah::TestCase node_set = xml.css "div.scrub" assert_equal 2, node_set.length node_set.scrub!(:prune) - assert_match %r/I should remain/, xml.to_s - refute_match %r/I should be removed/, xml.to_s + assert_match %r/I should remain/, xml.to_s + refute_match %r/I should be removed/, xml.to_s refute_match %r/I should also be removed/, xml.to_s end end diff --git a/test/integration/test_xml.rb b/test/integration/test_xml.rb index 45683055..7abc048d 100644 --- a/test/integration/test_xml.rb +++ b/test/integration/test_xml.rb @@ -18,7 +18,7 @@ class IntegrationTestXml < Loofah::TestCase end end assert_equal 2, xml.css("employee").length - + xml.scrub!(bring_out_your_dead) employees = xml.css "employee" @@ -42,7 +42,7 @@ class IntegrationTestXml < Loofah::TestCase end end assert_equal 2, xml.css("employee").length - + xml.scrub!(bring_out_your_dead) employees = xml.css "employee" diff --git a/test/unit/test_api.rb b/test/unit/test_api.rb index 31da1194..00b0cb67 100644 --- a/test/unit/test_api.rb +++ b/test/unit/test_api.rb @@ -1,10 +1,9 @@ require "helper" class UnitTestApi < Loofah::TestCase - - HTML = "
a
\n
b
" - XML_FRAGMENT = "
a
\n
b
" - XML = "#{XML_FRAGMENT}" + HTML = "
a
\n
b
" + XML_FRAGMENT = "
a
\n
b
" + XML = "#{XML_FRAGMENT}" describe "HTML" do it "creates documents" do @@ -117,26 +116,25 @@ class UnitTestApi < Loofah::TestCase def assert_html_documentish(doc) assert_kind_of Nokogiri::HTML::Document, doc - assert_kind_of Loofah::HTML::Document, doc + assert_kind_of Loofah::HTML::Document, doc assert_equal HTML, doc.xpath("/html/body").inner_html end def assert_html_fragmentish(doc) assert_kind_of Nokogiri::HTML::DocumentFragment, doc - assert_kind_of Loofah::HTML::DocumentFragment, doc + assert_kind_of Loofah::HTML::DocumentFragment, doc assert_equal HTML, doc.inner_html end def assert_xml_documentish(doc) assert_kind_of Nokogiri::XML::Document, doc - assert_kind_of Loofah::XML::Document, doc + assert_kind_of Loofah::XML::Document, doc assert_equal XML, doc.root.to_xml end def assert_xml_fragmentish(doc) assert_kind_of Nokogiri::XML::DocumentFragment, doc - assert_kind_of Loofah::XML::DocumentFragment, doc + assert_kind_of Loofah::XML::DocumentFragment, doc assert_equal XML_FRAGMENT, doc.children.to_xml end - end diff --git a/test/unit/test_helpers.rb b/test/unit/test_helpers.rb index c4bb3098..b82a529c 100644 --- a/test/unit/test_helpers.rb +++ b/test/unit/test_helpers.rb @@ -1,7 +1,6 @@ require "helper" class UnitTestHelpers < Loofah::TestCase - HTML_STRING = "
omgwtfbbq
" describe "Helpers" do diff --git a/test/unit/test_scrubber.rb b/test/unit/test_scrubber.rb index 1a9e358f..97f9aeb1 100644 --- a/test/unit/test_scrubber.rb +++ b/test/unit/test_scrubber.rb @@ -1,12 +1,11 @@ require "helper" class UnitTestScrubber < Loofah::TestCase - FRAGMENT = "hellogoodbye" - FRAGMENT_NODE_COUNT = 4 # span, text, span, text + FRAGMENT_NODE_COUNT = 4 # span, text, span, text FRAGMENT_NODE_STOP_TOP_DOWN = 2 # span, span DOCUMENT = "hellogoodbye" - DOCUMENT_NODE_COUNT = 8 # html, head, link, body, span, text, span, text + DOCUMENT_NODE_COUNT = 8 # html, head, link, body, span, text, span, text DOCUMENT_NODE_STOP_TOP_DOWN = 1 # html context "receiving a block" do @@ -148,7 +147,7 @@ class UnitTestScrubber < Loofah::TestCase @klass = Class.new(Loofah::Scrubber) do attr_accessor :count - def initialize(direction=nil) + def initialize(direction = nil) @direction = direction @count = 0 end @@ -215,7 +214,7 @@ def scrub(node) context "creating a new Scrubber class with no scrub method" do before do @klass = Class.new(Loofah::Scrubber) do - def initialize ; end + def initialize; end end @scrubber = @klass.new end diff --git a/test/unit/test_scrubbers.rb b/test/unit/test_scrubbers.rb index 792e1560..c757345b 100644 --- a/test/unit/test_scrubbers.rb +++ b/test/unit/test_scrubbers.rb @@ -1,7 +1,7 @@ require "helper" class UnitTestScrubbers < Loofah::TestCase - [ Loofah::HTML::Document, Loofah::HTML::DocumentFragment ].each do |klass| + [Loofah::HTML::Document, Loofah::HTML::DocumentFragment].each do |klass| context klass do context "bad scrub method" do it "raise a ScrubberNotFound exception" do