Skip to content

Commit

Permalink
run rufo on test files
Browse files (browse the repository at this point in the history)
  • Loading branch information
flavorjones committed Nov 25, 2019
1 parent ad7d598 commit ed153a7
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 104 deletions.
7 changes: 6 additions & 1 deletion benchmark/benchmark.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def compare_scrub_methods
end

module TestSet
def test_set options={}
def test_set(options = {})
scale = options[:rehearse] ? 10 : 1
puts self.class.name

Expand All @@ -49,6 +49,7 @@ class HeadToHead < Measure

class HeadToHeadRailsSanitize < Measure
include TestSet

def bench(content, ntimes, fragment_p)
clear_measure

Expand All @@ -65,6 +66,7 @@ def bench(content, ntimes, fragment_p)

class HeadToHeadRailsStripTags < Measure
include TestSet

def bench(content, ntimes, fragment_p)
clear_measure

Expand All @@ -81,6 +83,7 @@ def bench(content, ntimes, fragment_p)

class HeadToHeadSanitizerSanitize < Measure
include TestSet

def bench(content, ntimes, fragment_p)
clear_measure

Expand All @@ -100,6 +103,7 @@ def bench(content, ntimes, fragment_p)

class HeadToHeadHtml5LibSanitize < Measure
include TestSet

def bench(content, ntimes, fragment_p)
clear_measure

Expand All @@ -120,6 +124,7 @@ def bench(content, ntimes, fragment_p)

class HeadToHeadHTMLFilter < Measure
include TestSet

def bench(content, ntimes, fragment_p)
clear_measure

Expand Down
30 changes: 15 additions & 15 deletions benchmark/helper.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
require 'rubygems'
require 'open-uri'
require 'hpricot'
require "rubygems"
require "open-uri"
require "hpricot"
require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah")
require 'benchmark'
require "benchmark"
require "action_view"
require "action_controller/vendor/html-scanner"
require "sanitize"
require 'hitimes'
require 'htmlfilter'
require "hitimes"
require "htmlfilter"

unless defined?(HTMLFilter)
HTMLFilter = HtmlFilter
Expand All @@ -19,20 +19,20 @@ class RailsSanitize
end

class HTML5libSanitize
require 'html5/html5parser'
require 'html5/liberalxmlparser'
require 'html5/treewalkers'
require 'html5/treebuilders'
require 'html5/serializer'
require 'html5/sanitizer'
require "html5/html5parser"
require "html5/liberalxmlparser"
require "html5/treewalkers"
require "html5/treebuilders"
require "html5/serializer"
require "html5/sanitizer"

include HTML5

def sanitize(html)
HTMLParser.parse_fragment(html, {
:tokenizer => HTMLSanitizer,
:encoding => 'utf-8',
:tree => TreeBuilders::REXML::TreeBuilder
:tokenizer => HTMLSanitizer,
:encoding => "utf-8",
:tree => TreeBuilders::REXML::TreeBuilder,
}).to_s
end
end
Expand Down
10 changes: 5 additions & 5 deletions test/helper.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
require 'rubygems'
require 'minitest/unit'
require 'minitest/spec'
require 'minitest/autorun'
require 'rr'
require "rubygems"
require "minitest/unit"
require "minitest/spec"
require "minitest/autorun"
require "rr"

require File.expand_path(File.join(File.dirname(__FILE__), "..", "lib", "loofah"))

Expand Down
62 changes: 30 additions & 32 deletions test/html5/test_sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,60 +9,60 @@
class Html5TestSanitizer < Loofah::TestCase
include Loofah

def sanitize_xhtml stream
def sanitize_xhtml(stream)
Loofah.fragment(stream).scrub!(:escape).to_xhtml
end

def sanitize_html stream
def sanitize_html(stream)
Loofah.fragment(stream).scrub!(:escape).to_html
end

def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
sane = sanitize_html(input).gsub('"',"'")
htmloutput = htmloutput.gsub('"',"'")
xhtmloutput = xhtmloutput.gsub('"',"'")
rexmloutput = rexmloutput.gsub('"',"'")
sane = sanitize_html(input).gsub('"', "'")
htmloutput = htmloutput.gsub('"', "'")
xhtmloutput = xhtmloutput.gsub('"', "'")
rexmloutput = rexmloutput.gsub('"', "'")

## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
## it would require a lot of manual hacking to make the tests match libxml's output.
## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane),
%Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"})
%Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"})
end

def assert_completes_in_reasonable_time &block
def assert_completes_in_reasonable_time(&block)
t0 = Time.now
block.call
assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
end

(HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
rexmloutput = xhtmloutput

if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
elsif tag_name == 'col'
elsif tag_name == "col"
htmloutput = "<col title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
rexmloutput = "<col title='1' />"
elsif tag_name == 'table'
elsif tag_name == "table"
htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
xhtmloutput = htmloutput
elsif tag_name == 'image'
elsif tag_name == "image"
htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
htmloutput += '<br/>' if tag_name == 'br'
rexmloutput = "<#{tag_name} title='1' />"
htmloutput += "<br/>" if tag_name == "br"
rexmloutput = "<#{tag_name} title='1' />"
end
check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
end
Expand All @@ -80,9 +80,9 @@ def assert_completes_in_reasonable_time &block
# end

HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
next if attribute_name == 'style'
next if attribute_name == "style"
define_method "test_should_allow_#{attribute_name}_attribute" do
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name)
output = "<p #{attribute_name}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
htmloutput = "<p #{attribute_name.downcase}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
Expand Down Expand Up @@ -178,7 +178,6 @@ def test_should_disallow_other_uri_mediatypes
check_sanitization(input, output, output, output)
end


HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
define_method "test_#{tag_name}_should_allow_local_href" do
Expand Down Expand Up @@ -228,28 +227,28 @@ def test_figure_element_is_valid
# check_sanitization(input, output, output, output)
# end

# This affects only NS4. Is it worth fixing?
# def test_javascript_includes
# input = %(<div size="&{alert('XSS')}">foo</div>)
# output = "<div>foo</div>"
# check_sanitization(input, output, output, output)
# end
# This affects only NS4. Is it worth fixing?
# def test_javascript_includes
# input = %(<div size="&{alert('XSS')}">foo</div>)
# output = "<div>foo</div>"
# check_sanitization(input, output, output, output)
# end

##
## these tests primarily test the parser logic, not the sanitizer
## logic. i call bullshit. we're not writing a test suite for
## libxml2 here, so let's rely on the unit tests above to take care
## of our valid elements and attributes.
##
require 'json'
Dir[File.join(File.dirname(__FILE__), '..', 'assets', 'testdata_sanitizer_tests1.dat')].each do |filename|
require "json"
Dir[File.join(File.dirname(__FILE__), "..", "assets", "testdata_sanitizer_tests1.dat")].each do |filename|
JSON::parse(open(filename).read).each do |test|
it "testdata sanitizer #{test['name']}" do
it "testdata sanitizer #{test["name"]}" do
check_sanitization(
test['input'],
test['output'],
test['xhtml'] || test['output'],
test['rexml'] || test['output']
test["input"],
test["output"],
test["xhtml"] || test["output"],
test["rexml"] || test["output"]
)
end
end
Expand Down Expand Up @@ -338,7 +337,6 @@ def test_css_max_width
assert_match %r/max-width/, sane.inner_html
end


def test_issue_90_slow_regex
skip("timing tests are hard to make pass and have little regression-testing value")

Expand Down
7 changes: 3 additions & 4 deletions test/integration/test_html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ class IntegrationTestHtml < Loofah::TestCase
end
end

context 'with an `encoding` arg' do
context "with an `encoding` arg" do
it "sets the parent document's encoding to accordingly" do
html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
assert_equal 'US-ASCII', html.document.encoding
html = Loofah.fragment "<style>foo</style><div>bar</div>", "US-ASCII"
assert_equal "US-ASCII", html.document.encoding
end
end
end
Expand Down Expand Up @@ -69,4 +69,3 @@ class IntegrationTestHtml < Loofah::TestCase
end
end
end

Loading

0 comments on commit ed153a7

Please sign in to comment.