From a6e0a1ab90675a17b1b2be189129d94139e4b143 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 17 Nov 2022 13:25:01 -0500 Subject: [PATCH] fix: replace slow regex attribute check with crass parser --- lib/loofah/html5/scrub.rb | 27 ++++++++++++++++++++++- test/assets/testdata_sanitizer_tests1.dat | 6 ++--- test/html5/test_sanitizer.rb | 18 ++++----------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/lib/loofah/html5/scrub.rb b/lib/loofah/html5/scrub.rb index 3924c47..4b35807 100644 --- a/lib/loofah/html5/scrub.rb +++ b/lib/loofah/html5/scrub.rb @@ -51,9 +51,11 @@ def scrub_attributes(node) end end end + if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) - attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value + scrub_attribute_that_allows_local_ref(attr_node) end + if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m attr_node.remove next @@ -127,6 +129,29 @@ def scrub_css(style) Crass::Parser.stringify(sanitized_tree) end + def scrub_attribute_that_allows_local_ref(attr_node) + return unless attr_node.value + + nodes = Crass::Parser.new(attr_node.value).parse_component_values + + values = nodes.map do |node| + case node[:node] + when :url + if node[:value].start_with?("#") + node[:raw] + else + nil + end + when :hash, :ident, :string + node[:raw] + else + nil + end + end.compact + + attr_node.value = values.join(" ") + end + # # libxml2 >= 2.9.2 fails to escape comments within some attributes. # diff --git a/test/assets/testdata_sanitizer_tests1.dat b/test/assets/testdata_sanitizer_tests1.dat index 17751ff..5267a33 100644 --- a/test/assets/testdata_sanitizer_tests1.dat +++ b/test/assets/testdata_sanitizer_tests1.dat @@ -463,9 +463,9 @@ { "name": "absolute_uri_refs_in_svg_attributes", "input": "", - "rexml": "", - "xhtml": "", - "output": "" + "rexml": "", + "xhtml": "", + "output": "" }, { diff --git a/test/html5/test_sanitizer.rb b/test/html5/test_sanitizer.rb index c4e7f82..662d0b3 100755 --- a/test/html5/test_sanitizer.rb +++ b/test/html5/test_sanitizer.rb @@ -267,15 +267,15 @@ def test_figure_element_is_valid ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name| - define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do + define_method "test_allow_uri_refs_in_svg_attribute_#{attr_name}" do input = "" output = "" check_sanitization(input, output, output, output) end - define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do - input = "" - output = "" + define_method "test_disallow_absolute_uri_refs_in_svg_attribute_#{attr_name}" do + input = "" + output = "" check_sanitization(input, output, output, output) end end @@ -480,16 +480,6 @@ def test_css_order assert_match %r/order:5/, sane.inner_html end - def test_issue_90_slow_regex - skip("timing tests are hard to make pass and have little regression-testing value") - - html = %q{} - - assert_completes_in_reasonable_time { - Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html) - } - end - def test_upper_case_css_property html = "
asdf
" sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)