From b889f89bf03b46d3ca990ab370dafe8f05690720 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 11 Jun 2024 13:40:35 -0400 Subject: [PATCH] extract CSS selector cache into CSS::SelectorCache The methods `Parser.set_cache`, `Parser.cache_on?`, and `Parser.without_cache(&blk)` have been removed. The cache is now injected by `CSS.xpath_for` (optionally, via a `cache:` keyword argument) instead of being built into the parser. Documentation for `CSS.xpath_for` has also been updated and improved. --- lib/nokogiri/css.rb | 37 +++++++--- lib/nokogiri/css/parser_extras.rb | 64 +---------------- lib/nokogiri/css/selector_cache.rb | 40 +++++++++++ lib/nokogiri/decorators/slop.rb | 8 +-- nokogiri.gemspec | 1 + test/css/test_css.rb | 14 ++-- test/css/test_parser_cache.rb | 106 +++++------------------------ 7 files changed, 97 insertions(+), 173 deletions(-) create mode 100644 lib/nokogiri/css/selector_cache.rb diff --git a/lib/nokogiri/css.rb b/lib/nokogiri/css.rb index 69cdfa3ee0e..b8104ca4753 100644 --- a/lib/nokogiri/css.rb +++ b/lib/nokogiri/css.rb @@ -14,7 +14,7 @@ def parse(selector) # :nodoc: # :call-seq: # xpath_for(selector_list) → Array - # xpath_for(selector_list [, prefix:] [, visitor:] [, ns:]) → Array + # xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array # # Translate a CSS selector list to the equivalent XPath expressions. # @@ -36,21 +36,31 @@ def parse(selector) # :nodoc: # value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see # examples). # + # [Keyword arguments] # - +prefix:+ (String) # # The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath # for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+. # + # - +ns:+ (Hash, nil) + # + # Namespaces that are referenced in the query, if any. This is a hash where the keys are the + # namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an + # empty set of namespaces. + # # - +visitor:+ (Nokogiri::CSS::XPathVisitor) # - # The visitor class to use to transform the AST into XPath. Default is - # +Nokogiri::CSS::XPathVisitor.new+. See Nokogiri::CSS::XPathVisitor for more information on - # some of the complex behavior that can be customized for your document type. + # Use this XPathVisitor object to transform the CSS AST into XPath expressions. See + # Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can + # be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+. # - # - +ns:+ (Hash) + # ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is + # provided, +prefix+ and +ns+ must not be present. # - # Namespaces that are referenced in the query, if any. This is a hash where the keys are the - # namespace prefix and the values are the namespace URIs. Default is an empty Hash. + # - +cache:+ (Boolean) + # + # Whether to use the SelectorCache for the translated query to ensure that repeated queries + # don't incur the overhead of re-parsing the selector. Default is +true+. # # [Returns] (Array) The equivalent set of XPath expressions for +selector_list+ # @@ -72,7 +82,10 @@ def parse(selector) # :nodoc: # def xpath_for( selector, options = nil, - prefix: options&.delete(:prefix), visitor: options&.delete(:visitor), ns: options&.delete(:ns) + prefix: options&.delete(:prefix), + visitor: options&.delete(:visitor), + ns: options&.delete(:ns), + cache: true ) unless options.nil? warn("Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated) @@ -96,12 +109,18 @@ def xpath_for( Nokogiri::CSS::XPathVisitor.new(**visitor_kw) end - Parser.new.xpath_for(selector, visitor) + if cache + key = SelectorCache.key(selector: selector, visitor: visitor) + SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor) + else + Parser.new.xpath_for(selector, visitor) + end end end end end +require_relative "css/selector_cache" require_relative "css/node" require_relative "css/xpath_visitor" x = $-w diff --git a/lib/nokogiri/css/parser_extras.rb b/lib/nokogiri/css/parser_extras.rb index 611d9cb2a30..7ffb2605b13 100644 --- a/lib/nokogiri/css/parser_extras.rb +++ b/lib/nokogiri/css/parser_extras.rb @@ -5,57 +5,6 @@ module Nokogiri module CSS class Parser < Racc::Parser # :nodoc: - CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off - - @cache = {} - @mutex = Mutex.new - - class << self - # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.) - def cache_on? - !Thread.current[CACHE_SWITCH_NAME] - end - - # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off. - def set_cache(value) # rubocop:disable Naming/AccessorMethodName - Thread.current[CACHE_SWITCH_NAME] = !value - end - - # Get the css selector in +string+ from the cache - def [](string) - return unless cache_on? - - @mutex.synchronize { @cache[string] } - end - - # Set the css selector in +string+ in the cache to +value+ - def []=(string, value) - return value unless cache_on? - - @mutex.synchronize { @cache[string] = value } - end - - # Clear the cache - def clear_cache(create_new_object = false) - @mutex.synchronize do - if create_new_object - @cache = {} - else - @cache.clear - end - end - end - - # Execute +block+ without cache - def without_cache(&block) - original_cache_setting = cache_on? - set_cache(false) - yield - ensure - set_cache(original_cache_setting) - end - end - def initialize @tokenizer = Tokenizer.new super @@ -70,10 +19,9 @@ def next_token @tokenizer.next_token end - # Get the xpath for +string+ using +options+ - def xpath_for(string, visitor) - key = cache_key(string, visitor) - self.class[key] ||= parse(string).map do |ast| + # Get the xpath for +selector+ using +visitor+ + def xpath_for(selector, visitor) + parse(selector).map do |ast| ast.to_xpath(visitor) end end @@ -83,12 +31,6 @@ def on_error(error_token_id, error_value, value_stack) after = value_stack.compact.last raise SyntaxError, "unexpected '#{error_value}' after '#{after}'" end - - def cache_key(query, visitor) - if self.class.cache_on? - [query, visitor.config] - end - end end end end diff --git a/lib/nokogiri/css/selector_cache.rb b/lib/nokogiri/css/selector_cache.rb new file mode 100644 index 00000000000..ca32246219f --- /dev/null +++ b/lib/nokogiri/css/selector_cache.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +require "thread" + +module Nokogiri + module CSS + module SelectorCache # :nodoc: + @cache = {} + @mutex = Mutex.new + + class << self + # Retrieve the cached XPath expressions for the key + def [](key) + @mutex.synchronize { @cache[key] } + end + + # Insert the XPath expressions `value` at the cache key + def []=(key, value) + @mutex.synchronize { @cache[key] = value } + end + + # Clear the cache + def clear_cache(create_new_object = false) + @mutex.synchronize do + if create_new_object + @cache = {} + else + @cache.clear + end + end + end + + # Construct a unique key cache key + def key(selector:, visitor:) + [selector, visitor.config] + end + end + end + end +end diff --git a/lib/nokogiri/decorators/slop.rb b/lib/nokogiri/decorators/slop.rb index f012ffdfdb9..b8dfac12a04 100644 --- a/lib/nokogiri/decorators/slop.rb +++ b/lib/nokogiri/decorators/slop.rb @@ -23,11 +23,9 @@ def method_missing(name, *args, &block) list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]") end else - CSS::Parser.without_cache do - list = xpath( - *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX), - ) - end + list = xpath( + *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false), + ) end super if list.empty? diff --git a/nokogiri.gemspec b/nokogiri.gemspec index 44322172004..49477c6543f 100644 --- a/nokogiri.gemspec +++ b/nokogiri.gemspec @@ -233,6 +233,7 @@ Gem::Specification.new do |spec| "lib/nokogiri/css/parser.rb", "lib/nokogiri/css/parser.y", "lib/nokogiri/css/parser_extras.rb", + "lib/nokogiri/css/selector_cache.rb", "lib/nokogiri/css/syntax_error.rb", "lib/nokogiri/css/tokenizer.rb", "lib/nokogiri/css/tokenizer.rex", diff --git a/test/css/test_css.rb b/test/css/test_css.rb index 9093366ef0f..0cb3b37f5ab 100644 --- a/test/css/test_css.rb +++ b/test/css/test_css.rb @@ -9,16 +9,14 @@ end it "accepts a CSS::XPathVisitor" do - Nokogiri::CSS::Parser.without_cache do - mock_visitor = Minitest::Mock.new - mock_visitor.expect(:accept, "injected-value", [Nokogiri::CSS::Node]) - mock_visitor.expect(:prefix, "//") + mock_visitor = Minitest::Mock.new + mock_visitor.expect(:accept, "injected-value", [Nokogiri::CSS::Node]) + mock_visitor.expect(:prefix, "//") - result = Nokogiri::CSS.xpath_for("foo", visitor: mock_visitor) + result = Nokogiri::CSS.xpath_for("foo", visitor: mock_visitor, cache: false) - mock_visitor.verify - assert_equal(["//injected-value"], result) - end + mock_visitor.verify + assert_equal(["//injected-value"], result) end it "accepts an options hash" do diff --git a/test/css/test_parser_cache.rb b/test/css/test_parser_cache.rb index e78b3f48b36..afad8cd8bf5 100644 --- a/test/css/test_parser_cache.rb +++ b/test/css/test_parser_cache.rb @@ -8,8 +8,8 @@ def setup super @css = "a1 > b2 > c3" - Nokogiri::CSS::Parser.clear_cache - Nokogiri::CSS::Parser.class_eval do + Nokogiri::CSS::SelectorCache.clear_cache + Nokogiri::CSS::SelectorCache.class_eval do class << @cache alias_method :old_bracket, :[] @@ -24,48 +24,35 @@ def [](key) end end end - - assert_predicate(Nokogiri::CSS::Parser, :cache_on?) end def teardown - Nokogiri::CSS::Parser.clear_cache(true) - Nokogiri::CSS::Parser.set_cache(true) + Nokogiri::CSS::SelectorCache.clear_cache(true) super end [false, true].each do |cache_setting| define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do - Nokogiri::CSS::Parser.set_cache(cache_setting) - - Nokogiri::CSS.xpath_for(@css) - Nokogiri::CSS.xpath_for(@css) - Nokogiri::CSS::Parser.new.xpath_for(@css, Nokogiri::CSS::XPathVisitor.new(prefix: "//")) - Nokogiri::CSS::Parser.new.xpath_for(@css, Nokogiri::CSS::XPathVisitor.new(prefix: "//")) + Nokogiri::CSS.xpath_for(@css, cache: cache_setting) + Nokogiri::CSS.xpath_for(@css, cache: cache_setting) if cache_setting - assert_equal(1, Nokogiri::CSS::Parser.class_eval { @cache.count }) - assert_equal(4, Nokogiri::CSS::Parser.class_eval { @cache.access_count }) + assert_equal(1, Nokogiri::CSS::SelectorCache.class_eval { @cache.count }) + assert_equal(2, Nokogiri::CSS::SelectorCache.class_eval { @cache.access_count }) else - assert_equal(0, Nokogiri::CSS::Parser.class_eval { @cache.count }) - assert_equal(0, Nokogiri::CSS::Parser.class_eval { @cache.access_count }) + assert_equal(0, Nokogiri::CSS::SelectorCache.class_eval { @cache.count }) + assert_equal(0, Nokogiri::CSS::SelectorCache.class_eval { @cache.access_count }) end end end end class TestCssCache < Nokogiri::TestCase - def teardown - Nokogiri::CSS::Parser.set_cache(true) - super - end - def test_enabled_cache_is_used - Nokogiri::CSS::Parser.clear_cache - Nokogiri::CSS::Parser.set_cache(true) + Nokogiri::CSS::SelectorCache.clear_cache css = ".foo .bar .baz" - cache = Nokogiri::CSS::Parser.instance_variable_get(:@cache) + cache = Nokogiri::CSS::SelectorCache.instance_variable_get(:@cache) assert_empty(cache) Nokogiri::CSS.xpath_for(css) @@ -77,56 +64,20 @@ def test_enabled_cache_is_used end def test_disabled_cache_is_not_used - Nokogiri::CSS::Parser.clear_cache - Nokogiri::CSS::Parser.set_cache(false) - - css = ".foo .bar .baz" - cache = Nokogiri::CSS::Parser.instance_variable_get(:@cache) - - assert_empty(cache) - Nokogiri::CSS.xpath_for(css) - assert_empty(cache) - end - - def test_without_cache_avoids_cache - Nokogiri::CSS::Parser.clear_cache - Nokogiri::CSS::Parser.set_cache(true) + Nokogiri::CSS::SelectorCache.clear_cache css = ".foo .bar .baz" - cache = Nokogiri::CSS::Parser.instance_variable_get(:@cache) + cache = Nokogiri::CSS::SelectorCache.instance_variable_get(:@cache) assert_empty(cache) - Nokogiri::CSS::Parser.without_cache do - Nokogiri::CSS.xpath_for(css) - end + Nokogiri::CSS.xpath_for(css, cache: false) assert_empty(cache) end - def test_without_cache_resets_cache_value - Nokogiri::CSS::Parser.set_cache(true) - - Nokogiri::CSS::Parser.without_cache do - refute_predicate(Nokogiri::CSS::Parser, :cache_on?) - end - assert_predicate(Nokogiri::CSS::Parser, :cache_on?) - end - - def test_without_cache_resets_cache_value_even_after_exception - Nokogiri::CSS::Parser.set_cache(true) - - assert_raises(RuntimeError) do - Nokogiri::CSS::Parser.without_cache do - raise RuntimeError - end - end - assert_predicate(Nokogiri::CSS::Parser, :cache_on?) - end - def test_cache_key_on_ns_prefix_and_visitor_config - Nokogiri::CSS::Parser.clear_cache - Nokogiri::CSS::Parser.set_cache(true) + Nokogiri::CSS::SelectorCache.clear_cache - cache = Nokogiri::CSS::Parser.instance_variable_get(:@cache) + cache = Nokogiri::CSS::SelectorCache.instance_variable_get(:@cache) assert_empty(cache) Nokogiri::CSS.xpath_for("foo") @@ -151,30 +102,5 @@ def test_cache_key_on_ns_prefix_and_visitor_config ) assert_equal(5, cache.length) end - - def test_race_condition - # https://github.com/sparklemotion/nokogiri/issues/1935 - threads = [] - - Nokogiri::CSS::Parser.set_cache(true) - - threads << Thread.new do - Nokogiri::CSS::Parser.without_cache do - sleep(0.02) - end - end - - threads << Thread.new do - sleep(0.01) - - Nokogiri::CSS::Parser.without_cache do - sleep(0.02) - end - end - - threads.each(&:join) - - assert_predicate(Nokogiri::CSS::Parser, :cache_on?) - end end end