Skip to content

Commit

Permalink
extract CSS selector cache into CSS::SelectorCache (#3226)
Browse files Browse the repository at this point in the history
**What problem is this PR intended to solve?**

This is part of a continuing track of work to separate out the concerns
of CSS parser, CSS selector cache, and XPath expression translation into
distinct components.

In this PR, I've extracted a CSS::SelectorCache class providing
functionality that was previously built directly into CSS::Parser (with
an awkward API that I'm responsible for writing in 2008).

- The methods `Parser.set_cache`, `Parser.cache_on?`, and
`Parser.without_cache(&blk)` have been removed.
- The cache is now injected by `CSS.xpath_for` (optionally, via a
`cache:` keyword argument) instead of being built into the parser.
- Documentation for `CSS.xpath_for` has also been updated and improved.


**Have you included adequate test coverage?**

Mostly an internal refactor, existing test coverage is sufficient except
where I removed tests of the removed methods.


**Does this change affect the behavior of either the C or the Java
implementations?**

N/A
  • Loading branch information
flavorjones authored Jun 11, 2024
2 parents 396fa3f + b889f89 commit 409e041
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 173 deletions.
37 changes: 28 additions & 9 deletions lib/nokogiri/css.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def parse(selector) # :nodoc:

# :call-seq:
# xpath_for(selector_list) → Array<String>
# xpath_for(selector_list [, prefix:] [, visitor:] [, ns:]) → Array<String>
# xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
#
# Translate a CSS selector list to the equivalent XPath expressions.
#
Expand All @@ -36,21 +36,31 @@ def parse(selector) # :nodoc:
# value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
# examples).
#
# [Keyword arguments]
# - +prefix:+ (String)
#
# The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
# for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
#
# - +ns:+ (Hash<String ⇒ String>, nil)
#
# Namespaces that are referenced in the query, if any. This is a hash where the keys are the
# namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
# empty set of namespaces.
#
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
#
# The visitor class to use to transform the AST into XPath. Default is
# +Nokogiri::CSS::XPathVisitor.new+. See Nokogiri::CSS::XPathVisitor for more information on
# some of the complex behavior that can be customized for your document type.
# Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
# Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
# be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
#
# - +ns:+ (Hash<String ⇒ String>)
# ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
# provided, +prefix+ and +ns+ must not be present.
#
# Namespaces that are referenced in the query, if any. This is a hash where the keys are the
# namespace prefix and the values are the namespace URIs. Default is an empty Hash.
# - +cache:+ (Boolean)
#
# Whether to use the SelectorCache for the translated query to ensure that repeated queries
# don't incur the overhead of re-parsing the selector. Default is +true+.
#
# [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
#
Expand All @@ -72,7 +82,10 @@ def parse(selector) # :nodoc:
#
def xpath_for(
selector, options = nil,
prefix: options&.delete(:prefix), visitor: options&.delete(:visitor), ns: options&.delete(:ns)
prefix: options&.delete(:prefix),
visitor: options&.delete(:visitor),
ns: options&.delete(:ns),
cache: true
)
unless options.nil?
warn("Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
Expand All @@ -96,12 +109,18 @@ def xpath_for(
Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
end

Parser.new.xpath_for(selector, visitor)
if cache
key = SelectorCache.key(selector: selector, visitor: visitor)
SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
else
Parser.new.xpath_for(selector, visitor)
end
end
end
end
end

require_relative "css/selector_cache"
require_relative "css/node"
require_relative "css/xpath_visitor"
x = $-w
Expand Down
64 changes: 3 additions & 61 deletions lib/nokogiri/css/parser_extras.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,57 +5,6 @@
module Nokogiri
module CSS
class Parser < Racc::Parser # :nodoc:
CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off

@cache = {}
@mutex = Mutex.new

class << self
# Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
def cache_on?
!Thread.current[CACHE_SWITCH_NAME]
end

# Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
def set_cache(value) # rubocop:disable Naming/AccessorMethodName
Thread.current[CACHE_SWITCH_NAME] = !value
end

# Get the css selector in +string+ from the cache
def [](string)
return unless cache_on?

@mutex.synchronize { @cache[string] }
end

# Set the css selector in +string+ in the cache to +value+
def []=(string, value)
return value unless cache_on?

@mutex.synchronize { @cache[string] = value }
end

# Clear the cache
def clear_cache(create_new_object = false)
@mutex.synchronize do
if create_new_object
@cache = {}
else
@cache.clear
end
end
end

# Execute +block+ without cache
def without_cache(&block)
original_cache_setting = cache_on?
set_cache(false)
yield
ensure
set_cache(original_cache_setting)
end
end

def initialize
@tokenizer = Tokenizer.new
super
Expand All @@ -70,10 +19,9 @@ def next_token
@tokenizer.next_token
end

# Get the xpath for +string+ using +options+
def xpath_for(string, visitor)
key = cache_key(string, visitor)
self.class[key] ||= parse(string).map do |ast|
# Get the xpath for +selector+ using +visitor+
def xpath_for(selector, visitor)
parse(selector).map do |ast|
ast.to_xpath(visitor)
end
end
Expand All @@ -83,12 +31,6 @@ def on_error(error_token_id, error_value, value_stack)
after = value_stack.compact.last
raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
end

def cache_key(query, visitor)
if self.class.cache_on?
[query, visitor.config]
end
end
end
end
end
40 changes: 40 additions & 0 deletions lib/nokogiri/css/selector_cache.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# frozen_string_literal: true

require "thread"

module Nokogiri
module CSS
module SelectorCache # :nodoc:
@cache = {}
@mutex = Mutex.new

class << self
# Retrieve the cached XPath expressions for the key
def [](key)
@mutex.synchronize { @cache[key] }
end

# Insert the XPath expressions `value` at the cache key
def []=(key, value)
@mutex.synchronize { @cache[key] = value }
end

# Clear the cache
def clear_cache(create_new_object = false)
@mutex.synchronize do
if create_new_object
@cache = {}
else
@cache.clear
end
end
end

# Construct a unique key cache key
def key(selector:, visitor:)
[selector, visitor.config]
end
end
end
end
end
8 changes: 3 additions & 5 deletions lib/nokogiri/decorators/slop.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,9 @@ def method_missing(name, *args, &block)
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
end
else
CSS::Parser.without_cache do
list = xpath(
*CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX),
)
end
list = xpath(
*CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
)
end

super if list.empty?
Expand Down
1 change: 1 addition & 0 deletions nokogiri.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ Gem::Specification.new do |spec|
"lib/nokogiri/css/parser.rb",
"lib/nokogiri/css/parser.y",
"lib/nokogiri/css/parser_extras.rb",
"lib/nokogiri/css/selector_cache.rb",
"lib/nokogiri/css/syntax_error.rb",
"lib/nokogiri/css/tokenizer.rb",
"lib/nokogiri/css/tokenizer.rex",
Expand Down
14 changes: 6 additions & 8 deletions test/css/test_css.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,14 @@
end

it "accepts a CSS::XPathVisitor" do
Nokogiri::CSS::Parser.without_cache do
mock_visitor = Minitest::Mock.new
mock_visitor.expect(:accept, "injected-value", [Nokogiri::CSS::Node])
mock_visitor.expect(:prefix, "//")
mock_visitor = Minitest::Mock.new
mock_visitor.expect(:accept, "injected-value", [Nokogiri::CSS::Node])
mock_visitor.expect(:prefix, "//")

result = Nokogiri::CSS.xpath_for("foo", visitor: mock_visitor)
result = Nokogiri::CSS.xpath_for("foo", visitor: mock_visitor, cache: false)

mock_visitor.verify
assert_equal(["//injected-value"], result)
end
mock_visitor.verify
assert_equal(["//injected-value"], result)
end

it "accepts an options hash" do
Expand Down
Loading

0 comments on commit 409e041

Please sign in to comment.