Skip to content

Commit

Permalink
Optimization: memoize matches in StructuralEvaluator
Browse files Browse the repository at this point in the history
As the StructuralEvaluator looks for matches on multiple elements (preceding siblings, ancestors, etc.), it's useful to keep a cache of previous match results. This saves re-executing the same match repeatedly.

The cache is implemented in a ThreadLocal, as an Evaluator may be reused across multiple queries.
  • Loading branch information
jhy committed May 30, 2023
1 parent 10ef981 commit c57e683
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 19 deletions.
2 changes: 2 additions & 0 deletions src/main/java/org/jsoup/select/Collector.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ private Collector() {}
@return list of matches; empty if none
*/
public static Elements collect (Evaluator eval, Element root) {
eval.reset();
Elements elements = new Elements();
NodeTraversor.traverse((node, depth) -> {
if (node instanceof Element) {
Expand All @@ -43,6 +44,7 @@ public static Elements collect (Evaluator eval, Element root) {
@return the first match; {@code null} if none
*/
public static @Nullable Element findFirst(Evaluator eval, Element root) {
    // reset the evaluator's internal state before starting this evaluation
    eval.reset();
    return new FirstFinder(eval).find(root, root);
}
Expand Down
7 changes: 7 additions & 0 deletions src/main/java/org/jsoup/select/CombiningEvaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ public abstract class CombiningEvaluator extends Evaluator {
updateNumEvaluators();
}

@Override protected void reset() {
    // reset every child evaluator first, then this evaluator itself
    evaluators.forEach(Evaluator::reset);
    super.reset();
}

@Nullable Evaluator rightMostEvaluator() {
    // no evaluators held: there is no right-most one
    if (num <= 0) {
        return null;
    }
    return evaluators.get(num - 1);
}
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/org/jsoup/select/Evaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ protected Evaluator() {
*/
public abstract boolean matches(Element root, Element element);

/**
Reset any internal state in this Evaluator before executing a new Collector evaluation.
<p>This base implementation does nothing; evaluators that hold state between matches override it.</p>
*/
protected void reset() {
}

/**
* Evaluator for tag name
*/
Expand Down
62 changes: 43 additions & 19 deletions src/main/java/org/jsoup/select/StructuralEvaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,37 @@
* Base structural evaluator.
*/
abstract class StructuralEvaluator extends Evaluator {
Evaluator evaluator;
final Evaluator evaluator;

/**
Create a structural evaluator wrapping the given evaluator.
@param evaluator the inner evaluator, stored in the {@code evaluator} field
*/
public StructuralEvaluator(Evaluator evaluator) {
this.evaluator = evaluator;
}

// Memoize inner matches, to save repeated re-evaluations of parent, sibling etc.
// Outer map is keyed by root, inner map by element; the Boolean is the cached result of evaluator.matches(root, element).
// Both maps compare Elements by identity. Held in a ThreadLocal in case the Evaluator is compiled then reused across multiple threads.
final ThreadLocal<IdentityHashMap<Element, IdentityHashMap<Element, Boolean>>>
threadMemo = ThreadLocal.withInitial(IdentityHashMap::new);

/**
Test whether the inner evaluator matches the element, consulting this thread's memo first and recording
the result on a miss.
@param root the root of the current evaluation; selects which per-root memo is used
@param element the element to test
@return the (possibly cached) result of {@code evaluator.matches(root, element)}
*/
boolean memoMatches(final Element root, final Element element) {
    // plain get/put rather than computeIfAbsent: the lambda form allocates a closure on every hit, which is heavy on GC
    final IdentityHashMap<Element, IdentityHashMap<Element, Boolean>> byRoot = threadMemo.get();
    IdentityHashMap<Element, Boolean> byElement = byRoot.get(root);
    if (byElement == null) {
        byElement = new IdentityHashMap<>();
        byRoot.put(root, byElement);
    }

    final Boolean cached = byElement.get(element);
    if (cached != null) {
        return cached;
    }

    final boolean result = evaluator.matches(root, element);
    byElement.put(element, result);
    return result;
}

/**
Clear the current thread's memoized match results, and reset the wrapped evaluator so that memos held by
any nested structural evaluators are cleared too.
*/
@Override protected void reset() {
    threadMemo.get().clear();
    // The wrapped evaluator may itself be, or contain, a StructuralEvaluator with its own memo. It is only
    // reachable through this field, so without this call its cached results would survive into the next query.
    evaluator.reset();
    super.reset();
}

static class Root extends Evaluator {
@Override
Expand All @@ -22,7 +52,7 @@ static class Has extends StructuralEvaluator {
final Collector.FirstFinder finder;

public Has(Evaluator evaluator) {
this.evaluator = evaluator;
super(evaluator);
finder = new Collector.FirstFinder(evaluator);
}

Expand All @@ -48,12 +78,12 @@ public String toString() {

static class Not extends StructuralEvaluator {
public Not(Evaluator evaluator) {
this.evaluator = evaluator;
super(evaluator);
}

@Override
public boolean matches(Element root, Element node) {
return !evaluator.matches(root, node);
public boolean matches(Element root, Element element) {
return !memoMatches(root, element);
}

@Override
Expand All @@ -64,7 +94,7 @@ public String toString() {

static class Parent extends StructuralEvaluator {
public Parent(Evaluator evaluator) {
this.evaluator = evaluator;
super(evaluator);
}

@Override
Expand All @@ -74,7 +104,7 @@ public boolean matches(Element root, Element element) {

Element parent = element.parent();
while (parent != null) {
if (evaluator.matches(root, parent))
if (memoMatches(root, parent))
return true;
if (parent == root)
break;
Expand All @@ -91,7 +121,7 @@ public String toString() {

static class ImmediateParent extends StructuralEvaluator {
public ImmediateParent(Evaluator evaluator) {
this.evaluator = evaluator;
super(evaluator);
}

@Override
Expand All @@ -100,7 +130,7 @@ public boolean matches(Element root, Element element) {
return false;

Element parent = element.parent();
return parent != null && evaluator.matches(root, parent);
return parent != null && memoMatches(root, parent);
}

@Override
Expand All @@ -110,10 +140,8 @@ public String toString() {
}

static class PreviousSibling extends StructuralEvaluator {
private final IdentityHashMap<Element, Boolean> memo = new IdentityHashMap<>(); // memoize results

public PreviousSibling(Evaluator evaluator) {
this.evaluator = evaluator;
super(evaluator);
}

@Override
Expand All @@ -125,11 +153,7 @@ public boolean matches(Element root, Element element) {
final int size = element.elementSiblingIndex();
for (int i = 0; i < size; i++) {
final Element el = parent.child(i);
Boolean matches = memo.get(el);
if (matches == null) {
matches = evaluator.matches(root, el);
memo.put(el, matches);
}
boolean matches = memoMatches(root, el);
if (matches)
return true;
}
Expand All @@ -144,7 +168,7 @@ public String toString() {

static class ImmediatePreviousSibling extends StructuralEvaluator {
public ImmediatePreviousSibling(Evaluator evaluator) {
this.evaluator = evaluator;
super(evaluator);
}

@Override
Expand All @@ -153,7 +177,7 @@ public boolean matches(Element root, Element element) {
return false;

Element prev = element.previousElementSibling();
return prev != null && evaluator.matches(root, prev);
return prev != null && memoMatches(root, prev);
}

@Override
Expand Down
21 changes: 21 additions & 0 deletions src/test/java/org/jsoup/select/SelectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.jsoup.parser.Parser;
import org.junit.jupiter.api.Test;

import java.util.IdentityHashMap;
import java.util.Locale;

import static org.junit.jupiter.api.Assertions.*;
Expand Down Expand Up @@ -1143,4 +1144,24 @@ public void wildcardNamespaceMatchesNoNamespace() {
Selector.SelectorParseException ex = new Selector.SelectorParseException("%&");
assertEquals("%&", ex.getMessage());
}

@Test public void evaluatorMemosAreReset() {
    // the parsed query is an And evaluator whose first member is the PreviousSibling structural evaluator
    Evaluator query = QueryParser.parse("p ~ p");
    CombiningEvaluator.And and = (CombiningEvaluator.And) query;
    StructuralEvaluator.PreviousSibling siblingEval =
        (StructuralEvaluator.PreviousSibling) and.evaluators.get(0);
    IdentityHashMap<Element, IdentityHashMap<Element, Boolean>> memo = siblingEval.threadMemo.get();
    assertEquals(0, memo.size()); // no memo yet

    Document first = Jsoup.parse("<p>One<p>Two<p>Three");
    Document second = Jsoup.parse("<p>One2<p>Two2<p>Three2");

    // run the same compiled evaluator against both documents in turn
    Elements firstHits = first.select(query);
    assertEquals(2, firstHits.size());
    assertEquals("Two", firstHits.first().text());

    Elements secondHits = second.select(query);
    assertEquals(2, secondHits.size());
    assertEquals("Two2", secondHits.first().text());

    assertEquals(1, memo.size()); // root of doc 2
}
}

0 comments on commit c57e683

Please sign in to comment.