From 374ded2bc3d57108fffbead844371ecc5fab63ee Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Fri, 22 Dec 2023 12:47:21 +1100 Subject: [PATCH] Use a threadlocal to hold the NodeIterator Ensures that an Evaluator query can be multi-threaded Fixes #2088 --- CHANGES.md | 3 + .../org/jsoup/select/StructuralEvaluator.java | 8 ++- .../java/org/jsoup/select/SelectorIT.java | 58 +++++++++++++++++++ .../java/org/jsoup/select/SelectorTest.java | 1 - 4 files changed, 67 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/jsoup/select/SelectorIT.java diff --git a/CHANGES.md b/CHANGES.md index 00b6b53160..d7559c2ef6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -27,6 +27,9 @@ * When generating XML-syntax output from parsed HTML, script nodes containing (pseudo) CData sections would have an extraneous CData section added, causing script execution errors. Now, the data content is emitted in a HTML/XML/XHTML polyglot format, if the data is not already within a CData section. [2078](https://github.com/jhy/jsoup/issues/2078) +* The `:has` evaluator held a non-thread-safe Iterator, and so if an Evaluator object was shared across multiple + concurrent threads, a NoSuchElement exception may be thrown, and the selected results may be incorrect. Now, the + iterator object is a thread-local. [2088](https://github.com/jhy/jsoup/issues/2088) --- Older changes for versions 0.1.1 (2010-Jan-31) through 1.17.1 (2023-Nov-27) may be found in diff --git a/src/main/java/org/jsoup/select/StructuralEvaluator.java b/src/main/java/org/jsoup/select/StructuralEvaluator.java index 1e84427068..8808439a80 100644 --- a/src/main/java/org/jsoup/select/StructuralEvaluator.java +++ b/src/main/java/org/jsoup/select/StructuralEvaluator.java @@ -58,16 +58,20 @@ public boolean matches(Element root, Element element) { } } - static class Has extends StructuralEvaluator { - final NodeIterator it = new NodeIterator<>(new Element("html"), Element.class); + final ThreadLocal> threadHasIter = + ThreadLocal.withInitial(() -> new NodeIterator<>(new Element("html"), Element.class)); // the element here is just a placeholder so this can be final - gets set in restart() + static class Has extends StructuralEvaluator { + public Has(Evaluator evaluator) { super(evaluator); } @Override public boolean matches(Element root, Element element) { // for :has, we only want to match children (or below), not the input element. And we want to minimize GCs + NodeIterator it = threadHasIter.get(); + it.restart(element); while (it.hasNext()) { Element el = it.next(); diff --git a/src/test/java/org/jsoup/select/SelectorIT.java b/src/test/java/org/jsoup/select/SelectorIT.java new file mode 100644 index 0000000000..ed884b7644 --- /dev/null +++ b/src/test/java/org/jsoup/select/SelectorIT.java @@ -0,0 +1,58 @@ +package org.jsoup.select; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.junit.jupiter.api.Test; + +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class SelectorIT { + + @Test + public void multiThreadHas() throws InterruptedException { + final String html = "

One

Two

"; + final Evaluator eval = QueryParser.parse("div:has(p)"); + + int numThreads = 20; + int numThreadLoops = 5; + + SelectorIT.ThreadCatcher catcher = new SelectorIT.ThreadCatcher(); + + Thread[] threads = new Thread[numThreads]; + for (int threadNum = 0; threadNum < numThreads; threadNum++) { + Thread thread = new Thread(() -> { + Document doc = Jsoup.parse(html); + for (int loop = 0; loop < numThreadLoops; loop++) { + Elements els = doc.select(eval); + assertEquals(1, els.size()); + assertEquals("2", els.get(0).id()); + } + }); + thread.setName("Runner-" + threadNum); + thread.start(); + thread.setUncaughtExceptionHandler(catcher); + threads[threadNum] = thread; + } + + // now join them all + for (Thread thread : threads) { + thread.join(); + } + + assertEquals(0, catcher.exceptionCount.get()); + } + + static class ThreadCatcher implements Thread.UncaughtExceptionHandler { + AtomicInteger exceptionCount = new AtomicInteger(); + + @Override + public void uncaughtException(Thread t, Throwable e) { + + e.printStackTrace(); + exceptionCount.incrementAndGet(); + } + } + +} diff --git a/src/test/java/org/jsoup/select/SelectorTest.java b/src/test/java/org/jsoup/select/SelectorTest.java index 0b1721a8b5..493580bb92 100644 --- a/src/test/java/org/jsoup/select/SelectorTest.java +++ b/src/test/java/org/jsoup/select/SelectorTest.java @@ -2,7 +2,6 @@ import org.jsoup.Jsoup; import org.jsoup.MultiLocaleExtension.MultiLocaleTest; -import org.jsoup.TextUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.parser.Parser;