From b034eaea629437b348d0c6435667e2efcfd6b092 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Tue, 20 Aug 2024 17:33:08 +0200 Subject: [PATCH] Drop ParallelIterable's queue low water mark As part of the change in commit 7831a8dfc3a2de546ca069f4fc1e7afd03777554, queue low water mark was introduced. However, it resulted in increased number of manifests being read when planning LIMIT queries in Trino Iceberg connector. To avoid increased I/O, back out the change for now. --- .../java/org/apache/iceberg/util/ParallelIterable.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/ParallelIterable.java b/core/src/main/java/org/apache/iceberg/util/ParallelIterable.java index 27cd96a39733..40bdf1e0c4f8 100644 --- a/core/src/main/java/org/apache/iceberg/util/ParallelIterable.java +++ b/core/src/main/java/org/apache/iceberg/util/ParallelIterable.java @@ -195,12 +195,9 @@ public synchronized boolean hasNext() { // If the consumer is processing records more slowly than the producers, the producers will // eventually fill the queue and yield, returning continuations. Continuations and new tasks // are started by checkTasks(). The check here prevents us from restarting continuations or - // starting new tasks too early (when queue is almost full) or too late (when queue is already - // emptied). Restarting too early would lead to tasks yielding very quickly (CPU waste on - // scheduling). Restarting too late would mean the consumer may need to wait for the tasks - // to produce new items. A consumer slower than producers shouldn't need to wait. - int queueLowWaterMark = maxQueueSize / 2; - if (queue.size() > queueLowWaterMark) { + // starting new tasks before the queue is emptied. Restarting too early would lead to tasks + // yielding very quickly (CPU waste on scheduling). + if (!queue.isEmpty()) { return true; }