Skip to content

Commit

Permalink
rptest: tighten up timequery test
Browse files Browse the repository at this point in the history
This commit makes a couple of changes to the timequery tests:
1. Run timequery on more offsets (10 for each of the 12 segments)
2. Check that a maximum of two chunks are downloaded by any given
   timequery
3. Use the admin api to get the precise boundary between the cloud and
   local log. Previously, it was estimated based on record size.
  • Loading branch information
Vlad Lazar committed Aug 25, 2023
1 parent a4e24da commit ea3677a
Showing 1 changed file with 35 additions and 13 deletions.
48 changes: 35 additions & 13 deletions tests/rptest/tests/timequery_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from logging import Logger
from typing import Callable

from rptest.services.admin import Admin
from rptest.services.cluster import cluster
from rptest.tests.redpanda_test import RedpandaTest
from rptest.services.redpanda import RedpandaService, SISettings, make_redpanda_service
Expand Down Expand Up @@ -127,6 +128,13 @@ def _test_timequery(self, cluster, cloud_storage: bool, batch_cache: bool):
leader_node = cluster.get_node(
next(rpk.describe_topic(topic.name)).leader)

# When using cloud storage, we expect offsets ahead
# of this to still hit raft for their timequeries.
admin = Admin(self.redpanda)
status = admin.get_partition_cloud_storage_status(topic.name, 0)
local_start_offset = status["local_log_start_offset"]
local_last_offset = status["local_log_last_offset"]

# Class defining expectations of timequery results to be checked
class ex:
def __init__(self, offset, ts=None, expect_read=True):
Expand All @@ -136,22 +144,21 @@ def __init__(self, offset, ts=None, expect_read=True):
self.offset = offset
self.expect_read = expect_read

# Selection of interesting cases
expectations = [
ex(0), # First message
ex(msg_count // 4), # 25%th message
ex(msg_count // 2), # 50%th message
ex(msg_count - 1), # last message
# We will do approx. 10 timequeries within each segment.
step = msg_count // total_segments // 10

expectations = []
for o in range(0, msg_count, step):
expect_read = o <= local_last_offset
expectations.append(ex(o, timestamps[o], expect_read))

# Add edge cases
expectations += [
ex(0, timestamps[0] - 1000), # Before the start of the log
ex(-1, timestamps[msg_count - 1] + 1000,
False) # After last message
]

# When using cloud storage, we expect offsets ahead
# of this to still hit raft for their timequeries. This is approximate,
# but fine as long as the test cases don't tread too near the gap.
local_start_offset = msg_count - ((local_retention) / record_size)

is_redpanda = isinstance(cluster, RedpandaService)

# Remember which offsets we already hit, so that we can
Expand All @@ -164,7 +171,17 @@ def __init__(self, offset, ts=None, expect_read=True):
local_metrics = None

def diff_bytes(old, new):
    # Defect fixed: the rendered diff left the superseded implementation
    # (`return new > old and new - old < self.log_segment_size`) as the
    # first statement, making the new check unreachable dead code. Only
    # the new chunk-based bound is kept.
    #
    # Each timequery will download a maximum of two chunks, but
    # we make the check extra generous to account for the index
    # download.
    # NOTE(review): `self` is a closure from the enclosing method;
    # `self.chunk_size` is the configured cloud-storage cache chunk size.
    return new - old <= self.chunk_size * 5

def diff_chunks(old, new):
    # A segment's remote index is sampled every 64 KiB while the cache
    # chunk size in this test is 128 KiB, so one timequery should never
    # hydrate more than two chunks. Were the index samples perfectly
    # aligned with chunk boundaries a single chunk would suffice, but
    # alignment is not guaranteed, hence the bound of two.
    hydrated = new - old
    return hydrated <= 2

for e in expectations:
ts = e.ts
Expand Down Expand Up @@ -202,6 +219,10 @@ def diff_bytes(old, new):
diff_bytes)
])

cloud_metrics.expect([(
"vectorized_cloud_storage_read_path_chunks_hydrated_total",
diff_chunks)])

if is_redpanda and not cloud_storage and not batch_cache and e.expect_read:
# Expect to read at most one segment from disk: this validates that
# we are correctly looking up the right segment before seeking to
Expand Down Expand Up @@ -266,6 +287,7 @@ class TimeQueryTest(RedpandaTest, BaseTimeQuery):
# lookup of the proper segment for a time index, as well
# as the lookup of the offset within that segment.
log_segment_size = 1024 * 1024
chunk_size = 1024 * 128

def setUp(self):
# Don't start up redpanda yet, because we will need the
Expand All @@ -287,7 +309,7 @@ def set_up_cluster(self, cloud_storage: bool, batch_cache: bool):
'log_segment_size_min':
32 * 1024,
'cloud_storage_cache_chunk_size':
1024 * 128
self.chunk_size
})

if cloud_storage:
Expand Down

0 comments on commit ea3677a

Please sign in to comment.