From 13e273fdbee9c6c9d2b55bdfcfae6f2ceb63d3f6 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 19 Oct 2023 13:51:31 +0100 Subject: [PATCH] admin: expose time point for last completed scrub When a full scrub of the log finishes, via one or more scrubs, persist the timestamp at which this happened. This timestamp is then exposed via the admin API. We will use this in our ducktape tests for end of test scrubbing. --- src/v/cloud_storage/partition_manifest.cc | 4 ++++ src/v/cloud_storage/types.cc | 3 +++ src/v/cloud_storage/types.h | 6 +++++- src/v/redpanda/admin/api-doc/shadow_indexing.json | 4 ++++ src/v/redpanda/admin_server.cc | 4 ++++ tests/rptest/tests/cloud_storage_scrubber_test.py | 2 ++ 6 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/v/cloud_storage/partition_manifest.cc b/src/v/cloud_storage/partition_manifest.cc index 998eead05e1db..8a6612384ad47 100644 --- a/src/v/cloud_storage/partition_manifest.cc +++ b/src/v/cloud_storage/partition_manifest.cc @@ -2732,6 +2732,10 @@ void partition_manifest::process_anomalies( _last_partition_scrub = scrub_timestamp; _last_scrubbed_offset = last_scrubbed_offset; + if (!_last_scrubbed_offset) { + _detected_anomalies.last_complete_scrub = scrub_timestamp; + } + vlog( cst_log.debug, "[{}] Anomalies processed: {{ detected: {}, last_partition_scrub: {}, " diff --git a/src/v/cloud_storage/types.cc b/src/v/cloud_storage/types.cc index 18572ac6a631a..6ef3dfdfd1df4 100644 --- a/src/v/cloud_storage/types.cc +++ b/src/v/cloud_storage/types.cc @@ -264,6 +264,9 @@ anomalies& anomalies::operator+=(anomalies&& other) { std::make_move_iterator(other.segment_metadata_anomalies.begin()), std::make_move_iterator(other.segment_metadata_anomalies.end())); + last_complete_scrub = std::max( + last_complete_scrub, other.last_complete_scrub); + return *this; } diff --git a/src/v/cloud_storage/types.h b/src/v/cloud_storage/types.h index 99e9100012ff1..da042f2b6bb3f 100644 --- a/src/v/cloud_storage/types.h +++ b/src/v/cloud_storage/types.h @@ -391,13 +391,17 @@ struct anomalies absl::node_hash_set missing_segments; // Segments that have metadata anomalies (e.g. gaps or overlaps) segment_meta_anomalies segment_metadata_anomalies; + // Optional timestamp indicating the last time point at which + // the scrub of the full log completed. + std::optional last_complete_scrub; auto serde_fields() { return std::tie( missing_partition_manifest, missing_spillover_manifests, missing_segments, - segment_metadata_anomalies); + segment_metadata_anomalies, + last_complete_scrub); } bool has_value() const; diff --git a/src/v/redpanda/admin/api-doc/shadow_indexing.json b/src/v/redpanda/admin/api-doc/shadow_indexing.json index 7e23f5ddb140e..b1bfebbb8d27c 100644 --- a/src/v/redpanda/admin/api-doc/shadow_indexing.json +++ b/src/v/redpanda/admin/api-doc/shadow_indexing.json @@ -518,6 +518,10 @@ "type": "array", "items": {"type": "metadata_anomaly"}, "nullable": true + }, + "last_complete_scrub_at": { + "type": "long", + "nullable": true } } } diff --git a/src/v/redpanda/admin_server.cc b/src/v/redpanda/admin_server.cc index 675f99e56e093..88ea21f68192b 100644 --- a/src/v/redpanda/admin_server.cc +++ b/src/v/redpanda/admin_server.cc @@ -5296,6 +5296,10 @@ map_anomalies_to_json( json.partition = ntp.tp.partition(); json.revision_id = initial_rev(); + if (detected.last_complete_scrub) { + json.last_complete_scrub_at = detected.last_complete_scrub->value(); + } + if (detected.missing_partition_manifest) { json.missing_partition_manifest = true; } diff --git a/tests/rptest/tests/cloud_storage_scrubber_test.py b/tests/rptest/tests/cloud_storage_scrubber_test.py index ed0582c7e6fdb..f9d89a3f61a8d 100644 --- a/tests/rptest/tests/cloud_storage_scrubber_test.py +++ b/tests/rptest/tests/cloud_storage_scrubber_test.py @@ -118,6 +118,8 @@ def _collect_anomalies(self): topic=self.topic, partition=pid) + anomalies.pop("last_complete_scrub_at", None) + ntpr = NTPR(ns=anomalies["ns"], topic=anomalies["topic"], partition=anomalies["partition"],