From c85d3a65e9b5cc6a91ddaf61396613ed8fb92836 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 26 Jan 2024 14:14:49 +0100 Subject: [PATCH] cloud_storage/remote_partition: handle race retention+timequery log a warning if when trying to get a async_manifest_view_cursor for a timequery the result is out_of_range. such error can happen if retention is kicking in and deleting some or all spillover manifests: in this case there is a window where the manifest are reclaimable but still kept in memory, and the timequery hits one of the manifest in the reclaimable range. handling this as a warning and no result is acceptable because the kafka client issuing the request can handle this failure, and otherwise handling this edge cases would increase the complexity of the callstack (cherry picked from commit 4ca728136ff385bebbcdcb9fb8d7f871a1c201c9) --- src/v/cloud_storage/remote_partition.cc | 28 ++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/v/cloud_storage/remote_partition.cc b/src/v/cloud_storage/remote_partition.cc index ab7ea6f9832ec..bda3ea3d09049 100644 --- a/src/v/cloud_storage/remote_partition.cc +++ b/src/v/cloud_storage/remote_partition.cc @@ -521,6 +521,7 @@ class partition_record_batch_reader_impl final cur.error() == error_outcome::out_of_range && ss::visit( query, + [&](model::offset) { return false; }, [&](kafka::offset query_offset) { // Special case queries below the start offset of the log. // The start offset may have advanced while the request was @@ -540,7 +541,32 @@ class partition_record_batch_reader_impl final } return false; }, - [](auto) { return false; })) { + [&](model::timestamp query_ts) { + // Special case, it can happen when a timequery falls below + // the clean offset. Caused when the query races with + // retention/gc. log a warning, since the kafka client can + // handle a failed query + auto const& spillovers = _partition->_manifest_view + ->stm_manifest() + .get_spillover_map(); + if ( + spillovers.empty() + || spillovers.get_max_timestamp_column() + .last_value() + .value_or(model::timestamp::max()()) + >= query_ts()) { + vlog( + _ctxlog.debug, + "Manifest query raced with retention and the result " + "is below the clean/start offset for {}", + query_ts); + return true; + } + + // query was not meant for archive region. fallthrough and + // log an error + return false; + })) { // error was handled co_return; }