From 1bd8de89b706526dee8222b2628b05add107c3cb Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 11 Jun 2024 23:58:29 +0200 Subject: [PATCH] s/disk_log_impl: don't prefix-truncate empty segments Consider the following sequence of events: 1. there is a single segment in the log, with offsets [0-9] 2. we call prefix_truncate(10) 3. concurrently, another batch of 5 messages is being appended. 4. empty segment with base_offset=10, dirty_offset=9 is created 5. the appended batch is placed at offsets 10-14 Previously, the empty segment would have passed the dirty_offset check and (after waiting for the append to finish) would get deleted (including the data at offsets 10-14). Check also the segment base_offset to prevent that. Fixes https://github.com/redpanda-data/redpanda/issues/19632 (cherry picked from commit 963ff237c08eef00a09b49cd4af1d9ee15ef20c1) Conflicts: src/v/storage/disk_log_impl.cc --- src/v/storage/disk_log_impl.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/v/storage/disk_log_impl.cc b/src/v/storage/disk_log_impl.cc index f539754ad4f4..226bd883b339 100644 --- a/src/v/storage/disk_log_impl.cc +++ b/src/v/storage/disk_log_impl.cc @@ -1932,7 +1932,11 @@ ss::future<> disk_log_impl::remove_prefix_full_segments(truncate_prefix_config cfg) { return ss::do_until( [this, cfg] { + // base_offset check is for the case of an empty segment + // (where dirty = base - 1). We don't want to remove it because + // batches may be concurrently appended to it and we should keep them. return _segs.empty() + || _segs.front()->offsets().base_offset >= cfg.start_offset || _segs.front()->offsets().dirty_offset >= cfg.start_offset; }, [this] {