From fa441d10d237a5793c8884b9975e3b68454528b4 Mon Sep 17 00:00:00 2001 From: Abhijat Malviya Date: Thu, 11 Apr 2024 12:35:01 +0530 Subject: [PATCH] cst: Add debug logs during chunk hydration (cherry picked from commit 33eeca9b05452b7fde1d13898008c334c87c3c4d) --- src/v/cloud_storage/segment_chunk_api.cc | 49 ++++++++++++++++++- .../segment_chunk_data_source.cc | 15 ++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/v/cloud_storage/segment_chunk_api.cc b/src/v/cloud_storage/segment_chunk_api.cc index 5e52be3a83f2c..9b35af5d426b8 100644 --- a/src/v/cloud_storage/segment_chunk_api.cc +++ b/src/v/cloud_storage/segment_chunk_api.cc @@ -12,7 +12,11 @@ #include "cloud_storage/logger.h" #include "cloud_storage/remote_segment.h" -#include "utils/gate_guard.h" +#include "ssx/watchdog.h" + +#include + +using namespace std::chrono_literals; namespace { constexpr auto cache_backoff_duration = 5s; @@ -124,8 +128,17 @@ ss::future segment_chunks::do_hydrate_and_materialize( const auto prefetch = prefetch_override.value_or( config::shard_local_cfg().cloud_storage_chunk_prefetch); + + vlog( + _ctxlog.debug, + "hydrating chunk start: {}, prefetch: {}", + chunk_start, + prefetch); + co_await _segment.hydrate_chunk( segment_chunk_range{_chunks, prefetch, chunk_start}); + + vlog(_ctxlog.debug, "materializing chunk start: {}", chunk_start, prefetch); co_return co_await _segment.materialize_chunk(chunk_start); } @@ -141,6 +154,12 @@ ss::future segment_chunks::hydrate_chunk( auto& chunk = _chunks[chunk_start]; auto curr_state = chunk.current_state; + + vlog( + _ctxlog.debug, + "hydrate_chunk for {}, current state: {}", + chunk_start, + curr_state); if (curr_state == chunk_state::hydrated) { vassert( chunk.handle, @@ -152,6 +171,11 @@ ss::future segment_chunks::hydrate_chunk( // If a download is already in progress, subsequent callers to hydrate are // added to a wait list, and notified when the download finishes. if (curr_state == chunk_state::download_in_progress) { + vlog( + _ctxlog.debug, + "adding waitor for {}, waiters before: {}", + chunk_start, + chunk.waiters.size()); co_return co_await add_waiter_to_chunk(chunk_start, chunk); } @@ -159,12 +183,30 @@ ss::future segment_chunks::hydrate_chunk( try { chunk.current_state = chunk_state::download_in_progress; + watchdog wd( + 300s, [path = _segment.get_segment_path(), start = chunk_start] { + vlog( + cst_log.error, + "Stuck during do_hydrate_and_materialize for segment path: {}, " + "chunk start: {}", + path(), + start); + }); + // Keep retrying if materialization fails. bool done = false; while (!done) { + vlog( + _ctxlog.debug, + "attempting do_hydrate_and_materialize for {}", + chunk_start); auto handle = co_await do_hydrate_and_materialize( chunk_start, prefetch_override); if (handle) { + vlog( + _ctxlog.debug, + "do_hydrate_and_materialize for {} complete", + chunk_start); done = true; chunk.handle = ss::make_lw_shared(std::move(handle)); } else { @@ -177,6 +219,11 @@ ss::future segment_chunks::hydrate_chunk( } } } catch (const std::exception& ex) { + vlog( + _ctxlog.error, + "Failed to hydrate chunk start {}, error: {}", + chunk_start, + ex.what()); chunk.current_state = chunk_state::not_available; while (!chunk.waiters.empty()) { chunk.waiters.front().set_to_current_exception(); diff --git a/src/v/cloud_storage/segment_chunk_data_source.cc b/src/v/cloud_storage/segment_chunk_data_source.cc index 75b4cf7eb4fa7..1be33ebf6bbb2 100644 --- a/src/v/cloud_storage/segment_chunk_data_source.cc +++ b/src/v/cloud_storage/segment_chunk_data_source.cc @@ -75,6 +75,11 @@ ss::future> chunk_data_source_impl::get() { ss::future<> chunk_data_source_impl::load_chunk_handle(chunk_start_offset_t chunk_start) { try { + vlog( + _ctxlog.debug, + "Hydrating chunk {} with prefetch {}", + chunk_start, + _prefetch_override); _current_data_file = co_await _chunks.hydrate_chunk( chunk_start, _prefetch_override); } catch (const ss::abort_requested_exception& ex) { @@ -101,10 +106,20 @@ ss::future<> chunk_data_source_impl::load_stream_for_chunk( co_await load_chunk_handle(chunk_start); } catch (...) { eptr = std::current_exception(); + vlog( + _ctxlog.error, + "Hydrating chunk {} failed with error {}", + chunk_start, + eptr); } if (eptr) { co_await maybe_close_stream(); + vlog( + _ctxlog.debug, + "Closed stream after error {} while hydrating chunk start {}", + eptr, + chunk_start); std::rethrow_exception(eptr); }