From a3c01c0cb43edc8f7bbc8b43a655e0f745879cbc Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:21:33 +0200 Subject: [PATCH 1/5] admin/server: fix rearm_log_level_timer _log_level_resets is a map, we want to extract the next expiring level_reset to rearm the _log_level_timer. previously we would extract the min_element based on the key (the logger name) so this sequence of actions set log-level kafka trace 10 set log-level raft trace 10 only kafka would be expired, because once expired the next run would pick again kafka, see that there is no expiration, and not rearm the timer. (cherry picked from commit bf6bc91ae98162e9504cb33fbffde62f5045211c) --- src/v/redpanda/admin/server.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/v/redpanda/admin/server.cc b/src/v/redpanda/admin/server.cc index 441683321e044..e99c5a5bd846a 100644 --- a/src/v/redpanda/admin/server.cc +++ b/src/v/redpanda/admin/server.cc @@ -658,11 +658,17 @@ void admin_server::log_exception( void admin_server::rearm_log_level_timer() { _log_level_timer.cancel(); - auto next = std::min_element( - _log_level_resets.begin(), _log_level_resets.end()); + if (_log_level_resets.empty()) { + return; + } - if (next != _log_level_resets.end() && next->second.expires.has_value()) { - _log_level_timer.arm(next->second.expires.value()); + auto reset_values = _log_level_resets | std::views::values; + auto& lvl_rst = *std::ranges::min_element( + reset_values, std::less<>{}, [](level_reset const& l) { + return l.expires.value_or(ss::timer<>::clock::time_point::max()); + }); + if (lvl_rst.expires.has_value()) { + _log_level_timer.arm(lvl_rst.expires.value()); } } From 62f8141d898e8ef276dc07a91ba30ca721398f4b Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:31:07 +0200 Subject: [PATCH 2/5] admin/server: remove unused level_reset::operator< (cherry picked from commit 0717cd1a0c9f352ad8a66d6cdf10ae58f36147a1) --- src/v/redpanda/admin/server.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/v/redpanda/admin/server.h b/src/v/redpanda/admin/server.h index 427520898b42a..c49f6956a2c13 100644 --- a/src/v/redpanda/admin/server.h +++ b/src/v/redpanda/admin/server.h @@ -655,16 +655,6 @@ class admin_server { : level(level) , expires(expires) {} - friend bool operator<(const level_reset& l, const level_reset& r) { - if (!l.expires.has_value()) { - return false; - } else if (!r.expires.has_value()) { - return true; - } else { - return l.expires.value() < r.expires.value(); - } - } - ss::log_level level; std::optional expires; }; From 95e2b37c8c213ccd39412c7e59cda2133771d2e8 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:32:59 +0200 Subject: [PATCH 3/5] tests/log_level_test: test_log_level_multiple_expiry the test shows that this sequence of actions set log-level admin-api-server trace 10 set log-level raft trace 10 works and both logger are reset after the expiry period (cherry picked from commit 6593dbec194446702799c3223caeb688000019cf) --- tests/rptest/tests/log_level_test.py | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/rptest/tests/log_level_test.py b/tests/rptest/tests/log_level_test.py index 4c90b8aa258a9..1c88c3b76879d 100644 --- a/tests/rptest/tests/log_level_test.py +++ b/tests/rptest/tests/log_level_test.py @@ -7,11 +7,13 @@ # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0 +import time import ducktape.errors import requests.exceptions import urllib.parse import json +from ducktape.mark import parametrize from ducktape.utils.util import wait_until from rptest.services.cluster import cluster from rptest.tests.redpanda_test import RedpandaTest @@ -101,6 +103,33 @@ def test_log_level_control(self): backoff_sec=1, err_msg="Never saw message") + @cluster(num_nodes=1) + @parametrize(loggers=("admin_api_server", "raft")) + @parametrize(loggers=("raft", "admin_api_server")) + def test_log_level_multiple_expiry(self, loggers=tuple[str, str]): + """ + Check that more than one logger can be in a modified level and be expired correctly + see https://redpandadata.atlassian.net/browse/CORE-96 + """ + admin = Admin(self.redpanda) + node = self.redpanda.nodes[0] + + first_logger, second_logger = loggers + # set two loggers to trace, expect that both of them expires in a timely fashion + with self.redpanda.monitor_log(node) as mon: + admin.set_log_level(first_logger, "trace", expires=10) + time.sleep(1) + admin.set_log_level(second_logger, "trace", expires=10) + mon.wait_until(f"Expiring log level for {{{first_logger}}}", + timeout_sec=15, + backoff_sec=1, + err_msg=f"Never saw Expiring for {first_logger}") + mon.wait_until(f"Expiring log level for {{{second_logger}}}", + timeout_sec=15, + backoff_sec=1, + err_msg=f"Never saw Expiring for {second_logger}") + + @cluster(num_nodes=3) def test_max_expiry(self): admin = Admin(self.redpanda) From 9dbc3ad20ff17a75f74f5b7392bf050bce083d3f Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:38:18 +0200 Subject: [PATCH 4/5] admin/server: ensure that a true expire-never logger is never expired previously, this sequence of actions set log-level raft trace 10 set log-level raft error 0 would result in raft logging at error level for only 10 seconds, despite the legit request to expire never. this is because in the code, the second request would not correctly reset the expire field if a previous name-level_reset was already in the map used for this operation (cherry picked from commit d781fb13e6051bf3b01821b9faac4687fa69f1c6) --- src/v/redpanda/admin/server.cc | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/v/redpanda/admin/server.cc b/src/v/redpanda/admin/server.cc index e99c5a5bd846a..749e7d1d1a6a0 100644 --- a/src/v/redpanda/admin/server.cc +++ b/src/v/redpanda/admin/server.cc @@ -1416,17 +1416,20 @@ void admin_server::register_config_routes() { ss::global_logger_registry().set_logger_level(name, new_level); - // expires=0 is same as not specifying it at all - if (expires_v / 1s > 0) { - auto when = ss::timer<>::clock::now() + expires_v; - auto res = _log_level_resets.try_emplace(name, cur_level, when); - if (!res.second) { - res.first->second.expires = when; + auto when = [&]() -> std::optional { + // expires=0 is same as not specifying it at all + if (expires_v / 1s > 0) { + return ss::timer<>::clock::now() + expires_v; + } else { + // new log level never expires, but we still want an entry in + // the resets map as a record of the default + return std::nullopt; } - } else { - // new log level never expires, but we still want an entry in the - // resets map as a record of the default - _log_level_resets.try_emplace(name, cur_level, std::nullopt); + }(); + + auto res = _log_level_resets.try_emplace(name, cur_level, when); + if (!res.second) { + res.first->second.expires = when; } rsp.expiration = expires_v / 1s; From ce29395c1a738a221c146a58de328205f3bd5e7f Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:46:57 +0200 Subject: [PATCH 5/5] tests/log_level_test: test_log_level_persist_a_never_expire_request test that set log-level admin_api_server trace 10 set log-level admin_api_server error 0 never resets the logger to the info level (cherry picked from commit 84748a8dcb4931258c5b9ef0cb3a32b9f2b9202b) --- tests/rptest/tests/log_level_test.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/rptest/tests/log_level_test.py b/tests/rptest/tests/log_level_test.py index 1c88c3b76879d..d0ff84908363a 100644 --- a/tests/rptest/tests/log_level_test.py +++ b/tests/rptest/tests/log_level_test.py @@ -129,6 +129,33 @@ def test_log_level_multiple_expiry(self, loggers=tuple[str, str]): backoff_sec=1, err_msg=f"Never saw Expiring for {second_logger}") + @cluster(num_nodes=1) + def test_log_level_persist_a_never_expire_request(self): + """ + check that this sequence of actions + set log-level admin_api_server trace 10 + set log-level admin_api_server error 0 + + never resets the logger to the info level + """ + admin = Admin(self.redpanda) + node = self.redpanda.nodes[0] + + with self.redpanda.monitor_log(node) as mon: + admin.set_log_level("admin_api_server", "trace", expires=10) + time.sleep(1) + admin.set_log_level("admin_api_server", "error", expires=0) + + try: + mon.wait_until("Expiring log level for {admin_api_server}", + timeout_sec=15, + backoff_sec=1) + assert False, "Should not have seen message" + except ducktape.errors.TimeoutError: + pass + + level = admin.get_log_level("admin_api_server")[0]["level"] + assert level == "error", f"expected level=error, got {level=}" @cluster(num_nodes=3) def test_max_expiry(self):