Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CORE-96]: admin/server fix set_log_level handling of overlapping expirations #18397

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 23 additions & 14 deletions src/v/redpanda/admin/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -661,11 +661,17 @@ void admin_server::log_exception(
void admin_server::rearm_log_level_timer() {
_log_level_timer.cancel();

auto next = std::min_element(
_log_level_resets.begin(), _log_level_resets.end());
if (_log_level_resets.empty()) {
return;
}

if (next != _log_level_resets.end() && next->second.expires.has_value()) {
_log_level_timer.arm(next->second.expires.value());
auto reset_values = _log_level_resets | std::views::values;
auto& lvl_rst = *std::ranges::min_element(
reset_values, std::less<>{}, [](level_reset const& l) {
return l.expires.value_or(ss::timer<>::clock::time_point::max());
});
if (lvl_rst.expires.has_value()) {
_log_level_timer.arm(lvl_rst.expires.value());
}
}

Expand Down Expand Up @@ -1416,17 +1422,20 @@ void admin_server::register_config_routes() {

ss::global_logger_registry().set_logger_level(name, new_level);

// expires=0 is same as not specifying it at all
if (expires_v / 1s > 0) {
auto when = ss::timer<>::clock::now() + expires_v;
auto res = _log_level_resets.try_emplace(name, cur_level, when);
if (!res.second) {
res.first->second.expires = when;
auto when = [&]() -> std::optional<level_reset::time_point> {
// expires=0 is same as not specifying it at all
if (expires_v / 1s > 0) {
return ss::timer<>::clock::now() + expires_v;
} else {
// new log level never expires, but we still want an entry in
// the resets map as a record of the default
return std::nullopt;
}
} else {
// new log level never expires, but we still want an entry in the
// resets map as a record of the default
_log_level_resets.try_emplace(name, cur_level, std::nullopt);
}();

auto res = _log_level_resets.try_emplace(name, cur_level, when);
if (!res.second) {
res.first->second.expires = when;
}

rsp.expiration = expires_v / 1s;
Expand Down
10 changes: 0 additions & 10 deletions src/v/redpanda/admin/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,16 +655,6 @@ class admin_server {
: level(level)
, expires(expires) {}

friend bool operator<(const level_reset& l, const level_reset& r) {
if (!l.expires.has_value()) {
return false;
} else if (!r.expires.has_value()) {
return true;
} else {
return l.expires.value() < r.expires.value();
}
}

ss::log_level level;
std::optional<time_point> expires;
};
Expand Down
56 changes: 56 additions & 0 deletions tests/rptest/tests/log_level_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0

import time
import ducktape.errors
import requests.exceptions
import urllib.parse
import json

from ducktape.mark import parametrize
from ducktape.utils.util import wait_until
from rptest.services.cluster import cluster
from rptest.tests.redpanda_test import RedpandaTest
Expand Down Expand Up @@ -101,6 +103,60 @@ def test_log_level_control(self):
backoff_sec=1,
err_msg="Never saw message")

@cluster(num_nodes=1)
@parametrize(loggers=("admin_api_server", "raft"))
@parametrize(loggers=("raft", "admin_api_server"))
def test_log_level_multiple_expiry(self, loggers=tuple[str, str]):
"""
Check that more than one logger can be in a modified level and be expired correctly
see https://redpandadata.atlassian.net/browse/CORE-96
"""
admin = Admin(self.redpanda)
node = self.redpanda.nodes[0]

first_logger, second_logger = loggers
# set two loggers to trace, expect that both of them expires in a timely fashion
with self.redpanda.monitor_log(node) as mon:
admin.set_log_level(first_logger, "trace", expires=10)
time.sleep(1)
admin.set_log_level(second_logger, "trace", expires=10)
mon.wait_until(f"Expiring log level for {{{first_logger}}}",
timeout_sec=15,
backoff_sec=1,
err_msg=f"Never saw Expiring for {first_logger}")
mon.wait_until(f"Expiring log level for {{{second_logger}}}",
timeout_sec=15,
backoff_sec=1,
err_msg=f"Never saw Expiring for {second_logger}")

@cluster(num_nodes=1)
def test_log_level_persist_a_never_expire_request(self):
"""
check that this sequence of actions
set log-level admin_api_server trace 10
set log-level admin_api_server error 0

never resets the logger to the info level
"""
admin = Admin(self.redpanda)
node = self.redpanda.nodes[0]

with self.redpanda.monitor_log(node) as mon:
admin.set_log_level("admin_api_server", "trace", expires=10)
time.sleep(1)
admin.set_log_level("admin_api_server", "error", expires=0)

try:
mon.wait_until("Expiring log level for {admin_api_server}",
timeout_sec=15,
backoff_sec=1)
assert False, "Should not have seen message"
except ducktape.errors.TimeoutError:
pass

level = admin.get_log_level("admin_api_server")[0]["level"]
assert level == "error", f"expected level=error, got {level=}"

@cluster(num_nodes=3)
def test_max_expiry(self):
admin = Admin(self.redpanda)
Expand Down
Loading