Skip to content

Commit

Permalink
Merge pull request #18397 from andijcr/fix/trace_logging_stacked_deac…
Browse files Browse the repository at this point in the history
…tivation

[CORE-96]: admin/server fix set_log_level handling of overlapping expirations
  • Loading branch information
michael-redpanda authored May 13, 2024
2 parents 5629704 + 84748a8 commit e10e6f2
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 24 deletions.
37 changes: 23 additions & 14 deletions src/v/redpanda/admin/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -662,11 +662,17 @@ void admin_server::log_exception(
void admin_server::rearm_log_level_timer() {
_log_level_timer.cancel();

auto next = std::min_element(
_log_level_resets.begin(), _log_level_resets.end());
if (_log_level_resets.empty()) {
return;
}

if (next != _log_level_resets.end() && next->second.expires.has_value()) {
_log_level_timer.arm(next->second.expires.value());
auto reset_values = _log_level_resets | std::views::values;
auto& lvl_rst = *std::ranges::min_element(
reset_values, std::less<>{}, [](level_reset const& l) {
return l.expires.value_or(ss::timer<>::clock::time_point::max());
});
if (lvl_rst.expires.has_value()) {
_log_level_timer.arm(lvl_rst.expires.value());
}
}

Expand Down Expand Up @@ -1446,17 +1452,20 @@ void admin_server::register_config_routes() {

ss::global_logger_registry().set_logger_level(name, new_level);

// expires=0 is same as not specifying it at all
if (expires_v / 1s > 0) {
auto when = ss::timer<>::clock::now() + expires_v;
auto res = _log_level_resets.try_emplace(name, cur_level, when);
if (!res.second) {
res.first->second.expires = when;
auto when = [&]() -> std::optional<level_reset::time_point> {
// expires=0 is same as not specifying it at all
if (expires_v / 1s > 0) {
return ss::timer<>::clock::now() + expires_v;
} else {
// new log level never expires, but we still want an entry in
// the resets map as a record of the default
return std::nullopt;
}
} else {
// new log level never expires, but we still want an entry in the
// resets map as a record of the default
_log_level_resets.try_emplace(name, cur_level, std::nullopt);
}();

auto res = _log_level_resets.try_emplace(name, cur_level, when);
if (!res.second) {
res.first->second.expires = when;
}

rsp.expiration = expires_v / 1s;
Expand Down
10 changes: 0 additions & 10 deletions src/v/redpanda/admin/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,16 +655,6 @@ class admin_server {
: level(level)
, expires(expires) {}

friend bool operator<(const level_reset& l, const level_reset& r) {
if (!l.expires.has_value()) {
return false;
} else if (!r.expires.has_value()) {
return true;
} else {
return l.expires.value() < r.expires.value();
}
}

ss::log_level level;
std::optional<time_point> expires;
};
Expand Down
56 changes: 56 additions & 0 deletions tests/rptest/tests/log_level_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0

import time
import ducktape.errors
import requests.exceptions
import urllib.parse
import json

from ducktape.mark import parametrize
from ducktape.utils.util import wait_until
from rptest.services.cluster import cluster
from rptest.tests.redpanda_test import RedpandaTest
Expand Down Expand Up @@ -101,6 +103,60 @@ def test_log_level_control(self):
backoff_sec=1,
err_msg="Never saw message")

@cluster(num_nodes=1)
@parametrize(loggers=("admin_api_server", "raft"))
@parametrize(loggers=("raft", "admin_api_server"))
def test_log_level_multiple_expiry(self, loggers=tuple[str, str]):
"""
Check that more than one logger can be in a modified level and be expired correctly
see https://redpandadata.atlassian.net/browse/CORE-96
"""
admin = Admin(self.redpanda)
node = self.redpanda.nodes[0]

first_logger, second_logger = loggers
# set two loggers to trace, expect that both of them expires in a timely fashion
with self.redpanda.monitor_log(node) as mon:
admin.set_log_level(first_logger, "trace", expires=10)
time.sleep(1)
admin.set_log_level(second_logger, "trace", expires=10)
mon.wait_until(f"Expiring log level for {{{first_logger}}}",
timeout_sec=15,
backoff_sec=1,
err_msg=f"Never saw Expiring for {first_logger}")
mon.wait_until(f"Expiring log level for {{{second_logger}}}",
timeout_sec=15,
backoff_sec=1,
err_msg=f"Never saw Expiring for {second_logger}")

@cluster(num_nodes=1)
def test_log_level_persist_a_never_expire_request(self):
"""
check that this sequence of actions
set log-level admin_api_server trace 10
set log-level admin_api_server error 0
never resets the logger to the info level
"""
admin = Admin(self.redpanda)
node = self.redpanda.nodes[0]

with self.redpanda.monitor_log(node) as mon:
admin.set_log_level("admin_api_server", "trace", expires=10)
time.sleep(1)
admin.set_log_level("admin_api_server", "error", expires=0)

try:
mon.wait_until("Expiring log level for {admin_api_server}",
timeout_sec=15,
backoff_sec=1)
assert False, "Should not have seen message"
except ducktape.errors.TimeoutError:
pass

level = admin.get_log_level("admin_api_server")[0]["level"]
assert level == "error", f"expected level=error, got {level=}"

@cluster(num_nodes=3)
def test_max_expiry(self):
admin = Admin(self.redpanda)
Expand Down

0 comments on commit e10e6f2

Please sign in to comment.