Skip to content

Commit

Permalink
audit: Disable auditing in recovery mode
Browse files Browse the repository at this point in the history
Fixes: #15226

Audit logging cannot function in recovery mode because produce
messages are rejected. In this situation, the cluster may become
unusable if audit messages are generated and the queues are
unable to drain.

Also fixed a logic error in the ducktape tests.

Signed-off-by: Michael Boquard <michael@redpanda.com>
  • Loading branch information
michael-redpanda committed Nov 30, 2023
1 parent 398eccd commit 6460a4b
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 2 deletions.
13 changes: 13 additions & 0 deletions src/v/security/audit/audit_log_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,19 @@ bool audit_log_manager::is_audit_event_enabled(event_type event_type) const {
}

ss::future<> audit_log_manager::start() {
bool recovery_mode_enabled = config::node().recovery_mode_enabled.value();
if (recovery_mode_enabled) {
vlog(
adtlog.warn,
"**************************************************************");
vlog(
adtlog.warn,
"Redpanda is operating in recovery mode. Auditing is disabled!");
vlog(
adtlog.warn,
"**************************************************************");
co_return;
}
_probe = std::make_unique<audit_probe>();
_probe->setup_metrics([this] {
return static_cast<double>(pending_events())
Expand Down
37 changes: 35 additions & 2 deletions tests/rptest/tests/audit_log_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ def ingest(self, records):
)
return
self.next_offset_ingest = len(records)
new_records = [json.loads(msg['value']) for msg in records]
new_records = [json.loads(msg['value']) for msg in new_records]
self.logger.info(f"Ingested: {len(new_records)} records")
self.logger.debug(f'Ingested records:')
for rec in new_records:
Expand Down Expand Up @@ -670,6 +670,39 @@ def test_drain_on_audit_disabled(self):
lambda record_count: record_count == 3,
"One stop event observed for shutdown node")

@cluster(num_nodes=5)
def test_recovery_mode(self):
    """
    Check that the audit system stays down while in recovery mode.

    The test first waits for three "Audit System" start records from
    the initial boot, restarts every node with recovery mode enabled and
    waits for the "Auditing is disabled" warning to show up in the logs,
    then restarts with recovery mode disabled and requires that the
    total number of start records is exactly six.
    """
    audit_system_started = partial(
        AuditLogTestsAppLifecycle.is_lifecycle_match, "Audit System", True)
    start_event_desc = "Single redpanda audit start event per node"
    all_nodes = self.redpanda.nodes

    def recovery_warning_logged():
        # Warning emitted by the audit log manager when it refuses to start.
        return self.redpanda.search_log_any(
            'Redpanda is operating in recovery mode. Auditing is disabled!')

    # Baseline: the audit system must have come up on the initial boot.
    self.find_matching_record(audit_system_started,
                              lambda record_count: record_count == 3,
                              start_event_desc)

    # Bounce the cluster into recovery mode and confirm auditing was skipped.
    self.redpanda.restart_nodes(
        all_nodes, override_cfg_params={"recovery_mode_enabled": True})
    wait_until(recovery_warning_logged,
               timeout_sec=30,
               backoff_sec=2,
               err_msg="Did not find expected log statement")

    # Return to normal operation; auditing should start again.
    self.redpanda.restart_nodes(
        all_nodes, override_cfg_params={"recovery_mode_enabled": False})

    # Three records from the first boot plus three from this final boot.
    # Waiting on ">= 6" and then asserting "== 6" catches the case where
    # extra start records were produced during the recovery-mode boot.
    records = self.find_matching_record(
        audit_system_started,
        lambda record_count: record_count >= 6,
        start_event_desc)
    assert len(
        records) == 6, f'Expected 6 start up records, found {len(records)}'


class AuditLogTestAdminApi(AuditLogTestBase):
"""Validates that audit logs are generated from admin API
Expand Down Expand Up @@ -1857,7 +1890,7 @@ def match_authn_user(user, svc_name, result, record):
_ = self.find_matching_record(
lambda record: match_authn_user(self.username, self.
sr_audit_svc_name, 1, record),
lambda record_count: record_count > 1, 'authn attempt in sr')
lambda record_count: record_count == 1, 'authn attempt in sr')

@cluster(num_nodes=5)
def test_sr_audit_bad_authn(self):
Expand Down

0 comments on commit 6460a4b

Please sign in to comment.