From 31c2c1c9ace0a09f599aea9f38e86b6217019c4b Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Thu, 19 Nov 2020 13:33:55 +0100 Subject: [PATCH 1/2] [Auditbeat] Recover from errors in audit monitoring routine The auditd module spawns a monitoring goroutine that fetches auditd status every 15s. Due to this routine using a single audit client, if an update fails (because a netlink message is late or other causes), the audit client can get out of sync with the stream, failing in all subsequent requests. For reasons that aren't 100% clear to me at the moment, this error condition leads to a lot of `[audit_send_repl]` (2.6.x) / `[audit_send_reply]` (3.x+) kernel threads being created. ``` ERROR [auditd] auditd/audit_linux.go:183 get status request failed:failed to get audit status ack: unexpected sequence number for reply (expected 6286 but got 6285) ``` ``` $ ps -ef [...] root 27790 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27791 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27792 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27793 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27794 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27795 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27796 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27797 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27798 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27799 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27800 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27801 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27802 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27803 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27804 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27805 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27806 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27807 2 0 12:52 ? 00:00:00 [audit_send_repl] root 27808 2 0 12:52 ? 00:00:00 [audit_send_repl] [...] ``` This patch updates the error-handling logic to create a new audit client when a status update fails, allowing to recover and preventing the proliferation of `audit_send_repl` kernel threads. --- CHANGELOG.next.asciidoc | 1 + auditbeat/module/auditd/audit_linux.go | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index cdf8d58d31c..a7da0fff7df 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -232,6 +232,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - system/socket: Fix kprobe grouping to allow running more than one instance. {pull}20325[20325] - system/socket: Fixed a crash due to concurrent map read and write. {issue}21192[21192] {pull}21690[21690] - file_integrity: stop monitoring excluded paths {issue}21278[21278] {pull}21282[21282] +- auditd: Fix an error condition causing a lot of `audit_send_reply` kernel threads being created. {pull}22673[22673] *Filebeat* diff --git a/auditbeat/module/auditd/audit_linux.go b/auditbeat/module/auditd/audit_linux.go index 1586eaeaffa..570c1fd0403 100644 --- a/auditbeat/module/auditd/audit_linux.go +++ b/auditbeat/module/auditd/audit_linux.go @@ -163,7 +163,9 @@ func (ms *MetricSet) Run(reporter mb.PushReporterV2) { ms.log.Errorw("Failure creating audit monitoring client", "error", err) } go func() { - defer client.Close() + defer func() { // Close the most recently allocated "client" instance. + client.Close() + }() timer := time.NewTicker(lostEventsUpdateInterval) defer timer.Stop() for { @@ -175,6 +177,13 @@ func (ms *MetricSet) Run(reporter mb.PushReporterV2) { ms.updateKernelLostMetric(status.Lost) } else { ms.log.Error("get status request failed:", err) + if err = client.Close(); err != nil { + ms.log.Errorw("Error closing audit monitoring client", "error", err) + } + client, err = libaudit.NewAuditClient(nil) + if err != nil { + ms.log.Errorw("Failure creating audit monitoring client", "error", err) + } } } } From c678973c5d5127f132b036b30ce9c73a880e2e74 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Thu, 19 Nov 2020 16:57:25 +0100 Subject: [PATCH 2/2] Fix error handling mistake --- auditbeat/module/auditd/audit_linux.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/auditbeat/module/auditd/audit_linux.go b/auditbeat/module/auditd/audit_linux.go index 570c1fd0403..a2c9e004877 100644 --- a/auditbeat/module/auditd/audit_linux.go +++ b/auditbeat/module/auditd/audit_linux.go @@ -164,7 +164,9 @@ func (ms *MetricSet) Run(reporter mb.PushReporterV2) { } go func() { defer func() { // Close the most recently allocated "client" instance. - client.Close() + if client != nil { + client.Close() + } }() timer := time.NewTicker(lostEventsUpdateInterval) defer timer.Stop() @@ -183,6 +185,8 @@ func (ms *MetricSet) Run(reporter mb.PushReporterV2) { client, err = libaudit.NewAuditClient(nil) if err != nil { ms.log.Errorw("Failure creating audit monitoring client", "error", err) + reporter.Error(err) + return } } }