From 78e355b03d709c3164f7d7356cd2b103c29af9c5 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 5 Jun 2020 17:45:00 +0200 Subject: [PATCH] Windows: fix service termination (#18916) Update the Windows service handling logic so that the service doesn't transition to the STOPPED state until the beater is terminated. Before this patch, a Beats service would report itself as STOPPED as soon as it received the stop request. This caused problems during service restarts, as the new service would start while the old one was still cleaning up. Fixes #18914 --- CHANGELOG.next.asciidoc | 1 + libbeat/cmd/instance/beat.go | 6 ++++++ libbeat/service/service.go | 5 +++++ libbeat/service/service_unix.go | 3 +++ libbeat/service/service_windows.go | 27 ++++++++++++++++++++++++--- 5 files changed, 39 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index d29e98c08a2..532faa78410 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -119,6 +119,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix an issue where error messages are not accurate in mapstriface. {issue}18662[18662] {pull}18663[18663] - Fix regression in `add_kubernetes_metadata`, so configured `indexers` and `matchers` are used if defaults are not disabled. {issue}18481[18481] {pull}18818[18818] - Fix potential race condition in fingerprint processor. {pull}18738[18738] +- Fixed a service restart failure under Windows. {issue}18914[18914] {pull}18916[18916] *Auditbeat* diff --git a/libbeat/cmd/instance/beat.go b/libbeat/cmd/instance/beat.go index ee5529c9036..eb58d36a454 100644 --- a/libbeat/cmd/instance/beat.go +++ b/libbeat/cmd/instance/beat.go @@ -389,6 +389,12 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error { return err } + // Windows: Mark service as stopped. + // After this is run, a Beat service is considered by the OS to be stopped + // and another instance of the process can be started. 
+ // This must be the first deferred cleanup task (last to execute). + defer svc.NotifyTermination() + // Try to acquire exclusive lock on data path to prevent another beat instance // sharing same data path. bl := newLocker(b) diff --git a/libbeat/service/service.go b/libbeat/service/service.go index ec6e0fca672..4c56cfc28a2 100644 --- a/libbeat/service/service.go +++ b/libbeat/service/service.go @@ -67,6 +67,11 @@ func HandleSignals(stopFunction func(), cancel context.CancelFunc) { }) } +// NotifyTermination tells the OS that the service is stopped. +func NotifyTermination() { + notifyWindowsServiceStopped() +} + // cmdline flags var memprofile, cpuprofile, httpprof *string var cpuOut *os.File diff --git a/libbeat/service/service_unix.go b/libbeat/service/service_unix.go index 7c6bfb4d08a..7d20b04620e 100644 --- a/libbeat/service/service_unix.go +++ b/libbeat/service/service_unix.go @@ -22,3 +22,6 @@ package service // ProcessWindowsControlEvents is not used on non-windows platforms. func ProcessWindowsControlEvents(stopCallback func()) { } + +func notifyWindowsServiceStopped() { +} diff --git a/libbeat/service/service_windows.go b/libbeat/service/service_windows.go index 649bf85cfa8..a81f4fb5a0f 100644 --- a/libbeat/service/service_windows.go +++ b/libbeat/service/service_windows.go @@ -28,7 +28,15 @@ import ( "github.com/elastic/beats/v7/libbeat/logp" ) -type beatService struct{} +type beatService struct { + stopCallback func() + done chan struct{} +} + +var serviceInstance = &beatService{ + stopCallback: nil, + done: make(chan struct{}, 0), +} // Execute runs the beat service with the arguments and manages changes that // occur in the environment or runtime that may affect the beat. @@ -52,9 +60,22 @@ loop: } } changes <- svc.Status{State: svc.StopPending} + m.stopCallback() + // Block until notifyWindowsServiceStopped below is called. This is required + // as the windows/svc package will transition the service to STOPPED state + // once this function returns. 
+ <-m.done return } +func (m *beatService) stop() { + close(m.done) +} + +func notifyWindowsServiceStopped() { + serviceInstance.stop() +} + // couldNotConnect is the errno for ERROR_FAILED_SERVICE_CONTROLLER_CONNECT. const couldNotConnect syscall.Errno = 1063 @@ -76,10 +97,10 @@ func ProcessWindowsControlEvents(stopCallback func()) { run = debug.Run } - err = run(os.Args[0], &beatService{}) + serviceInstance.stopCallback = stopCallback + err = run(os.Args[0], serviceInstance) if err == nil { - stopCallback() return }