diff --git a/client/allocrunner/taskrunner/logmon_hook.go b/client/allocrunner/taskrunner/logmon_hook.go index 7c68aa0f7b69..1f58d7e33644 100644 --- a/client/allocrunner/taskrunner/logmon_hook.go +++ b/client/allocrunner/taskrunner/logmon_hook.go @@ -95,22 +95,29 @@ func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig, func (h *logmonHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { - // Create a logmon client by reattaching or launching a new instance - if h.logmonPluginClient == nil || h.logmonPluginClient.Exited() { + // Attempt to reattach to logmon + if h.logmonPluginClient == nil { reattachConfig, err := reattachConfigFromHookData(req.PreviousState) if err != nil { h.logger.Error("failed to load reattach config", "error", err) return err } + if reattachConfig != nil { + if err := h.launchLogMon(reattachConfig); err != nil { + h.logger.Warn("failed to reattach to logmon process", "error", err) + } + } + + } - // Launch or reattach logmon instance for the task. - if err := h.launchLogMon(reattachConfig); err != nil { - // Retry errors launching logmon as logmon may have crashed and + // We did not reattach to a plugin and one is still not running. + if h.logmonPluginClient == nil || h.logmonPluginClient.Exited() { + if err := h.launchLogMon(nil); err != nil { + // Retry errors launching logmon as logmon may have crashed on start and // subsequent attempts will start a new one. h.logger.Error("failed to launch logmon process", "error", err) return structs.NewRecoverableError(err, true) } - } err := h.logmon.Start(&logmon.LogConfig{ diff --git a/client/allocrunner/taskrunner/logmon_hook_unix_test.go b/client/allocrunner/taskrunner/logmon_hook_unix_test.go index 0dac426376fd..582692a27bde 100644 --- a/client/allocrunner/taskrunner/logmon_hook_unix_test.go +++ b/client/allocrunner/taskrunner/logmon_hook_unix_test.go @@ -14,14 +14,13 @@ import ( "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/require" ) // TestTaskRunner_LogmonHook_StartCrashStop simulates logmon crashing while the // Nomad client is restarting and asserts failing to reattach to logmon causes -// a recoverable error (task restart). +// nomad to spawn a new logmon. func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { t.Parallel() @@ -46,6 +45,7 @@ func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { require.NoError(t, hook.Prestart(context.Background(), &req, &resp)) defer hook.Stop(context.Background(), nil, nil) + origState := resp.State origHookData := resp.State[logmonReattachKey] require.NotEmpty(t, origHookData) @@ -80,9 +80,8 @@ func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { } resp = interfaces.TaskPrestartResponse{} err = hook.Prestart(context.Background(), &req, &resp) - require.Error(t, err) - require.True(t, structs.IsRecoverable(err)) - require.Empty(t, resp.State) + require.NoError(t, err) + require.NotEqual(t, origState, resp.State) // Running stop should shutdown logmon require.NoError(t, hook.Stop(context.Background(), nil, nil))