Skip to content

Commit

Permalink
logs: fix missing allocation logs after update to Nomad 1.5.4 (#17087)
Browse files Browse the repository at this point in the history
When the server restarts for the upgrade, it loads the `structs.Job` from the
Raft snapshot/logs. The jobspec has long since been parsed, so none of the
guards around the default value are in play. The empty field value for `Enabled`
is the zero value, which is false.

This doesn't impact any running allocation because we don't replace running
allocations when either the client or server restart. But as soon as any
allocation gets rescheduled (ex. you drain all your clients during upgrades),
it'll be using the `structs.Job` that the server has, which has `Enabled =
false`, and logs will not be collected.

This changeset fixes the bug by adding a new field `Disabled` which defaults to
false (so that the zero value works), and deprecates the old field.

Fixes #17076
  • Loading branch information
tgross authored May 4, 2023
1 parent 351e010 commit 2aa3c74
Show file tree
Hide file tree
Showing 18 changed files with 108 additions and 72 deletions.
3 changes: 3 additions & 0 deletions .changelog/17087.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
logging: Fixed a bug where alloc logs would not be collected after an upgrade to 1.5.4
```
17 changes: 11 additions & 6 deletions api/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,16 +639,21 @@ func (g *TaskGroup) AddSpread(s *Spread) *TaskGroup {

// LogConfig provides configuration for log rotation
type LogConfig struct {
MaxFiles *int `mapstructure:"max_files" hcl:"max_files,optional"`
MaxFileSizeMB *int `mapstructure:"max_file_size" hcl:"max_file_size,optional"`
Enabled *bool `mapstructure:"enabled" hcl:"enabled,optional"`
MaxFiles *int `mapstructure:"max_files" hcl:"max_files,optional"`
MaxFileSizeMB *int `mapstructure:"max_file_size" hcl:"max_file_size,optional"`

// COMPAT(1.6.0): Enabled had to be swapped for Disabled to fix a backwards
// compatibility bug when restoring pre-1.5.4 jobs. Remove in 1.6.0
Enabled *bool `mapstructure:"enabled" hcl:"enabled,optional"`

Disabled *bool `mapstructure:"disabled" hcl:"disabled,optional"`
}

func DefaultLogConfig() *LogConfig {
return &LogConfig{
MaxFiles: pointerOf(10),
MaxFileSizeMB: pointerOf(10),
Enabled: pointerOf(true),
Disabled: pointerOf(false),
}
}

Expand All @@ -659,8 +664,8 @@ func (l *LogConfig) Canonicalize() {
if l.MaxFileSizeMB == nil {
l.MaxFileSizeMB = pointerOf(10)
}
if l.Enabled == nil {
l.Enabled = pointerOf(true)
if l.Disabled == nil {
l.Disabled = pointerOf(false)
}
}

Expand Down
24 changes: 12 additions & 12 deletions client/allocrunner/taskrunner/logmon_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ type logmonHook struct {

type logmonHookConfig struct {
logDir string
enabled bool
disabled bool
stdoutFifo string
stderrFifo string
}
Expand All @@ -63,12 +63,12 @@ func newLogMonHook(tr *TaskRunner, logger hclog.Logger) *logmonHook {

func newLogMonHookConfig(taskName string, logCfg *structs.LogConfig, logDir string) *logmonHookConfig {
cfg := &logmonHookConfig{
logDir: logDir,
enabled: logCfg.Enabled,
logDir: logDir,
disabled: logCfg.Disabled,
}

// If logging is disabled configure task's stdout/err to point to devnull
if !logCfg.Enabled {
if logCfg.Disabled {
cfg.stdoutFifo = os.DevNull
cfg.stderrFifo = os.DevNull
return cfg
Expand Down Expand Up @@ -116,7 +116,7 @@ func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig,

func (h *logmonHook) Prestart(ctx context.Context,
req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
if !h.isLoggingEnabled() {
if h.isLoggingDisabled() {
return nil
}

Expand Down Expand Up @@ -151,27 +151,27 @@ func (h *logmonHook) Prestart(ctx context.Context,
}
}

func (h *logmonHook) isLoggingEnabled() bool {
if !h.config.enabled {
func (h *logmonHook) isLoggingDisabled() bool {
if h.config.disabled {
h.logger.Debug("log collection is disabled by task")
return false
return true
}

// internal plugins have access to a capability to disable logging and
// metrics via a private interface; this is an "experimental" interface and
// currently only the docker driver exposes this to users.
ic, ok := h.runner.driver.(drivers.InternalCapabilitiesDriver)
if !ok {
return true
return false
}

caps := ic.InternalCapabilities()
if caps.DisableLogCollection {
h.logger.Debug("log collection is disabled by driver")
return false
return true
}

return true
return false
}

func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error {
Expand Down Expand Up @@ -219,7 +219,7 @@ func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPr
}

func (h *logmonHook) Stop(_ context.Context, req *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error {
if !h.isLoggingEnabled() {
if h.isLoggingDisabled() {
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion client/allocrunner/taskrunner/logmon_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func TestTaskRunner_LogmonHook_Disabled(t *testing.T) {

alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.LogConfig.Enabled = false
task.LogConfig.Disabled = true

dir := t.TempDir()

Expand Down
16 changes: 10 additions & 6 deletions command/agent/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1242,11 +1242,7 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup,

structsTask.Resources = ApiResourcesToStructs(apiTask.Resources)

structsTask.LogConfig = &structs.LogConfig{
MaxFiles: *apiTask.LogConfig.MaxFiles,
MaxFileSizeMB: *apiTask.LogConfig.MaxFileSizeMB,
Enabled: *apiTask.LogConfig.Enabled,
}
structsTask.LogConfig = apiLogConfigToStructs(apiTask.LogConfig)

if len(apiTask.Artifacts) > 0 {
structsTask.Artifacts = []*structs.TaskArtifact{}
Expand Down Expand Up @@ -1809,13 +1805,21 @@ func apiLogConfigToStructs(in *api.LogConfig) *structs.LogConfig {
if in == nil {
return nil
}

return &structs.LogConfig{
Enabled: *in.Enabled,
Disabled: dereferenceBool(in.Disabled),
MaxFiles: dereferenceInt(in.MaxFiles),
MaxFileSizeMB: dereferenceInt(in.MaxFileSizeMB),
}
}

func dereferenceBool(in *bool) bool {
if in == nil {
return false
}
return *in
}

func dereferenceInt(in *int) int {
if in == nil {
return 0
Expand Down
43 changes: 33 additions & 10 deletions command/agent/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2767,7 +2767,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: pointer.Of(10 * time.Second),
KillSignal: "SIGQUIT",
LogConfig: &api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(10),
MaxFileSizeMB: pointer.Of(100),
},
Expand Down Expand Up @@ -3185,7 +3185,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: 10 * time.Second,
KillSignal: "SIGQUIT",
LogConfig: &structs.LogConfig{
Enabled: true,
Disabled: true,
MaxFiles: 10,
MaxFileSizeMB: 100,
},
Expand Down Expand Up @@ -3342,7 +3342,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: pointer.Of(10 * time.Second),
KillSignal: "SIGQUIT",
LogConfig: &api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(10),
MaxFileSizeMB: pointer.Of(100),
},
Expand Down Expand Up @@ -3468,7 +3468,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
KillTimeout: 10 * time.Second,
KillSignal: "SIGQUIT",
LogConfig: &structs.LogConfig{
Enabled: true,
Disabled: true,
MaxFiles: 10,
MaxFileSizeMB: 100,
},
Expand Down Expand Up @@ -3639,16 +3639,39 @@ func TestConversion_dereferenceInt(t *testing.T) {

func TestConversion_apiLogConfigToStructs(t *testing.T) {
ci.Parallel(t)
require.Nil(t, apiLogConfigToStructs(nil))
require.Equal(t, &structs.LogConfig{
Enabled: true,
must.Nil(t, apiLogConfigToStructs(nil))
must.Eq(t, &structs.LogConfig{
Disabled: true,
MaxFiles: 2,
MaxFileSizeMB: 8,
}, apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(2),
MaxFileSizeMB: pointer.Of(8),
}))

// COMPAT(1.6.0): verify backwards compatibility fixes
// Note: we're intentionally ignoring the Enabled: false case
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(false),
}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(true),
}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Disabled: pointer.Of(false),
}))
must.Eq(t, &structs.LogConfig{Disabled: false},
apiLogConfigToStructs(&api.LogConfig{
Enabled: pointer.Of(false),
Disabled: pointer.Of(false),
}))

}

func TestConversion_apiResourcesToStructs(t *testing.T) {
Expand Down Expand Up @@ -3743,7 +3766,7 @@ func TestConversion_apiConnectSidecarTaskToStructs(t *testing.T) {
Meta: meta,
KillTimeout: &timeout,
LogConfig: &structs.LogConfig{
Enabled: true,
Disabled: true,
MaxFiles: 2,
MaxFileSizeMB: 8,
},
Expand All @@ -3762,7 +3785,7 @@ func TestConversion_apiConnectSidecarTaskToStructs(t *testing.T) {
Meta: meta,
KillTimeout: &timeout,
LogConfig: &api.LogConfig{
Enabled: pointer.Of(true),
Disabled: pointer.Of(true),
MaxFiles: pointer.Of(2),
MaxFileSizeMB: pointer.Of(8),
},
Expand Down
3 changes: 2 additions & 1 deletion jobspec/parse_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ func parseTask(item *ast.ObjectItem, keys []string) (*api.Task, error) {
valid := []string{
"max_files",
"max_file_size",
"enabled",
"enabled", // COMPAT(1.6.0): remove in favor of disabled
"disabled",
}
if err := checkHCLKeys(logsBlock.Val, valid); err != nil {
return nil, multierror.Prefix(err, "logs ->")
Expand Down
2 changes: 1 addition & 1 deletion jobspec/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ func TestParse(t *testing.T) {
LogConfig: &api.LogConfig{
MaxFiles: intToPtr(14),
MaxFileSizeMB: intToPtr(101),
Enabled: boolToPtr(true),
Disabled: boolToPtr(false),
},
Artifacts: []*api.TaskArtifact{
{
Expand Down
2 changes: 1 addition & 1 deletion jobspec/test-fixtures/basic.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ job "binstore-storagelocker" {
}

logs {
enabled = true
disabled = false
max_files = 14
max_file_size = 101
}
Expand Down
24 changes: 12 additions & 12 deletions nomad/structs/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4420,7 +4420,7 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: true,
},
},
Expected: &TaskDiff{
Expand All @@ -4432,9 +4432,9 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeAdded,
Name: "Enabled",
Name: "Disabled",
Old: "",
New: "false",
New: "true",
},
{
Type: DiffTypeAdded,
Expand All @@ -4459,7 +4459,7 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: true,
},
},
New: &Task{},
Expand All @@ -4472,8 +4472,8 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeDeleted,
Name: "Enabled",
Old: "false",
Name: "Disabled",
Old: "true",
New: "",
},
{
Expand All @@ -4499,14 +4499,14 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: false,
},
},
New: &Task{
LogConfig: &LogConfig{
MaxFiles: 2,
MaxFileSizeMB: 20,
Enabled: true,
Disabled: true,
},
},
Expected: &TaskDiff{
Expand All @@ -4518,7 +4518,7 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeEdited,
Name: "Enabled",
Name: "Disabled",
Old: "false",
New: "true",
},
Expand Down Expand Up @@ -4546,14 +4546,14 @@ func TestTaskDiff(t *testing.T) {
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 10,
Enabled: false,
Disabled: false,
},
},
New: &Task{
LogConfig: &LogConfig{
MaxFiles: 1,
MaxFileSizeMB: 20,
Enabled: true,
Disabled: true,
},
},
Expected: &TaskDiff{
Expand All @@ -4565,7 +4565,7 @@ func TestTaskDiff(t *testing.T) {
Fields: []*FieldDiff{
{
Type: DiffTypeEdited,
Name: "Enabled",
Name: "Disabled",
Old: "false",
New: "true",
},
Expand Down
Loading

0 comments on commit 2aa3c74

Please sign in to comment.