From 304a037e39cb9d15bc6103671a3991d08e989afb Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 4 Jan 2018 15:04:45 -0800 Subject: [PATCH 1/5] Fix service.check_restart stanza propagation There was a bug in jobspec parsing, a bug in CheckRestart merging, and a bug in CheckRestart canonicalization. All are now tested. --- CHANGELOG.md | 3 +- api/tasks.go | 6 ++-- command/agent/job_endpoint_test.go | 26 ++++++++++++-- jobspec/parse.go | 1 + jobspec/parse_test.go | 36 +++++++++++++++++++ .../test-fixtures/service-check-restart.hcl | 21 +++++++++++ 6 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 jobspec/test-fixtures/service-check-restart.hcl diff --git a/CHANGELOG.md b/CHANGELOG.md index 795157226241..964ba1e4a64f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ BUG FIXES: allocations could result in improper placement counts [[GH-3717](https://github.com/hashicorp/nomad/issues/3717)] * client: Migrated ephemeral_disk's maintain directory permissions [[GH-3723](https://github.com/hashicorp/nomad/issues/3723)] * config: Revert minimum CPU limit back to 20 from 100. + * discovery: Fix handling of `service.check_restart` [[GH-3718](https://github.com/hashicorp/nomad/issues/3718)] * ui: Fix requests using client-side certificates in Firefox. [[GH-3728](https://github.com/hashicorp/nomad/pull/3728)] ## 0.7.1 (December 19, 2017) @@ -663,7 +664,7 @@ BUG FIXES: * client: Killing an allocation doesn't cause allocation stats to block [[GH-1454](https://github.com/hashicorp/nomad/issues/1454)] * driver/docker: Disable swap on docker driver [[GH-1480](https://github.com/hashicorp/nomad/issues/1480)] - * driver/docker: Fix improper gating on privileged mode [[GH-1506](https://github.com/hashicorp/nomad/issues/1506)] + * driver/docker: Fix improper gating on priviledged mode [[GH-1506](https://github.com/hashicorp/nomad/issues/1506)] * driver/docker: Default network type is "nat" on Windows [[GH-1521](https://github.com/hashicorp/nomad/issues/1521)] * driver/docker: Cleanup created volume when destroying container [[GH-1519](https://github.com/hashicorp/nomad/issues/1519)] * driver/rkt: Set host environment variables [[GH-1581](https://github.com/hashicorp/nomad/issues/1581)] diff --git a/api/tasks.go b/api/tasks.go index 7dc2950b187f..847bb869fa74 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -136,7 +136,7 @@ func (c *CheckRestart) Merge(o *CheckRestart) *CheckRestart { nc.Grace = o.Grace } - if nc.IgnoreWarnings { + if !nc.IgnoreWarnings { nc.IgnoreWarnings = o.IgnoreWarnings } @@ -189,9 +189,9 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) { // Canonicallize CheckRestart on Checks and merge Service.CheckRestart // into each check. - for _, c := range s.Checks { + for i, c := range s.Checks { + s.Checks[i].CheckRestart = c.CheckRestart.Merge(s.CheckRestart) c.CheckRestart.Canonicalize() - c.CheckRestart = c.CheckRestart.Merge(s.CheckRestart) } } diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 019a82ae059a..b595e28ab2aa 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -1212,6 +1212,10 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { Name: "serviceA", Tags: []string{"1", "2"}, PortLabel: "foo", + CheckRestart: &api.CheckRestart{ + Limit: 4, + Grace: helper.TimeToPtr(11 * time.Second), + }, Checks: []api.ServiceCheck{ { Id: "hello", @@ -1228,10 +1232,17 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { InitialStatus: "ok", CheckRestart: &api.CheckRestart{ Limit: 3, - Grace: helper.TimeToPtr(10 * time.Second), IgnoreWarnings: true, }, }, + { + Id: "check2id", + Name: "check2", + Type: "tcp", + PortLabel: "foo", + Interval: 4 * time.Second, + Timeout: 2 * time.Second, + }, }, }, }, @@ -1425,10 +1436,21 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) { InitialStatus: "ok", CheckRestart: &structs.CheckRestart{ Limit: 3, - Grace: 10 * time.Second, + Grace: 11 * time.Second, IgnoreWarnings: true, }, }, + { + Name: "check2", + Type: "tcp", + PortLabel: "foo", + Interval: 4 * time.Second, + Timeout: 2 * time.Second, + CheckRestart: &structs.CheckRestart{ + Limit: 4, + Grace: 11 * time.Second, + }, + }, }, }, }, diff --git a/jobspec/parse.go b/jobspec/parse.go index d25f38bd0f8e..babe41b1798d 100644 --- a/jobspec/parse.go +++ b/jobspec/parse.go @@ -912,6 +912,7 @@ func parseServices(jobName string, taskGroupName string, task *api.Task, service "port", "check", "address_mode", + "check_restart", } if err := helper.CheckHCLKeys(o.Val, valid); err != nil { return multierror.Prefix(err, fmt.Sprintf("service (%d) ->", idx)) diff --git a/jobspec/parse_test.go b/jobspec/parse_test.go index 2dfc890d42ff..4134e9ee4513 100644 --- a/jobspec/parse_test.go +++ b/jobspec/parse_test.go @@ -631,6 +631,42 @@ func TestParse(t *testing.T) { }, false, }, + { + "service-check-restart.hcl", + &api.Job{ + ID: helper.StringToPtr("service_check_restart"), + Name: helper.StringToPtr("service_check_restart"), + Type: helper.StringToPtr("service"), + TaskGroups: []*api.TaskGroup{ + { + Name: helper.StringToPtr("group"), + Tasks: []*api.Task{ + { + Name: "task", + Services: []*api.Service{ + { + Name: "http-service", + CheckRestart: &api.CheckRestart{ + Limit: 3, + Grace: helper.TimeToPtr(10 * time.Second), + IgnoreWarnings: true, + }, + Checks: []api.ServiceCheck{ + { + Name: "random-check", + Type: "tcp", + PortLabel: "9001", + }, + }, + }, + }, + }, + }, + }, + }, + }, + false, + }, } for _, tc := range cases { diff --git a/jobspec/test-fixtures/service-check-restart.hcl b/jobspec/test-fixtures/service-check-restart.hcl new file mode 100644 index 000000000000..d34f70003c43 --- /dev/null +++ b/jobspec/test-fixtures/service-check-restart.hcl @@ -0,0 +1,21 @@ +job "service_check_restart" { + type = "service" + group "group" { + task "task" { + service { + name = "http-service" + check_restart { + limit = 3 + grace = "10s" + ignore_warnings = true + } + check { + name = "random-check" + type = "tcp" + port = "9001" + } + } + } + } +} + From 566abcbe23643c3c823c4359b86cd766852d4567 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 9 Jan 2018 14:15:31 -0800 Subject: [PATCH 2/5] Move changelog entry from bug fix to feature It was never really implemented to begin with --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 964ba1e4a64f..ab026ddd4ce3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,13 +5,16 @@ __BACKWARDS INCOMPATIBILITIES:__ that absolute URLs are not allowed, but it was not enforced. Absolute URLs in HTTP check paths will now fail to validate. [[GH-3685](https://github.com/hashicorp/nomad/issues/3685)] +IMPROVEMENTS: + * discovery: Allow `check_restart` to be specified in the `service` stanza. + [GH-3718] + BUG FIXES: * core: Fix search endpoint forwarding for multi-region clusters [[GH-3680](https://github.com/hashicorp/nomad/issues/3680)] * core: Fix an issue in which batch jobs with queued placements and lost allocations could result in improper placement counts [[GH-3717](https://github.com/hashicorp/nomad/issues/3717)] * client: Migrated ephemeral_disk's maintain directory permissions [[GH-3723](https://github.com/hashicorp/nomad/issues/3723)] * config: Revert minimum CPU limit back to 20 from 100. - * discovery: Fix handling of `service.check_restart` [[GH-3718](https://github.com/hashicorp/nomad/issues/3718)] * ui: Fix requests using client-side certificates in Firefox. [[GH-3728](https://github.com/hashicorp/nomad/pull/3728)] ## 0.7.1 (December 19, 2017) From 9b9a4af182d002e68e2a8db233153df9ae95de89 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 9 Jan 2018 14:53:34 -0800 Subject: [PATCH 3/5] Invert and test CheckRestart merge logic --- api/tasks.go | 14 ++++++-------- api/tasks_test.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 847bb869fa74..a7e3de40af5b 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -128,15 +128,15 @@ func (c *CheckRestart) Merge(o *CheckRestart) *CheckRestart { return nc } - if nc.Limit == 0 { + if o.Limit > 0 { nc.Limit = o.Limit } - if nc.Grace == nil { + if o.Grace != nil { nc.Grace = o.Grace } - if !nc.IgnoreWarnings { + if o.IgnoreWarnings { nc.IgnoreWarnings = o.IgnoreWarnings } @@ -185,13 +185,11 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) { s.AddressMode = "auto" } - s.CheckRestart.Canonicalize() - // Canonicallize CheckRestart on Checks and merge Service.CheckRestart // into each check. - for i, c := range s.Checks { - s.Checks[i].CheckRestart = c.CheckRestart.Merge(s.CheckRestart) - c.CheckRestart.Canonicalize() + for i, check := range s.Checks { + s.Checks[i].CheckRestart = s.CheckRestart.Merge(check.CheckRestart) + s.Checks[i].CheckRestart.Canonicalize() } } diff --git a/api/tasks_test.go b/api/tasks_test.go index d870eab27da9..7542c60940a1 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -3,6 +3,7 @@ package api import ( "reflect" "testing" + "time" "github.com/hashicorp/nomad/helper" "github.com/stretchr/testify/assert" @@ -266,3 +267,51 @@ func TestTaskGroup_Canonicalize_Update(t *testing.T) { tg.Canonicalize(job) assert.Nil(t, tg.Update) } + +// TestService_CheckRestart asserts Service.CheckRestart settings are properly +// inherited by Checks. +func TestService_CheckRestart(t *testing.T) { + job := &Job{Name: helper.StringToPtr("job")} + tg := &TaskGroup{Name: helper.StringToPtr("group")} + task := &Task{Name: "task"} + service := &Service{ + CheckRestart: &CheckRestart{ + Limit: 11, + Grace: helper.TimeToPtr(11 * time.Second), + IgnoreWarnings: true, + }, + Checks: []ServiceCheck{ + { + Name: "all-set", + CheckRestart: &CheckRestart{ + Limit: 22, + Grace: helper.TimeToPtr(22 * time.Second), + IgnoreWarnings: true, + }, + }, + { + Name: "some-set", + CheckRestart: &CheckRestart{ + Limit: 33, + Grace: helper.TimeToPtr(33 * time.Second), + }, + }, + { + Name: "unset", + }, + }, + } + + service.Canonicalize(task, tg, job) + assert.Equal(t, service.Checks[0].CheckRestart.Limit, 22) + assert.Equal(t, *service.Checks[0].CheckRestart.Grace, 22*time.Second) + assert.True(t, service.Checks[0].CheckRestart.IgnoreWarnings) + + assert.Equal(t, service.Checks[1].CheckRestart.Limit, 33) + assert.Equal(t, *service.Checks[1].CheckRestart.Grace, 33*time.Second) + assert.True(t, service.Checks[1].CheckRestart.IgnoreWarnings) + + assert.Equal(t, service.Checks[2].CheckRestart.Limit, 11) + assert.Equal(t, *service.Checks[2].CheckRestart.Grace, 11*time.Second) + assert.True(t, service.Checks[2].CheckRestart.IgnoreWarnings) +} From 756305f0299564f1ca46f6c2864c496d013fa8a0 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 9 Jan 2018 15:18:22 -0800 Subject: [PATCH 4/5] Revert "Missed header mention of server.check_restart" This reverts commit 8295f81dddf8b53c0b78707be6fddc6e30f95640. --- website/source/docs/job-specification/check_restart.html.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/website/source/docs/job-specification/check_restart.html.md b/website/source/docs/job-specification/check_restart.html.md index f61648f0fa9c..19b720276b48 100644 --- a/website/source/docs/job-specification/check_restart.html.md +++ b/website/source/docs/job-specification/check_restart.html.md @@ -10,6 +10,12 @@ description: |- # `check_restart` Stanza + + + +
Placement + job -> group -> task -> service -> **check_restart** +
Placement From b658ac35aec07e18ca6e939e85b32706fbcc097b Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 9 Jan 2018 15:18:34 -0800 Subject: [PATCH 5/5] Revert "Remove mention of check_restart on service" This reverts commit 758b98685be4a2997bd0bc54f55b73ac3d0365cc. --- website/source/docs/job-specification/check_restart.html.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/website/source/docs/job-specification/check_restart.html.md b/website/source/docs/job-specification/check_restart.html.md index 19b720276b48..d183bd0547d7 100644 --- a/website/source/docs/job-specification/check_restart.html.md +++ b/website/source/docs/job-specification/check_restart.html.md @@ -28,7 +28,10 @@ As of Nomad 0.7 the `check_restart` stanza instructs Nomad when to restart tasks with unhealthy service checks. When a health check in Consul has been unhealthy for the `limit` specified in a `check_restart` stanza, it is restarted according to the task group's [`restart` policy][restart_stanza]. The -`check_restart` settings apply to [`check`s][check_stanza]. +`check_restart` settings apply to [`check`s][check_stanza], but may also be +placed on [`service`s][service_stanza] to apply to all checks on a service. +If `check_restart` is set on both the check and service, the stanzas are +merged with the check values taking precedence. ```hcl job "mysql" { @@ -146,3 +149,4 @@ details. [check_stanza]: /docs/job-specification/service.html#check-parameters "check stanza" [restart_stanza]: /docs/job-specification/restart.html "restart stanza" +[service_stanza]: /docs/job-specification/service.html "service stanza"