Skip to content

Commit

Permalink
Merge pull request #5719 from hashicorp/f-auto-promote-canaries
Browse files Browse the repository at this point in the history
auto promote canaries
  • Loading branch information
langmartin committed May 22, 2019
2 parents df84e07 + c9a837c commit 643d840
Show file tree
Hide file tree
Showing 30 changed files with 625 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ IMPROVEMENTS:
* core: Add node name to output of `nomad node status` command in verbose mode [[GH-5224](https://github.com/hashicorp/nomad/pull/5224)]
* core: Reduce the size of the raft transaction for plans by only sending fields updated by the plan applier [[GH-5602](https://github.com/hashicorp/nomad/pull/5602)]
* api: Support configuring `http.Client` used by golang `api` package [[GH-5275](https://github.com/hashicorp/nomad/pull/5275)]
* core: Add job update `auto_promote` flag, which causes deployments to promote themselves when all canaries become healthy [[GH-5719](https://github.com/hashicorp/nomad/pull/5719)]
* api: Add preemption related fields to API results that return an allocation list. [[GH-5580](https://github.com/hashicorp/nomad/pull/5580)]
* api: Add additional config options to scheduler configuration endpoint to disable preemption [[GH-5628](https://github.com/hashicorp/nomad/issues/5628)]
* client: Reduce unnecessary lost nodes on server failure [[GH-5654](https://github.com/hashicorp/nomad/issues/5654)]
Expand Down
14 changes: 13 additions & 1 deletion api/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,13 +375,15 @@ type UpdateStrategy struct {
MinHealthyTime *time.Duration `mapstructure:"min_healthy_time"`
HealthyDeadline *time.Duration `mapstructure:"healthy_deadline"`
ProgressDeadline *time.Duration `mapstructure:"progress_deadline"`
AutoRevert *bool `mapstructure:"auto_revert"`
Canary *int `mapstructure:"canary"`
AutoRevert *bool `mapstructure:"auto_revert"`
AutoPromote *bool `mapstructure:"auto_promote"`
}

// DefaultUpdateStrategy provides a baseline that can be used to upgrade
// jobs with the old policy or for populating field defaults.
func DefaultUpdateStrategy() *UpdateStrategy {
// boolPtr fields are omitted to avoid masking an unconfigured nil
return &UpdateStrategy{
Stagger: timeToPtr(30 * time.Second),
MaxParallel: intToPtr(1),
Expand Down Expand Up @@ -433,6 +435,10 @@ func (u *UpdateStrategy) Copy() *UpdateStrategy {
copy.Canary = intToPtr(*u.Canary)
}

if u.AutoPromote != nil {
copy.AutoPromote = boolToPtr(*u.AutoPromote)
}

return copy
}

Expand Down Expand Up @@ -472,11 +478,17 @@ func (u *UpdateStrategy) Merge(o *UpdateStrategy) {
if o.Canary != nil {
u.Canary = intToPtr(*o.Canary)
}

if o.AutoPromote != nil {
u.AutoPromote = boolToPtr(*o.AutoPromote)
}
}

func (u *UpdateStrategy) Canonicalize() {
d := DefaultUpdateStrategy()

// boolPtr fields are omitted to avoid masking an unconfigured nil

if u.MaxParallel == nil {
u.MaxParallel = d.MaxParallel
}
Expand Down
7 changes: 7 additions & 0 deletions api/jobs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ func TestJobs_Canonicalize(t *testing.T) {
ProgressDeadline: timeToPtr(10 * time.Minute),
AutoRevert: boolToPtr(false),
Canary: intToPtr(0),
AutoPromote: nil,
},
TaskGroups: []*TaskGroup{
{
Expand Down Expand Up @@ -357,6 +358,7 @@ func TestJobs_Canonicalize(t *testing.T) {
ProgressDeadline: timeToPtr(10 * time.Minute),
AutoRevert: boolToPtr(false),
Canary: intToPtr(0),
AutoPromote: nil,
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
Expand Down Expand Up @@ -486,6 +488,7 @@ func TestJobs_Canonicalize(t *testing.T) {
ProgressDeadline: timeToPtr(7 * time.Minute),
AutoRevert: boolToPtr(false),
Canary: intToPtr(0),
AutoPromote: boolToPtr(false),
},
TaskGroups: []*TaskGroup{
{
Expand All @@ -497,6 +500,7 @@ func TestJobs_Canonicalize(t *testing.T) {
MinHealthyTime: timeToPtr(1 * time.Second),
AutoRevert: boolToPtr(true),
Canary: intToPtr(1),
AutoPromote: boolToPtr(true),
},
Tasks: []*Task{
{
Expand Down Expand Up @@ -541,6 +545,7 @@ func TestJobs_Canonicalize(t *testing.T) {
ProgressDeadline: timeToPtr(7 * time.Minute),
AutoRevert: boolToPtr(false),
Canary: intToPtr(0),
AutoPromote: boolToPtr(false),
},
TaskGroups: []*TaskGroup{
{
Expand Down Expand Up @@ -574,6 +579,7 @@ func TestJobs_Canonicalize(t *testing.T) {
ProgressDeadline: timeToPtr(7 * time.Minute),
AutoRevert: boolToPtr(true),
Canary: intToPtr(1),
AutoPromote: boolToPtr(true),
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
Expand Down Expand Up @@ -616,6 +622,7 @@ func TestJobs_Canonicalize(t *testing.T) {
ProgressDeadline: timeToPtr(7 * time.Minute),
AutoRevert: boolToPtr(false),
Canary: intToPtr(0),
AutoPromote: boolToPtr(false),
},
Migrate: DefaultMigrateStrategy(),
Tasks: []*Task{
Expand Down
1 change: 1 addition & 0 deletions api/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ func NewTaskGroup(name string, count int) *TaskGroup {
}
}

// Canonicalize sets defaults and merges settings that should be inherited from the job
func (g *TaskGroup) Canonicalize(job *Job) {
if g.Name == nil {
g.Name = stringToPtr("")
Expand Down
1 change: 1 addition & 0 deletions api/tasks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ func TestTaskGroup_Canonicalize_Update(t *testing.T) {
ID: stringToPtr("test"),
Update: &UpdateStrategy{
AutoRevert: boolToPtr(false),
AutoPromote: boolToPtr(false),
Canary: intToPtr(0),
HealthCheck: stringToPtr(""),
HealthyDeadline: timeToPtr(0),
Expand Down
14 changes: 12 additions & 2 deletions command/agent/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,9 @@ func ApiJobToStructJob(job *api.Job) *structs.Job {
Affinities: ApiAffinitiesToStructs(job.Affinities),
}

// COMPAT: Remove in 0.7.0. Update has been pushed into the task groups
// Update has been pushed into the task groups. stagger and max_parallel are
// preserved at the job level, but all other values are discarded. The job.Update
// api value is merged into TaskGroups already in api.Canonicalize
if job.Update != nil {
j.Update = structs.UpdateStrategy{}

Expand Down Expand Up @@ -718,9 +720,17 @@ func ApiTgToStructsTG(taskGroup *api.TaskGroup, tg *structs.TaskGroup) {
MinHealthyTime: *taskGroup.Update.MinHealthyTime,
HealthyDeadline: *taskGroup.Update.HealthyDeadline,
ProgressDeadline: *taskGroup.Update.ProgressDeadline,
AutoRevert: *taskGroup.Update.AutoRevert,
Canary: *taskGroup.Update.Canary,
}

// boolPtr fields may be nil, others will have pointers to default values via Canonicalize
if taskGroup.Update.AutoRevert != nil {
tg.Update.AutoRevert = *taskGroup.Update.AutoRevert
}

if taskGroup.Update.AutoPromote != nil {
tg.Update.AutoPromote = *taskGroup.Update.AutoPromote
}
}

if l := len(taskGroup.Tasks); l != 0 {
Expand Down
74 changes: 74 additions & 0 deletions command/agent/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1624,6 +1624,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
HealthyDeadline: 5 * time.Minute,
ProgressDeadline: 5 * time.Minute,
AutoRevert: true,
AutoPromote: false,
Canary: 1,
},
Meta: map[string]string{
Expand Down Expand Up @@ -2039,6 +2040,79 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
}
}

// TestJobs_ApiJobToStructsJobUpdate verifies how update strategies are
// converted by ApiJobToStructJob: the job-level result keeps only Stagger and
// MaxParallel, while each task group ends up with the job-level settings
// combined with the values it sets itself.
func TestJobs_ApiJobToStructsJobUpdate(t *testing.T) {
	jobLevel := &api.UpdateStrategy{
		Stagger:          helper.TimeToPtr(1 * time.Second),
		MaxParallel:      helper.IntToPtr(5),
		HealthCheck:      helper.StringToPtr(structs.UpdateStrategyHealthCheck_Manual),
		MinHealthyTime:   helper.TimeToPtr(1 * time.Minute),
		HealthyDeadline:  helper.TimeToPtr(3 * time.Minute),
		ProgressDeadline: helper.TimeToPtr(3 * time.Minute),
		AutoRevert:       helper.BoolToPtr(false),
		AutoPromote:      nil,
		Canary:           helper.IntToPtr(1),
	}

	// The first group overrides Canary and AutoRevert, the second overrides
	// Canary and AutoPromote; everything else comes from the job level.
	revertingGroup := &api.TaskGroup{
		Update: &api.UpdateStrategy{
			Canary:     helper.IntToPtr(2),
			AutoRevert: helper.BoolToPtr(true),
		},
	}
	promotingGroup := &api.TaskGroup{
		Update: &api.UpdateStrategy{
			Canary:      helper.IntToPtr(3),
			AutoPromote: helper.BoolToPtr(true),
		},
	}

	input := &api.Job{
		Update:     jobLevel,
		TaskGroups: []*api.TaskGroup{revertingGroup, promotingGroup},
	}

	got := ApiJobToStructJob(input)

	// At the job level only Stagger and MaxParallel survive; every other
	// field is expected to be its zero value.
	wantJob := structs.UpdateStrategy{
		Stagger:     1 * time.Second,
		MaxParallel: 5,
	}

	// Settings both groups are expected to pick up from the job update.
	inherited := structs.UpdateStrategy{
		Stagger:          1 * time.Second,
		MaxParallel:      5,
		HealthCheck:      "manual",
		MinHealthyTime:   1 * time.Minute,
		HealthyDeadline:  3 * time.Minute,
		ProgressDeadline: 3 * time.Minute,
	}

	wantFirst := inherited
	wantFirst.AutoRevert = true
	wantFirst.AutoPromote = false
	wantFirst.Canary = 2

	wantSecond := inherited
	wantSecond.AutoRevert = false
	wantSecond.AutoPromote = true
	wantSecond.Canary = 3

	require.Equal(t, wantJob, got.Update)
	require.Equal(t, wantFirst, *got.TaskGroups[0].Update)
	require.Equal(t, wantSecond, *got.TaskGroups[1].Update)
}

// TestHTTP_JobValidate_SystemMigrate asserts that a system job with a migrate
// stanza fails to validate but does not panic (see #5477).
func TestHTTP_JobValidate_SystemMigrate(t *testing.T) {
Expand Down
4 changes: 2 additions & 2 deletions e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ You'll need AWS credentials (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) to

Running
===========
After completing the provisioning step above, you should see CLI output showing the IP addresses of Nomad client machines. To run the tests, set the NOMAD_ADDR variable to one of the client IPs.
After completing the provisioning step above, you should see CLI output showing the IP addresses of Nomad client machines. To run the tests, set the NOMAD_ADDR variable to `http://[client IP]:4646/`

```
$ NOMAD_ADDR=<> $NOMAD_E2E=1 go test -v
$ NOMAD_ADDR=<> NOMAD_E2E=1 go test -v
```
79 changes: 79 additions & 0 deletions e2e/deployment/deployment.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package deployment

import (
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/stretchr/testify/require"

"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/helper/uuid"
)

// DeploymentTest is the e2e test case for deployments. It embeds framework.TC
// and records the IDs of the jobs its tests register so that AfterEach can
// deregister them.
type DeploymentTest struct {
framework.TC
// jobIds holds the IDs of jobs registered by the tests in this case.
jobIds []string
}

// init adds the "Deployment" suite, consisting of a single DeploymentTest
// case, to the e2e framework.
func init() {
	suite := &framework.TestSuite{
		Component:   "Deployment",
		CanRunLocal: true,
		Cases:       []framework.TestCase{new(DeploymentTest)},
	}
	framework.AddSuites(suite)
}

// BeforeAll blocks until the cluster has a leader and the expected number of
// nodes report ready, so the tests start against a usable cluster.
func (tc *DeploymentTest) BeforeAll(f *framework.F) {
	// Number of ready nodes to wait for before the tests start.
	const readyNodes = 4

	t := f.T()
	nomadClient := tc.Nomad()

	e2eutil.WaitForLeader(t, nomadClient)
	e2eutil.WaitForNodesReady(t, nomadClient, readyNodes)
}

// TestDeploymentAutoPromote registers a job, registers an updated version of
// it under the same ID, and asserts that the job's deployment is first
// reported as running with the auto-promotion status description and
// eventually as running with the plain running description.
func (tc *DeploymentTest) TestDeploymentAutoPromote(f *framework.F) {
	t := f.T()
	nomadClient := tc.Nomad()

	// Named id rather than uuid so the imported uuid package is not shadowed.
	id := uuid.Generate()
	jobId := "deployment" + id[0:8]
	tc.jobIds = append(tc.jobIds, jobId)
	e2eutil.RegisterAndWaitForAllocs(t, nomadClient, "deployment/input/deployment_auto0.nomad", jobId)

	// Upgrade
	e2eutil.RegisterAllocs(t, nomadClient, "deployment/input/deployment_auto1.nomad", jobId)
	ds, _, err := nomadClient.Deployments().List(nil)
	require.NoError(t, err)

	// Find the deployment
	var deploy *api.Deployment
	for _, d := range ds {
		if d.JobID == jobId {
			deploy = d
			break
		}
	}

	// Fail with a readable message instead of panicking on a nil dereference
	// when the listing contains no deployment for this job.
	require.NotNil(t, deploy, "no deployment found for job %q", jobId)

	// Deployment is auto pending the upgrade of "two" which has a longer time to health
	run := structs.DeploymentStatusRunning
	require.Equal(t, run, deploy.Status)
	require.Equal(t, structs.DeploymentStatusDescriptionRunningAutoPromotion, deploy.StatusDescription)

	// Deployment is eventually running
	e2eutil.WaitForDeployment(t, nomadClient, deploy.ID, run, structs.DeploymentStatusDescriptionRunning)

	// Check the lookup error before touching the result: deploy is not
	// meaningful if Info failed.
	deploy, _, err = nomadClient.Deployments().Info(deploy.ID, nil)
	require.NoError(t, err)
	require.NotNil(t, deploy)
	require.Equal(t, run, deploy.Status)
	require.Equal(t, structs.DeploymentStatusDescriptionRunning, deploy.StatusDescription)
}

// AfterEach deregisters every job recorded in tc.jobIds, resets the list, and
// then asks the cluster to garbage collect. Return values of the cleanup calls
// are discarded.
func (tc *DeploymentTest) AfterEach(f *framework.F) {
	client := tc.Nomad()

	// Stop all jobs registered by the test that just ran.
	jobsAPI := client.Jobs()
	for i := range tc.jobIds {
		jobsAPI.Deregister(tc.jobIds[i], true, nil)
	}
	tc.jobIds = []string{}

	// Garbage collect
	client.System().GarbageCollect()
}
54 changes: 54 additions & 0 deletions e2e/deployment/input/deployment_auto0.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
job "deployment_auto.nomad" {
datacenters = ["dc1"]

group "one" {
count = 3

update {
max_parallel = 3
auto_promote = true
canary = 2
}

task "one" {
driver = "raw_exec"

config {
command = "/bin/sleep"
# change args to update the job; this is the only change
args = ["1000000"]
}

resources {
cpu = 20
memory = 20
}
}
}

group "two" {
count = 3

update {
max_parallel = 2
auto_promote = true
canary = 2
min_healthy_time = "2s"
}

task "two" {
driver = "raw_exec"

config {
command = "/bin/sleep"
# change args to update the job; this is the only change
args = ["2000000"]
}

resources {
cpu = 20
memory = 20
}
}
}
}
Loading

0 comments on commit 643d840

Please sign in to comment.