fix auto promote of canary task groups when deploying them alongside rolling deploy taskgroups that do not use the canary deployment system
kainoaseto committed Jan 19, 2022
1 parent a0c0b80 commit 0f61a8d
Showing 3 changed files with 140 additions and 19 deletions.
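
Background on the bug: autoPromoteDeployment gates promotion by looping over every task group's DeploymentState. A task group that uses a plain rolling update has AutoPromote set to false and desires zero canaries, so before this change such a group tripped the early return on every evaluation; the canary task groups in the same job were never promoted and the deployment hung at the canary phase. The one-line guard added below skips groups that desire no canaries. A runnable sketch of the gate, before and after, follows the first file's diff.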
9 changes: 8 additions & 1 deletion nomad/deploymentwatcher/deployment_watcher.go
@@ -283,9 +283,16 @@ func (w *deploymentWatcher) autoPromoteDeployment(allocs []*structs.AllocListStu
 		return nil
 	}
 
-	// AutoPromote iff every task group is marked auto_promote and is healthy. The whole
+	// AutoPromote iff every task group with canaries is marked auto_promote and is healthy. The whole
 	// job version has been incremented, so we promote together. See also AutoRevert
 	for _, dstate := range d.TaskGroups {
+
+		// skip auto promote canary validation if the task group has no canaries
+		// to prevent auto promote hanging on mixed canary/non-canary taskgroup deploys
+		if dstate.DesiredCanaries < 1 {
+			continue
+		}
+
 		if !dstate.AutoPromote || dstate.DesiredCanaries != len(dstate.PlacedCanaries) {
 			return nil
 		}
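
To see why the guard matters, here is a minimal, self-contained sketch of the promotion gate before and after this change. deploymentState, oldGate, newGate, and the sample data are illustrative stand-ins (Nomad's real type is structs.DeploymentState); only the two predicates inside the loops are taken from the diff above.

package main

import "fmt"

// deploymentState is a simplified stand-in for structs.DeploymentState,
// carrying only the fields the promotion gate reads.
type deploymentState struct {
	AutoPromote     bool
	DesiredCanaries int
	PlacedCanaries  []string
}

// oldGate mirrors the pre-fix loop: a task group that is not marked
// auto_promote blocks promotion even when it has no canaries at all.
func oldGate(groups map[string]*deploymentState) bool {
	for _, dstate := range groups {
		if !dstate.AutoPromote || dstate.DesiredCanaries != len(dstate.PlacedCanaries) {
			return false
		}
	}
	return true
}

// newGate mirrors the fixed loop: groups that desire no canaries are
// skipped, so only canary groups are consulted.
func newGate(groups map[string]*deploymentState) bool {
	for _, dstate := range groups {
		if dstate.DesiredCanaries < 1 {
			continue
		}
		if !dstate.AutoPromote || dstate.DesiredCanaries != len(dstate.PlacedCanaries) {
			return false
		}
	}
	return true
}

func main() {
	// A mixed deployment like the one in the test below: "web" uses
	// canaries with auto_promote, "api" is a plain rolling deploy.
	groups := map[string]*deploymentState{
		"web": {AutoPromote: true, DesiredCanaries: 2, PlacedCanaries: []string{"a", "b"}},
		"api": {}, // AutoPromote false, zero canaries
	}
	fmt.Println(oldGate(groups)) // false: "api" blocks promotion forever
	fmt.Println(newGate(groups)) // true: the deployment can auto-promote
}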
66 changes: 49 additions & 17 deletions nomad/deploymentwatcher/deployments_watcher_test.go
@@ -535,30 +535,40 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) {
 	w, m := defaultTestDeploymentWatcher(t)
 	now := time.Now()
 
-	// Create 1 UpdateStrategy, 1 job (1 TaskGroup), 2 canaries, and 1 deployment
-	upd := structs.DefaultUpdateStrategy.Copy()
-	upd.AutoPromote = true
-	upd.MaxParallel = 2
-	upd.Canary = 2
-	upd.ProgressDeadline = 5 * time.Second
+	// Create 1 UpdateStrategy, 1 job (2 TaskGroups), 2 canaries, and 1 deployment
+	canaryUpd := structs.DefaultUpdateStrategy.Copy()
+	canaryUpd.AutoPromote = true
+	canaryUpd.MaxParallel = 2
+	canaryUpd.Canary = 2
+	canaryUpd.ProgressDeadline = 5 * time.Second
 
-	j := mock.Job()
-	j.TaskGroups[0].Update = upd
+	rollingUpd := structs.DefaultUpdateStrategy.Copy()
+	rollingUpd.ProgressDeadline = 5 * time.Second
+
+	j := mock.MultiTaskGroupJob()
+	j.TaskGroups[0].Update = canaryUpd
+	j.TaskGroups[1].Update = rollingUpd
 
 	d := mock.Deployment()
 	d.JobID = j.ID
 	// This is created in scheduler.computeGroup at runtime, where properties from the
 	// UpdateStrategy are copied in
 	d.TaskGroups = map[string]*structs.DeploymentState{
 		"web": {
-			AutoPromote:      upd.AutoPromote,
-			AutoRevert:       upd.AutoRevert,
-			ProgressDeadline: upd.ProgressDeadline,
+			AutoPromote:      canaryUpd.AutoPromote,
+			AutoRevert:       canaryUpd.AutoRevert,
+			ProgressDeadline: canaryUpd.ProgressDeadline,
 			DesiredTotal:     2,
 		},
+		"api": {
+			AutoPromote:      rollingUpd.AutoPromote,
+			AutoRevert:       rollingUpd.AutoRevert,
+			ProgressDeadline: rollingUpd.ProgressDeadline,
+			DesiredTotal:     2,
+		},
 	}
 
-	alloc := func() *structs.Allocation {
+	canaryAlloc := func() *structs.Allocation {
 		a := mock.Alloc()
 		a.DeploymentID = d.ID
 		a.CreateTime = now.UnixNano()
@@ -569,14 +579,36 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) {
 		return a
 	}
 
-	a := alloc()
-	b := alloc()
+	rollingAlloc := func() *structs.Allocation {
+		a := mock.Alloc()
+		a.DeploymentID = d.ID
+		a.CreateTime = now.UnixNano()
+		a.ModifyTime = now.UnixNano()
+		a.TaskGroup = "api"
+		a.AllocatedResources.Tasks["api"] = a.AllocatedResources.Tasks["web"].Copy()
+		delete(a.AllocatedResources.Tasks, "web")
+		a.TaskResources["api"] = a.TaskResources["web"].Copy()
+		delete(a.TaskResources, "web")
+		a.DeploymentStatus = &structs.AllocDeploymentStatus{
+			Canary: false,
+		}
+		return a
+	}
+
+	// Web taskgroup (0)
+	a := canaryAlloc()
+	b := canaryAlloc()
+
+	// Api taskgroup (1)
+	c := rollingAlloc()
+	e := rollingAlloc()
 
 	d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID, b.ID}
 	d.TaskGroups[a.TaskGroup].DesiredCanaries = 2
+	d.TaskGroups[c.TaskGroup].PlacedAllocs = 2
 	require.NoError(t, m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), j), "UpsertJob")
 	require.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment")
-	require.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a, b}), "UpsertAllocs")
+	require.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a, b, c, e}), "UpsertAllocs")
 
 	// =============================================================
 	// Support method calls
@@ -595,7 +627,7 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) {

 	matchConfig1 := &matchDeploymentAllocHealthRequestConfig{
 		DeploymentID: d.ID,
-		Healthy:      []string{a.ID, b.ID},
+		Healthy:      []string{a.ID, b.ID, c.ID, e.ID},
 		Eval:         true,
 	}
 	matcher1 := matchDeploymentAllocHealthRequest(matchConfig1)
@@ -629,7 +661,7 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) {
 	// Mark the canaries healthy
 	req := &structs.DeploymentAllocHealthRequest{
 		DeploymentID:         d.ID,
-		HealthyAllocationIDs: []string{a.ID, b.ID},
+		HealthyAllocationIDs: []string{a.ID, b.ID, c.ID, e.ID},
 	}
 	var resp structs.DeploymentUpdateResponse
 	// Calls w.raft.UpdateDeploymentAllocHealth, which is implemented by StateStore in
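
A side note on the test fixtures above: mock.Alloc() returns an allocation targeted at the job's default "web" task group, whose single task is also named "web", so rollingAlloc retargets it at "api" by copying the per-task resource maps and deleting the "web" entries. A hypothetical helper capturing that pattern (retargetAlloc is not part of this commit, assumes the test file's existing structs import, and assumes the task shares its group's name as these mocks do):

// retargetAlloc is a hypothetical helper, not part of this commit. It points a
// mock allocation at a different task group by moving its per-task resource
// entries from one task name to another.
func retargetAlloc(a *structs.Allocation, from, to string) {
	a.TaskGroup = to
	a.AllocatedResources.Tasks[to] = a.AllocatedResources.Tasks[from].Copy()
	delete(a.AllocatedResources.Tasks, from)
	a.TaskResources[to] = a.TaskResources[from].Copy()
	delete(a.TaskResources, from)
}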
84 changes: 83 additions & 1 deletion nomad/mock/mock.go
@@ -337,8 +337,90 @@ func Job() *structs.Job {
 	return job
 }
 
+func MultiTaskGroupJob() *structs.Job {
+	job := Job()
+	apiTaskGroup := &structs.TaskGroup{
+		Name:  "api",
+		Count: 10,
+		EphemeralDisk: &structs.EphemeralDisk{
+			SizeMB: 150,
+		},
+		RestartPolicy: &structs.RestartPolicy{
+			Attempts: 3,
+			Interval: 10 * time.Minute,
+			Delay:    1 * time.Minute,
+			Mode:     structs.RestartPolicyModeDelay,
+		},
+		ReschedulePolicy: &structs.ReschedulePolicy{
+			Attempts:      2,
+			Interval:      10 * time.Minute,
+			Delay:         5 * time.Second,
+			DelayFunction: "constant",
+		},
+		Migrate: structs.DefaultMigrateStrategy(),
+		Networks: []*structs.NetworkResource{
+			{
+				Mode: "host",
+				DynamicPorts: []structs.Port{
+					{Label: "http"},
+					{Label: "admin"},
+				},
+			},
+		},
+		Tasks: []*structs.Task{
+			{
+				Name:   "api",
+				Driver: "exec",
+				Config: map[string]interface{}{
+					"command": "/bin/date",
+				},
+				Env: map[string]string{
+					"FOO": "bar",
+				},
+				Services: []*structs.Service{
+					{
+						Name:      "${TASK}-backend",
+						PortLabel: "http",
+						Tags:      []string{"pci:${meta.pci-dss}", "datacenter:${node.datacenter}"},
+						Checks: []*structs.ServiceCheck{
+							{
+								Name:     "check-table",
+								Type:     structs.ServiceCheckScript,
+								Command:  "/usr/local/check-table-${meta.database}",
+								Args:     []string{"${meta.version}"},
+								Interval: 30 * time.Second,
+								Timeout:  5 * time.Second,
+							},
+						},
+					},
+					{
+						Name:      "${TASK}-admin",
+						PortLabel: "admin",
+					},
+				},
+				LogConfig: structs.DefaultLogConfig(),
+				Resources: &structs.Resources{
+					CPU:      500,
+					MemoryMB: 256,
+				},
+				Meta: map[string]string{
+					"foo": "bar",
+				},
+			},
+		},
+		Meta: map[string]string{
+			"elb_check_type":     "http",
+			"elb_check_interval": "30s",
+			"elb_check_min":      "3",
+		},
+	}
+	job.TaskGroups = append(job.TaskGroups, apiTaskGroup)
+	job.Canonicalize()
+	return job
+}
+
 func LifecycleSideTask(resources structs.Resources, i int) *structs.Task {
-	return &structs.Task{
+	return &structs.Task{
 		Name:   fmt.Sprintf("side-%d", i),
 		Driver: "exec",
 		Config: map[string]interface{}{
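
Finally, a sketch of how the new mock is consumed, mirroring the test changes above (the values shown are the ones the test assigns; this snippet is illustrative, not part of the commit):

// Build the mixed job the watcher test exercises: group 0 ("web") deploys
// via canaries with auto_promote, group 1 ("api") keeps a rolling update.
j := mock.MultiTaskGroupJob()

canaryUpd := structs.DefaultUpdateStrategy.Copy()
canaryUpd.AutoPromote = true
canaryUpd.Canary = 2
j.TaskGroups[0].Update = canaryUpd

rollingUpd := structs.DefaultUpdateStrategy.Copy()
j.TaskGroups[1].Update = rollingUpd // zero canaries, so the new guard skips it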
