Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler: handle MRD jobs correctly in computeDeploymentPaused #14649

Closed
wants to merge 10 commits into from
3 changes: 3 additions & 0 deletions .changelog/14649.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed a bug where MRD jobs wouldn't get deployments even if they previously had one
pkazmierczak marked this conversation as resolved.
Show resolved Hide resolved
```
44 changes: 34 additions & 10 deletions scheduler/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ func (a *allocReconciler) Compute() *reconcileResults {
return a.result
}

a.computeDeploymentPaused()
a.computeDeploymentPausedAndFailed()
deploymentComplete := a.computeDeploymentComplete(m)
a.computeDeploymentUpdates(deploymentComplete)

Expand Down Expand Up @@ -265,7 +265,7 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) {
}
}

// computeDeploymentPaused is responsible for setting flags on the
// computeDeploymentPausedAndFailed is responsible for setting flags on the
// allocReconciler that indicate the state of the deployment if one
// is required. The flags that are managed are:
// 1. deploymentFailed: Did the current deployment fail just as named.
Expand All @@ -275,19 +275,41 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) {
// is detected by the deploymentWatcher during it will enter a paused
// state. This flag tells Compute we're paused or pending, so we should
// not make placements on the deployment.
func (a *allocReconciler) computeDeploymentPaused() {
func (a *allocReconciler) computeDeploymentPausedAndFailed() {
if a.deployment != nil {
a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused ||
a.deployment.Status == structs.DeploymentStatusPending
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
}
if a.deployment == nil {
if a.job.IsMultiregion() &&
a.job.UsesDeployments() &&
!(a.job.IsPeriodic() || a.job.IsParameterized()) {

a.deploymentPaused = true
}
// compute MRD jobs
a.computeMRDDeploymentPaused()
}

// computeMRDDeploymentPaused sets deployment options that are specific to
// multi-region jobs
func (a *allocReconciler) computeMRDDeploymentPaused() {
// consider only jobs without current deployment
if a.deployment != nil {
return
}

// consider only jobs that are MRD
if !a.job.IsMultiregion() {
return
}

// consider only just that use deployments
if !a.job.UsesDeployments() {
return
}

// this condition covers against the following cases:
// - node updates, when we have a previous deployment (we don't want deployment to be paused)
// - stop -purge operation, when job version is 0
// - job updates, when current job version is strictly greater than the old one
if a.oldDeployment == nil || a.job.Version > a.oldDeployment.JobVersion || a.job.Version == 0 {
a.deploymentPaused = true
}
}

Expand Down Expand Up @@ -890,9 +912,11 @@ func (a *allocReconciler) createDeployment(groupName string, strategy *structs.U
if a.deployment == nil {
a.deployment = structs.NewDeployment(a.job, a.evalPriority)
// in multiregion jobs, most deployments start in a pending state
if a.job.IsMultiregion() && !(a.job.IsPeriodic() && a.job.IsParameterized()) {
if a.job.IsMultiregion() && a.job.UsesDeployments() {
a.deployment.Status = structs.DeploymentStatusPending
a.deployment.StatusDescription = structs.DeploymentStatusDescriptionPendingForPeer
// do not create placements for MRD when deployment is pending
a.result.place = []allocPlaceResult{}
}
a.result.deployment = a.deployment
}
Expand Down
126 changes: 82 additions & 44 deletions scheduler/reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6002,12 +6002,14 @@ func TestReconciler_ComputeDeploymentPaused(t *testing.T) {
ci.Parallel(t)

type testCase struct {
name string
jobType string
isMultiregion bool
isPeriodic bool
isParameterized bool
expected bool
name string
jobType string
isMultiregion bool
isPeriodic bool
isParameterized bool
oldDeployment bool
expectedPaused bool
expectedPlacements int
}

multiregionCfg := mock.MultiregionJob().Multiregion
Expand All @@ -6018,52 +6020,64 @@ func TestReconciler_ComputeDeploymentPaused(t *testing.T) {

testCases := []testCase{
{
name: "single region service is not paused",
jobType: structs.JobTypeService,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
name: "single region service is not paused",
jobType: structs.JobTypeService,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
oldDeployment: false,
expectedPaused: false,
expectedPlacements: 1,
},
{
name: "multiregion service is paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
expected: true,
name: "multiregion service is paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
oldDeployment: false,
expectedPaused: true,
expectedPlacements: 0,
},
{
name: "multiregion periodic service is not paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: true,
isParameterized: false,
expected: false,
name: "single region batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
oldDeployment: false,
expectedPaused: false,
expectedPlacements: 1,
},
{
name: "multiregion parameterized service is not paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: true,
expected: false,
name: "multiregion batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
oldDeployment: false,
expectedPaused: false,
expectedPlacements: 1,
},
{
name: "single region batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
name: "multiregion service job with previous deployment is not paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
oldDeployment: true,
expectedPaused: false,
expectedPlacements: 1,
},
{
name: "multiregion batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
name: "multiregion service job without previous deployment is paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
oldDeployment: false,
expectedPaused: true,
expectedPlacements: 0,
},
}

Expand Down Expand Up @@ -6091,13 +6105,37 @@ func TestReconciler_ComputeDeploymentPaused(t *testing.T) {
job.ParameterizedJob = parameterizedCfg
}

// no need for 10
job.TaskGroups[0].Count = 1

reconciler := NewAllocReconciler(
testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, nil, nil, "", job.Priority, true)

_ = reconciler.Compute()
if tc.oldDeployment {
job.Version = 1
reconciler.oldDeployment = &structs.Deployment{
ID: uuid.Generate(),
Namespace: "test",
JobID: job.ID,
JobVersion: job.Version,
JobModifyIndex: job.ModifyIndex,
JobSpecModifyIndex: job.ModifyIndex,
JobCreateIndex: job.CreateIndex,
IsMultiregion: true,
TaskGroups: nil,
Status: structs.DeploymentStatusCancelled,
StatusDescription: "",
EvalPriority: 50,
CreateIndex: 0,
ModifyIndex: 0,
}
}

reconciler.Compute()

require.Equal(t, tc.expected, reconciler.deploymentPaused)
require.Equal(t, tc.expectedPaused, reconciler.deploymentPaused)
require.Len(t, reconciler.result.place, tc.expectedPlacements)
})
}
}