Skip to content

Commit

Permalink
scheduler: Fix bug where the would treat multiregion jobs as paused f…
Browse files Browse the repository at this point in the history
…or job types that don't use deployments (#14659)

* scheduler: Fix bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments

Co-authored-by: Tim Gross <tgross@hashicorp.com>

Co-authored-by: Tim Gross <tgross@hashicorp.com>
  • Loading branch information
DerekStrickland and tgross committed Sep 22, 2022
1 parent 451ecf3 commit 85a4df5
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .changelog/14659.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Fixed bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments
```
11 changes: 11 additions & 0 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -5747,6 +5747,17 @@ func (j *Job) GetScalingPolicies() []*ScalingPolicy {
return ret
}

// UsesDeployments returns a boolean indicating whether the job configuration
// results in a deployment during scheduling.
func (j *Job) UsesDeployments() bool {
switch j.Type {
case JobTypeService:
return true
default:
return false
}
}

// ScalingPolicyListStub is used to return a subset of scaling policy information
// for the scaling policy list
type ScalingPolicyListStub struct {
Expand Down
18 changes: 14 additions & 4 deletions scheduler/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,17 +265,27 @@ func (a *allocReconciler) computeDeploymentUpdates(deploymentComplete bool) {
}
}

// computeDeploymentPaused is responsible for setting flags on the
// allocReconciler that indicate the state of the deployment if one
// is required. The flags that are managed are:
// 1. deploymentFailed: Did the current deployment fail just as named.
// 2. deploymentPaused: Multiregion job types that use deployments run
// the deployments later during the fan-out stage. When the deployment
// is created it will be in a pending state. If an invariant violation
// is detected by the deploymentWatcher during it will enter a paused
// state. This flag tells Compute we're paused or pending, so we should
// not make placements on the deployment.
func (a *allocReconciler) computeDeploymentPaused() {
if a.deployment != nil {
a.deploymentPaused = a.deployment.Status == structs.DeploymentStatusPaused ||
a.deployment.Status == structs.DeploymentStatusPending
a.deploymentFailed = a.deployment.Status == structs.DeploymentStatusFailed
}
if a.deployment == nil {
// When we create the deployment later, it will be in a pending
// state. But we also need to tell Compute we're paused, otherwise we
// make placements on the paused deployment.
if a.job.IsMultiregion() && !(a.job.IsPeriodic() || a.job.IsParameterized()) {
if a.job.IsMultiregion() &&
a.job.UsesDeployments() &&
!(a.job.IsPeriodic() || a.job.IsParameterized()) {

a.deploymentPaused = true
}
}
Expand Down
106 changes: 106 additions & 0 deletions scheduler/reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5995,3 +5995,109 @@ func TestReconciler_Client_Disconnect_Canaries(t *testing.T) {
})
}
}

// Tests the reconciler properly handles the logic for computeDeploymentPaused
// for various job types.
func TestReconciler_ComputeDeploymentPaused(t *testing.T) {
ci.Parallel(t)

type testCase struct {
name string
jobType string
isMultiregion bool
isPeriodic bool
isParameterized bool
expected bool
}

multiregionCfg := mock.MultiregionJob().Multiregion
periodicCfg := mock.PeriodicJob().Periodic
parameterizedCfg := &structs.ParameterizedJobConfig{
Payload: structs.DispatchPayloadRequired,
}

testCases := []testCase{
{
name: "single region service is not paused",
jobType: structs.JobTypeService,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
{
name: "multiregion service is paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: false,
expected: true,
},
{
name: "multiregion periodic service is not paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: true,
isParameterized: false,
expected: false,
},
{
name: "multiregion parameterized service is not paused",
jobType: structs.JobTypeService,
isMultiregion: true,
isPeriodic: false,
isParameterized: true,
expected: false,
},
{
name: "single region batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
{
name: "multiregion batch job is not paused",
jobType: structs.JobTypeBatch,
isMultiregion: false,
isPeriodic: false,
isParameterized: false,
expected: false,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
var job *structs.Job

if tc.jobType == structs.JobTypeService {
job = mock.Job()
} else if tc.jobType == structs.JobTypeBatch {
job = mock.BatchJob()
}

require.NotNil(t, job, "invalid job type", tc.jobType)

if tc.isMultiregion {
job.Multiregion = multiregionCfg
}

if tc.isPeriodic {
job.Periodic = periodicCfg
}

if tc.isParameterized {
job.ParameterizedJob = parameterizedCfg
}

reconciler := NewAllocReconciler(
testlog.HCLogger(t), allocUpdateFnIgnore, false, job.ID, job,
nil, nil, nil, "", job.Priority, true)

_ = reconciler.Compute()

require.Equal(t, tc.expected, reconciler.deploymentPaused)
})
}
}

0 comments on commit 85a4df5

Please sign in to comment.