Skip to content

Commit

Permalink
Startup policy implementation (#246)
Browse files Browse the repository at this point in the history
* Implement startup policy for jobset

* linter fixes

* fix error event for startup policy finished

* add suspend utility functions

* add active case from pr comments

* pr comment renames

* cleaning up create and suspend for startup policy

* update condition

* update comment on jobset

* rename job suspend functions

* code cleanup and pr comments

* shorten startup policy

* address comments in integration tests

* complicated rebase fixes

* integration tests are now working

* address daniels comments

* rename variable

* update with more comments from daniel

* pr comments

* missed one ensure condition

* update manifests

* update deepcopy

* daniel comments

* formatting fixes

* add order option to integration test
  • Loading branch information
kannon92 committed Feb 13, 2024
1 parent 9857889 commit bf2f217
Show file tree
Hide file tree
Showing 30 changed files with 1,467 additions and 92 deletions.
27 changes: 27 additions & 0 deletions api/jobset/v1alpha2/jobset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ const (
JobSetFailed JobSetConditionType = "Failed"
// JobSetSuspended means the job is suspended
JobSetSuspended JobSetConditionType = "Suspended"
// JobSetStartupPolicyCompleted means the StartupPolicy was complete
JobSetStartupPolicyCompleted JobSetConditionType = "StartupPolicyCompleted"
)

// JobSetSpec defines the desired state of JobSet
Expand Down Expand Up @@ -79,6 +81,10 @@ type JobSetSpec struct {
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="Value is immutable"
FailurePolicy *FailurePolicy `json:"failurePolicy,omitempty"`

// StartupPolicy, if set, configures in what order jobs must be started
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="Value is immutable"
StartupPolicy *StartupPolicy `json:"startupPolicy,omitempty"`

// Suspend suspends all running child Jobs when set to true.
Suspend *bool `json:"suspend,omitempty"`
}
Expand Down Expand Up @@ -191,6 +197,27 @@ type SuccessPolicy struct {
TargetReplicatedJobs []string `json:"targetReplicatedJobs,omitempty"`
}

type StartupPolicyOptions string

const (
// This is the default setting
// AnyOrder means that we will start the replicated jobs
// without any specific order.
AnyOrder StartupPolicyOptions = "AnyOrder"
// InOrder starts the replicated jobs in order
// that they are listed.
InOrder StartupPolicyOptions = "InOrder"
)

type StartupPolicy struct {
// StartupPolicyOrder determines the startup order of the ReplicatedJobs.
// AnyOrder means to start replicated jobs in any order.
// InOrder means to start them as they are listed in the JobSet. A ReplicatedJob is started only
// when all the jobs of the previous one are ready.
// +kubebuilder:validation:Enum=AnyOrder;InOrder
StartupPolicyOrder StartupPolicyOptions `json:"startupPolicyOrder"`
}

func init() {
SchemeBuilder.Register(&JobSet{}, &JobSetList{})
}
3 changes: 3 additions & 0 deletions api/jobset/v1alpha2/jobset_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ func (js *JobSet) Default() {
if js.Spec.SuccessPolicy == nil {
js.Spec.SuccessPolicy = &SuccessPolicy{Operator: OperatorAll}
}
if js.Spec.StartupPolicy == nil {
js.Spec.StartupPolicy = &StartupPolicy{StartupPolicyOrder: AnyOrder}
}
for i := range js.Spec.ReplicatedJobs {
// Default job completion mode to indexed.
if js.Spec.ReplicatedJobs[i].Template.Spec.CompletionMode == nil {
Expand Down
71 changes: 70 additions & 1 deletion api/jobset/v1alpha2/jobset_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ var TestPodTemplate = corev1.PodTemplateSpec{

func TestJobSetDefaulting(t *testing.T) {
defaultSuccessPolicy := &SuccessPolicy{Operator: OperatorAll}
defaultStartupPolicy := &StartupPolicy{StartupPolicyOrder: AnyOrder}
testCases := []struct {
name string
js *JobSet
Expand All @@ -40,6 +41,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -55,6 +57,7 @@ func TestJobSetDefaulting(t *testing.T) {
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand Down Expand Up @@ -90,6 +93,7 @@ func TestJobSetDefaulting(t *testing.T) {
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -109,6 +113,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -124,6 +129,7 @@ func TestJobSetDefaulting(t *testing.T) {
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -143,6 +149,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(false)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -159,6 +166,7 @@ func TestJobSetDefaulting(t *testing.T) {
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(false)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -178,6 +186,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -196,6 +205,7 @@ func TestJobSetDefaulting(t *testing.T) {
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -219,6 +229,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -239,6 +250,7 @@ func TestJobSetDefaulting(t *testing.T) {
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand All @@ -261,7 +273,8 @@ func TestJobSetDefaulting(t *testing.T) {
name: "success policy unset",
js: &JobSet{
Spec: JobSetSpec{
Network: &Network{EnableDNSHostnames: ptr.To(true)},
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -280,6 +293,7 @@ func TestJobSetDefaulting(t *testing.T) {
},
want: &JobSet{
Spec: JobSetSpec{
StartupPolicy: defaultStartupPolicy,
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
Expand All @@ -301,12 +315,64 @@ func TestJobSetDefaulting(t *testing.T) {
},
{
name: "success policy operator set, replicatedJobNames unset",
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: &SuccessPolicy{
Operator: OperatorAny,
},
Network: &Network{EnableDNSHostnames: ptr.To(true)},
StartupPolicy: defaultStartupPolicy,
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Spec: batchv1.JobSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyAlways,
},
},
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
},
},
},
},
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: &SuccessPolicy{
Operator: OperatorAny,
},
StartupPolicy: defaultStartupPolicy,
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Spec: batchv1.JobSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyAlways,
},
},
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
},
},
},
},
},
{
name: "startup policy order InOrder set",
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: &SuccessPolicy{
Operator: OperatorAny,
},
Network: &Network{EnableDNSHostnames: ptr.To(true)},
StartupPolicy: &StartupPolicy{
StartupPolicyOrder: InOrder,
},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -328,6 +394,9 @@ func TestJobSetDefaulting(t *testing.T) {
SuccessPolicy: &SuccessPolicy{
Operator: OperatorAny,
},
StartupPolicy: &StartupPolicy{
StartupPolicyOrder: InOrder,
},
Network: &Network{EnableDNSHostnames: ptr.To(true)},
ReplicatedJobs: []ReplicatedJob{
{
Expand Down
30 changes: 29 additions & 1 deletion api/jobset/v1alpha2/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions api/jobset/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions client-go/applyconfiguration/jobset/v1alpha2/jobsetspec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bf2f217

Please sign in to comment.